https://www.kaggle.com/competitions/vehicle/overview

In [3]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [4]:
# Import library
import os
import shutil
import glob
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from math import ceil
from collections import Counter

In [5]:
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight

In [6]:
import itertools
import tensorflow as tf 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import models, optimizers, regularizers

In [7]:
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_input_efficientnet
from tensorflow.keras.applications.inception_v3 import preprocess_input as preprocess_input_inception_v3

In [8]:
import warnings
warnings.filterwarnings('ignore')

In [9]:
path_train_orig='vehicle/train/trainset/'

In [10]:
pd.DataFrame(os.listdir(path_train_orig)).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,Ambulance,Barge,Bicycle,Boat,Bus,Car,Cart,Caterpillar,Helicopter,Limousine,Motorcycle,Segway,Snowmobile,Tank,Taxi,Truck,Van


In [11]:
# make dataset
# Index every image under path_train_orig/<class>/<image> as one row:
# (relative path, class name, file name).
data = []
# sorted(): os.listdir returns entries in arbitrary order, which would make the row
# order (and therefore the seeded train/validation split) differ between machines.
for category in sorted(os.listdir(path_train_orig)):
    category_dir = os.path.join(path_train_orig, category)
    if not os.path.isdir(category_dir):
        # Stray files next to the class folders are not classes.
        continue
    for img_name in sorted(os.listdir(category_dir)):
        data.append((os.path.join(category_dir, img_name), category, img_name))

df_train = pd.DataFrame(data, columns=['data_path', 'class', 'imagen'])
df_train.head()

Unnamed: 0,data_path,class,imagen
0,vehicle/train/trainset/Ambulance/000040_09.jpg,Ambulance,000040_09.jpg
1,vehicle/train/trainset/Ambulance/000050_10.jpg,Ambulance,000050_10.jpg
2,vehicle/train/trainset/Ambulance/000052_03.jpg,Ambulance,000052_03.jpg
3,vehicle/train/trainset/Ambulance/000057_04.jpg,Ambulance,000057_04.jpg
4,vehicle/train/trainset/Ambulance/000067_14.jpg,Ambulance,000067_14.jpg


In [12]:
# value count of class
df_train['class'].value_counts()

Boat           8695
Car            6781
Motorcycle     2986
Bus            2133
Truck          2033
Bicycle        1618
Van            1111
Taxi            748
Helicopter      668
Caterpillar     331
Tank            206
Barge           202
Segway          153
Ambulance       132
Snowmobile      123
Limousine        74
Cart             51
Name: class, dtype: int64

In [13]:
categories = df_train['class'].unique()

In [15]:
categories = df_train['class'].unique()

In [16]:
len(categories)

17

In [17]:
X=df_train.drop('class', axis=1)

In [18]:
# label encoder (class)
# Turn each class name into an integer label; the result is one label per row of df_train.
y_encoder=LabelEncoder().fit_transform(df_train['class'].values)
# One-hot alternative, intentionally disabled: the integer vector is kept as the target.
#y=tf.keras.utils.to_categorical(y_encoder, num_classes=len(categories))
y=y_encoder
# Display the encoded labels.
y

array([ 0,  0,  0, ..., 16, 16, 16])

In [19]:
# Hold out 20% of the rows for validation. stratify=y splits per class label,
# and random_state=42 fixes the shuffle so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [20]:
X_train.shape, y_train.shape

((22436, 2), (22436,))

In [21]:
X_val.shape, y_val.shape

((5609, 2), (5609,))

In [22]:
def make_dataframe_src_dst(df, path_orig, folder1, folder2, class_names=None):
    """Mirror the images listed in ``df`` into a sibling folder made of symlinks.

    For every path in ``df['data_path']`` a destination path is obtained by
    replacing ``folder1`` with ``folder2``; a symlink pointing at the original
    file is then created at that destination. The per-class directories under
    the destination root are deleted and recreated first, so re-running the
    function starts from empty class folders.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``data_path`` column with paths shaped like
        ``<path_orig>/<class>/<file>``.
    path_orig : str
        Directory holding the original class folders (trailing slash optional).
        The destination root is created next to it.
    folder1 : str
        Name of the source folder as it appears inside ``data_path``.
    folder2 : str
        Name of the destination folder that replaces ``folder1``.
    class_names : iterable of str, optional
        Class sub-directories to (re)create. Defaults to the class folders that
        actually occur in ``df``; pass the full list to also get empty folders
        for classes missing from ``df``.

    Raises
    ------
    ValueError
        If ``folder1`` and ``folder2`` are equal, because resetting the
        destination folders would then delete the source images.
    """
    if folder1 == folder2:
        raise ValueError("folder1 and folder2 must differ; otherwise the source images would be deleted")

    src_paths = df['data_path'].tolist()
    dst_paths = [src.replace(folder1, folder2) for src in src_paths]

    if class_names is None:
        # Derive the classes from the data itself rather than from a notebook global.
        class_names = sorted({os.path.basename(os.path.dirname(src)) for src in src_paths})

    # Destination root is a sibling of path_orig; rstrip makes the trailing slash optional
    # and dirname keeps the leading '/' of absolute paths.
    dst_root = os.path.join(os.path.dirname(path_orig.rstrip('/')), folder2)

    # Reset every class directory so stale links from an earlier split never survive.
    for class_name in class_names:
        class_dir = os.path.join(dst_root, class_name)
        if os.path.exists(class_dir):
            shutil.rmtree(class_dir)
        os.makedirs(class_dir)

    # Absolute targets keep the links valid regardless of the working directory they are read from.
    for src, dst in zip(src_paths, dst_paths):
        os.symlink(os.path.abspath(src), os.path.abspath(dst))

In [23]:
# make dataset train - val
# Build the symlinked train/validation trees only when they are missing: a fresh
# Restart & Run All then has the folders the generators read from, and later
# re-runs do not recreate the links every time.
split_root = os.path.dirname(path_train_orig.rstrip('/'))
if not os.path.isdir(os.path.join(split_root, 'train_folder')):
    make_dataframe_src_dst(X_train, path_train_orig, 'trainset', 'train_folder')
if not os.path.isdir(os.path.join(split_root, 'val_folder')):
    make_dataframe_src_dst(X_val, path_train_orig, 'trainset', 'val_folder')

In [24]:
# Directories read by flow_from_directory. The train/val folders are siblings of the
# original training folder; rstrip + dirname works with or without a trailing slash
# on path_train_orig, and the final '' keeps the trailing separator on the result.
split_root = os.path.dirname(path_train_orig.rstrip('/'))
train_path = os.path.join(split_root, 'train_folder', '')
val_path = os.path.join(split_root, 'val_folder', '')
test_path = 'vehicle/test/'

In [25]:
train_path, val_path, test_path

('vehicle/train/train_folder/', 'vehicle/train/val_folder/', 'vehicle/test/')

In [26]:
# min-max image values
# Load a sample image explicitly so this cell does not rely on an `img` array
# left behind by some earlier cell of the session.
img = cv2.imread(df_train['data_path'].iloc[0])
img.min(), img.max()

(0, 255)

## Convolutional Neural Network (CNN)

In [27]:
# Number of training epochs and images per batch.
epochs_val, batch_size_val = 15, 32

# Size every image is resized to by the generators.
target_size_val = (224, 224)

# Model input shape: the resize target followed by 3 channels.
input_shape_val = target_size_val + (3,)

### Datagen

In [28]:
# Datagen and augmentation
# All three generators multiply pixel values by 1/255; only the training
# generator additionally applies random augmentation.
pixel_scale = 1./255

augmentation_args = dict(
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

train_datagen = ImageDataGenerator(rescale=pixel_scale,
                                   preprocessing_function=None,
                                   **augmentation_args)

validation_datagen = ImageDataGenerator(rescale=pixel_scale)

test_datagen = ImageDataGenerator(rescale=pixel_scale)

### Data Generator

In [29]:
# Train / Validation / Test - Data Generators
# Arguments common to all three directory iterators.
shared_flow_args = dict(target_size=target_size_val, class_mode='categorical')

train_generator = train_datagen.flow_from_directory(
    train_path, batch_size=batch_size_val, **shared_flow_args)

validation_generator = validation_datagen.flow_from_directory(
    val_path, batch_size=batch_size_val, **shared_flow_args)

# Test: shuffling disabled; no batch_size is passed, so the library default applies.
test_generator = test_datagen.flow_from_directory(
    test_path, shuffle=False, **shared_flow_args)

Found 22436 images belonging to 17 classes.
Found 5609 images belonging to 17 classes.
Found 7958 images belonging to 1 classes.


In [30]:
# Example: the training generator built from a DataFrame instead of a directory tree.
# df_train is the original, full table (no validation rows removed).
flow_df_args = dict(
    x_col='data_path',
    y_col='class',
    target_size=target_size_val,
    batch_size=batch_size_val,
    class_mode='categorical',
)
train_generator_df = train_datagen.flow_from_dataframe(dataframe=df_train, **flow_df_args)

Found 28045 validated image filenames belonging to 17 classes.


### Plot images from train_generator

### Models

In [30]:
# Custom model
# Four Conv -> BatchNorm -> MaxPool -> Dropout stages, followed by two dense
# layers and a softmax classification head.

# (number of filters, dropout rate) for each convolutional stage, in order.
conv_stages = [(32, 0.2), (64, 0.2), (128, 0.5), (256, 0.5)]

# The name is passed to the constructor rather than written to the private `_name` attribute.
model_custom = models.Sequential(name="model_custom")

for stage_idx, (n_filters, drop_rate) in enumerate(conv_stages):
    # Only the very first layer declares the input shape.
    first_layer_args = {'input_shape': input_shape_val} if stage_idx == 0 else {}
    model_custom.add(Conv2D(n_filters, (3, 3), strides=1, padding='same',
                            activation='relu', **first_layer_args))
    model_custom.add(BatchNormalization())
    model_custom.add(MaxPooling2D((2, 2)))
    model_custom.add(Dropout(drop_rate))

# flatten
model_custom.add(Flatten())

model_custom.add(Dense(128, activation='relu'))
model_custom.add(BatchNormalization())
model_custom.add(Dropout(0.5))

model_custom.add(Dense(64, activation='relu'))
model_custom.add(BatchNormalization())

# One output unit per class, taken from the data instead of a hard-coded 17.
model_custom.add(Dense(len(categories), activation='softmax'))

# summary
#model_custom.summary()

2022-04-27 20:13:41.891397: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-27 20:13:41.935359: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-27 20:13:41.935856: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-27 20:13:41.937494: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags