In [1]:
import os
import cv2
import  pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import keras 
from glob import glob
from keras.models import Sequential
from keras.layers import BatchNormalization,MaxPool2D,Dense,Conv2D,Flatten
from keras.callbacks import EarlyStopping,LearningRateScheduler
from sklearn.svm import SVC

import warnings
from tqdm import tqdm
from PIL import Image
warnings.filterwarnings('ignore')

In [2]:
# Dataset folders, given relative to the notebook's working directory.
# The final component of each path is one of the keys of `cat`.
vehicle_dir, non_vehicle_dir = 'data/vehicles/', 'data/non-vehicles/'

In [4]:
# os.listdir returns directory entries in arbitrary order; sorting keeps the
# name lists (and anything later derived from them, such as a seeded shuffle)
# identical from run to run and machine to machine.
vehicle_names = sorted(os.listdir(vehicle_dir))
non_vehicle_names = sorted(os.listdir(non_vehicle_dir))

In [None]:
# print(vehicle_names[:10], non_vehicle_names[:10])

In [5]:
# Report how many directory entries were listed for each class folder.
for caption, file_names in (
    ("No of vehicle images:", vehicle_names),
    ("No of non-vehicle images:", non_vehicle_names),
):
    print(caption, len(file_names))

No of vehicle images: 8792
No of non-vehicle images: 8968


In [None]:
# import random
# random.shuffle(vehicle_names)
# random.shuffle(non_vehicle_names)

In [None]:
# print(vehicle_names[:10], non_vehicle_names[:10])

In [6]:
# Maps each dataset folder name to its integer class label.
cat = {
    'non-vehicles': 0,
    'vehicles': 1
}


def get_paths(folder):
    """Collect every file below ``folder`` together with its class label.

    The class of a file is taken from the directory it lives in: the path of
    that directory, expressed relative to the parent of ``folder``, is scanned
    component by component and the first component that is a key of ``cat``
    decides the label. This works whether ``folder`` is itself a class folder
    (``data/vehicles/``) or the parent of the class folders (``data/``), and
    it does not depend on the nesting depth of ``folder`` or on the path
    separator in use.

    Parameters
    ----------
    folder : str
        Directory that is walked recursively.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns ``path`` (``dirname`` joined with
        the file name) and ``class`` (the integer label from ``cat``).
        Directories and files are visited in sorted order.

    Raises
    ------
    KeyError
        If a directory that contains files has no path component naming a
        class in ``cat``.
    """
    path = []
    labels = []

    # Components are evaluated relative to the parent of `folder`, so
    # directories above it can never be mistaken for a class folder.
    parent = os.path.dirname(os.path.abspath(folder))

    for dirname, dirnames, filenames in os.walk(folder):
        # os.walk yields entries in filesystem order; sorting in place fixes
        # the traversal order so the resulting frame is reproducible.
        dirnames.sort()
        if not filenames:
            continue

        parts = os.path.relpath(dirname, parent).split(os.sep)
        dir_class = next((part for part in parts if part in cat), None)
        if dir_class is None:
            raise KeyError(
                'no class folder ({}) found in path {!r}'.format(
                    ', '.join(sorted(cat)), dirname))

        for filename in sorted(filenames):
            path.append(os.path.join(dirname, filename))
            labels.append(cat[dir_class])

    return pd.DataFrame({'path': path, 'class': labels})

In [7]:
# Index every file found under the vehicles folder and preview the result.
vehicles_df = get_paths(folder=vehicle_dir)
vehicles_df.head(n=5)

Unnamed: 0,path,class
0,data/vehicles/1.png,1
1,data/vehicles/10.png,1
2,data/vehicles/1000.png,1
3,data/vehicles/1001.png,1
4,data/vehicles/1002.png,1


In [8]:
# Index every file found under the non-vehicles folder and preview the result.
non_vehicles_df = get_paths(folder=non_vehicle_dir)
non_vehicles_df.head(n=5)

Unnamed: 0,path,class
0,data/non-vehicles/extra1.png,0
1,data/non-vehicles/extra10.png,0
2,data/non-vehicles/extra100.png,0
3,data/non-vehicles/extra1000.png,0
4,data/non-vehicles/extra1001.png,0


In [9]:
# Stack both classes into a single frame. ignore_index=True renumbers the rows
# 0..N-1; without it the row labels of the two input frames are kept, so the
# same label would appear once per class and label-based lookups would be
# ambiguous.
df = pd.concat([vehicles_df, non_vehicles_df], ignore_index=True)
df.shape

(17760, 2)

In [10]:
# Preview the first rows of the combined frame.
df.head(n=5)

Unnamed: 0,path,class
0,data/vehicles/1.png,1
1,data/vehicles/10.png,1
2,data/vehicles/1000.png,1
3,data/vehicles/1001.png,1
4,data/vehicles/1002.png,1


In [11]:
# Shuffle the rows: sampling the whole frame (frac=1) with a fixed seed returns
# all rows in a repeatable random order; the index is then renumbered from 0.
df = (
    df
    .sample(frac=1, random_state=1)
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,path,class
0,data/vehicles/3519.png,1
1,data/non-vehicles/image604.png,0
2,data/vehicles/5546.png,1
3,data/non-vehicles/image525.png,0
4,data/vehicles/far (574).png,1


In [12]:
# Prototype on the first 200 rows of the shuffled frame.
# .copy() makes sample_df an independent frame: warnings are silenced at the
# top of this notebook, so a later write into a bare slice of df would give no
# chained-assignment warning.
sample_df = df.iloc[:200].copy()
sample_df

Unnamed: 0,path,class
0,data/vehicles/3519.png,1
1,data/non-vehicles/image604.png,0
2,data/vehicles/5546.png,1
3,data/non-vehicles/image525.png,0
4,data/vehicles/far (574).png,1
...,...,...
195,data/vehicles/4806.png,1
196,data/non-vehicles/image2718.png,0
197,data/non-vehicles/image2421.png,0
198,data/vehicles/left (615).png,1


In [13]:
# Check how balanced the two classes are within the sample.
sample_df.loc[:, 'class'].value_counts()

1    101
0     99
Name: class, dtype: int64

In [14]:
def load_images(paths, img_size, dtype=np.float64):
    """Read, resize and rescale a collection of image files into one array.

    Parameters
    ----------
    paths : iterable of str
        Image file paths, each passed to ``cv2.imread``.
    img_size : tuple of int
        Target size handed to ``cv2.resize`` as its ``dsize`` argument.
    dtype : numpy dtype, optional
        Floating-point type of the returned array. The default, ``float64``,
        gives the same values as dividing the integer pixels by 255 directly;
        pass ``np.float32`` to halve the memory footprint.

    Returns
    -------
    numpy.ndarray
        The resized images stacked along a new first axis, with every pixel
        value divided by 255. Channels stay in the order ``cv2.imread``
        delivers them (OpenCV documents this as BGR for colour images).

    Raises
    ------
    OSError
        If ``cv2.imread`` returns ``None`` for a path, i.e. the file is
        missing or cannot be decoded as an image.
    """
    images = []

    for path in paths:
        img = cv2.imread(path)
        if img is None:
            # imread signals failure by returning None instead of raising;
            # stop here with the offending path rather than failing later
            # inside cv2.resize with an unrelated-looking error.
            raise OSError(
                'cv2.imread could not read image: {!r}'.format(path))
        images.append(cv2.resize(img, img_size))

    # Convert straight to the floating-point target type (no int64 copy in
    # between), then scale the pixel values by 1/255.
    return np.asarray(images, dtype=dtype) / 255

In [15]:
# Load the sampled files at a target size of (64, 64) and confirm the shape of
# the stacked array.
images = load_images(paths=sample_df['path'], img_size=(64, 64))
print(images.shape)

(200, 64, 64, 3)


In [16]:
# Display the array to eyeball the pixel values after the division by 255.
images

array([[[[0.29803922, 0.42352941, 0.52156863],
         [0.30588235, 0.4627451 , 0.46666667],
         [0.37647059, 0.5254902 , 0.5372549 ],
         ...,
         [0.58431373, 0.51372549, 0.68627451],
         [0.89803922, 0.76078431, 0.80784314],
         [0.62745098, 0.70196078, 0.70980392]],

        [[0.34901961, 0.51764706, 0.48235294],
         [0.34901961, 0.5372549 , 0.55294118],
         [0.38431373, 0.52941176, 0.5372549 ],
         ...,
         [0.66666667, 0.6745098 , 0.66666667],
         [0.91372549, 0.9254902 , 0.79607843],
         [0.6627451 , 0.76470588, 0.67058824]],

        [[0.40784314, 0.59607843, 0.50588235],
         [0.4       , 0.55686275, 0.63529412],
         [0.4       , 0.54901961, 0.61568627],
         ...,
         [0.68627451, 0.65490196, 0.6745098 ],
         [0.91764706, 0.90196078, 0.8745098 ],
         [0.75686275, 0.78823529, 0.81176471]],

        ...,

        [[0.18039216, 0.16470588, 0.16470588],
         [0.11764706, 0.1372549 , 0.14901961]

In [None]:
# TODO: train/test split - use 75% train / 25% test?