# Preprocessing images and videos

In [1]:
import numpy as np
import PIL
import glob
import pandas as pd
import os
from os import listdir
import cv2
from tqdm import tqdm
from tensorflow.keras.preprocessing import image

In [2]:
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

Due to the large size of the original dataset it was stored on an external drive and all the preprocessing of both images and videos into arrays was done in the local environment; the resulting arrays were then uploaded onto the Google Drive to be used for model training.

# Images

In [3]:
def make_training_dataset(path, number_of_images):
    error_counter = 0
    counter = 0
    dataset = []

    for filename in listdir(path):
        if counter < number_of_images:
            try:
                image_path = path + filename
                # loading the image and keeping the target size as (224,224,3)
                img = image.load_img(image_path, target_size=(224,224,3))
                # converting it to array
                img = image.img_to_array(img)
                # normalizing the pixel value
                img = img/255
                dataset.append(img)
                counter = counter + 1
            # check for damaged images
            except IOError:
                error_counter = error_counter + 1

    print('Images added: ' + str(counter))
    print('Errors: ' + str(error_counter))

    return dataset

In [4]:
def make_training_counterexamples_dataset(dataset, other_folders, n_other):

    for i in range(0, len(other_folders)):
        path_other = '/Volumes/Drive/IMAGES/' + other_folders[i]

        counter = 0
        error_counter = 0

        for filename in listdir(path_other):
            if counter < n_other:

                try:
                    image_path = path_other + filename
                    img = image.load_img(image_path, target_size=(224,224,3))
                    img = image.img_to_array(img)
                    img = img/255
                    dataset.append(img)
                    counter = counter + 1

                except IOError:
                    error_counter = error_counter + 1

    print('Images added: ' + str(counter))
    print('Errors: ' + str(error_counter))
    
    return dataset

In [5]:
folder_names = ['advanced_stop/', 'crossing/', 'cycle_lane/','parking/', 'restricted_route/', 'signal/', 'traffic_calming/']

## 1 - advanced stop

In [6]:
class_name = 'advanced_stop'

In [7]:
training_dataset_1 = make_training_dataset('/Volumes/Drive/IMAGES/advanced_stop/', 3500)

Images added: 3500
Errors: 1


In [8]:
training_dataset_labels_1 = [class_name for i in range(0, len(training_dataset_1))]

In [9]:
other_folders_1 = [i for i in folder_names if i != (class_name + '/')]

In [10]:
n_other = 583
training_dataset_1 = make_training_counterexamples_dataset(training_dataset_1, other_folders_1, n_other)

Images added: 583
Errors: 0


In [11]:
counter_class_name = 'other_a'
training_dataset_labels_1 = [counter_class_name for i in range(0, n_other*6)]
training_dataset_labels_1.count(counter_class_name)

3498

In [12]:
converting the list to numpy array
X = np.array(training_dataset_1)
np.save('/Volumes/Drive/NEW/1/X_full_final.npy', X)

In [13]:
np.savetxt("/Volumes/Drive/NEW/1/y_full_final.csv", 
           training_dataset_labels_1,
           delimiter =", ",
           fmt ='% s')

## 2 - Crossing

In [14]:
len(listdir('/Volumes/Drive/IMAGES/crossing/'))

3257

In [15]:
for filename in listdir('/Volumes/Drive/IMAGES/crossing/'):
    image_path = '/Volumes/Drive/IMAGES/crossing/' + filename
    try:
        im=image.load_img(image_path)
    except IOError:
        print(filename)

._RWG999514_2.jpg
._RWG292165_2.jpg
._RWG292619_1.jpg
._RWG292618_1.jpg
._RWG293101_1.jpg
._RWG999328_1.jpg
._RWG999449_1.jpg
._RWG999925_1.jpg
._RWG999844_1.jpg


In [16]:
class_name = 'crossing'
training_dataset_2 = make_training_dataset('/Volumes/Drive/IMAGES/crossing/', 3500)

Images added: 3248
Errors: 9


In [17]:
training_dataset_labels_2 = [class_name for i in range(0, len(training_dataset_2))]

In [18]:
other_folders_2 = [i for i in folder_names if i != (class_name + '/')]

In [19]:
n_other = 541
training_dataset_2 = make_training_counterexamples_dataset(training_dataset_2, other_folders_2, n_other)

Images added: 541
Errors: 0


In [20]:
counter_class_name = 'other_c'
training_dataset_labels_2 = [counter_class_name for i in range(0, n_other*6)]
training_dataset_labels_2.count(counter_class_name)

3246

In [21]:
X = np.array(training_dataset_2)
np.save('/Volumes/Drive/NEW/2/X_full_final.npy', X)

np.savetxt("/Volumes/Drive/NEW/2/y_full_final.csv", 
           training_dataset_labels_2,
           delimiter =", ",
           fmt ='% s')

## 3 - Cycle lane

In [22]:
class_name = 'cycle_lane'
training_dataset_3 = make_training_dataset('/Volumes/Drive/IMAGES/cycle_lane/', 3500)

Images added: 3500
Errors: 0


In [23]:
training_dataset_labels_3 = [class_name for i in range(0, len(training_dataset_3))]

In [24]:
other_folders_3 = [i for i in folder_names if i != (class_name + '/')]
n_other = 583
training_dataset_3 = make_training_counterexamples_dataset(training_dataset_3, other_folders_3, n_other)

Images added: 583
Errors: 0


In [25]:
counter_class_name = 'other_l'
training_dataset_labels_3 = [counter_class_name for i in range(0, n_other*6)]
training_dataset_labels_3.count(counter_class_name)

3498

In [26]:
X = np.array(training_dataset_3)
np.save('/Volumes/Drive/NEW/3/X_full_final.npy', X)

np.savetxt("/Volumes/Drive/NEW/3/y_full_final.csv", 
           training_dataset_labels_3,
           delimiter =", ",
           fmt ='% s')

## 4 - Parking

In [27]:
class_name = 'parking'
training_dataset_4 = make_training_dataset('/Volumes/Drive/IMAGES/parking/', 3500)

Images added: 3500
Errors: 0


In [28]:
training_dataset_labels_4 = [class_name for i in range(0, len(training_dataset_4))]

In [29]:
other_folders_4 = [i for i in folder_names if i != (class_name + '/')]
n_other = 583
training_dataset_4 = make_training_counterexamples_dataset(training_dataset_4, other_folders_4, n_other)

Images added: 583
Errors: 0


In [30]:
counter_class_name = 'other_p'
training_dataset_labels_4 = [counter_class_name for i in range(0, n_other*6)]
training_dataset_labels_4.count(counter_class_name)

3498

In [31]:
X = np.array(training_dataset_4)
np.save('/Volumes/Drive/NEW/4/X_full_final.npy', X)

np.savetxt("/Volumes/Drive/NEW/4/y_full_final.csv", 
           training_dataset_labels_4,
           delimiter =", ",
           fmt ='% s')

## 5 - Restricted route

In [32]:
class_name = 'restricted_route'
training_dataset_5 = make_training_dataset('/Volumes/Drive/IMAGES/restricted_route/', 2550)

Images added: 2550
Errors: 0


In [33]:
training_dataset_labels_5 = [class_name for i in range(0, len(training_dataset_5))]

In [34]:
other_folders_5 = [i for i in folder_names if i != (class_name + '/')]
n_other = 425
training_dataset_5 = make_training_counterexamples_dataset(training_dataset_5, other_folders_5, n_other)

Images added: 425
Errors: 0


In [35]:
counter_class_name = 'other_r'
training_dataset_labels_5 = [counter_class_name for i in range(0, n_other*6)]
training_dataset_labels_5.count(counter_class_name)

2550

In [36]:
X = np.array(training_dataset_5)
np.save('/Volumes/Drive/NEW/5/X_full_final_e.npy', X)

np.savetxt("/Volumes/Drive/NEW/5/y_full_final_e.csv", 
           training_dataset_labels_5,
           delimiter =", ",
           fmt ='% s')

## 6 - signal

In [37]:
class_name = 'signal'
training_dataset_6 = make_training_dataset('/Volumes/Drive/IMAGES/signal/', 809)

Images added: 809
Errors: 0


In [38]:
training_dataset_labels_6 = [class_name for i in range(0, len(training_dataset_6))]

In [39]:
other_folders_6 = [i for i in folder_names if i != (class_name + '/')]
n_other = 135
training_dataset_6 = make_training_counterexamples_dataset(training_dataset_6, other_folders_6, n_other)

Images added: 135
Errors: 0


In [40]:
counter_class_name = 'other_s'
training_dataset_labels_6 = [counter_class_name for i in range(0, n_other*6)]
training_dataset_labels_6.count(counter_class_name)

810

In [41]:
X = np.array(training_dataset_6)
np.save('/Volumes/Drive/NEW/6/X_full_final.npy', X)

np.savetxt("/Volumes/Drive/NEW/6/y_full_final.csv", 
           training_dataset_labels_6,
           delimiter =", ",
           fmt ='% s')

## 7 - traffic calming

In [42]:
class_name = 'traffic_calming'
training_dataset_7 = make_training_dataset('/Volumes/Drive/IMAGES/traffic_calming/', 3500)

Images added: 3500
Errors: 0


In [43]:
training_dataset_labels_7 = [class_name for i in range(0, len(training_dataset_7))]

In [44]:
other_folders_7 = [i for i in folder_names if i != (class_name + '/')]
n_other = 583
training_dataset_7 = make_training_counterexamples_dataset(training_dataset_7, other_folders_7, n_other)

Images added: 583
Errors: 0


In [45]:
counter_class_name = 'other_t'
training_dataset_labels_7 = [counter_class_name for i in range(0, n_other*6)]
training_dataset_labels_7.count(counter_class_name)

3498

In [46]:
X = np.array(training_dataset_7)
np.save('/Volumes/Drive/NEW/7/X_full_final.npy', X)

np.savetxt("/Volumes/Drive/NEW/7/y_full_final.csv", 
           training_dataset_labels_7,
           delimiter =", ",
           fmt ='% s')

# Videos

In [47]:
def delete_extra(path, frame_rate):
    
    counter = 0
    error_counter = 0

    for filename in os.listdir(path):
        file_path = path + filename
        try:
            if counter%frame_rate == 0:
                counter = counter + 1
            else:
                os.remove(file_path)
                counter = counter + 1

        except IOError:
                error_counter = error_counter + 1

In [48]:
# creating a dataframe with video names
test_video = pd.DataFrame(columns=['video_name'])
for i in range(1,12):
    name = 'video_' + str(i) + '.MOV'
    test_video = test_video.append({'video_name' : name}, ignore_index=True)

In [49]:
cap = cv2.VideoCapture('/Users/ak/Desktop/videos/video_10.MOV')
count = 0
fps = round(cap.get(cv2.CAP_PROP_FPS))

success,image = cap.read()

while success:
    filename = "/Volumes/Drive/video_10/video_10_frame%d.jpg" % count
    cv2.imwrite(filename, image)
    success,image = cap.read()
    count += 1

cap.release()
cv2.destroyAllWindows()

delete_extra('/Volumes/Drive/video_10/', 60)

In [54]:
error_counter

0