# Import required modules:

In [5]:
import numpy as np
import os
import matplotlib.pyplot as plt

# other alternatives to load plt
#from scipy.misc import imread
#from scipy.misc import imread, imsave, imresize
#from skimage import io

import pandas as pd

# Source training and test image files:

In [6]:
train_dir = "data/train/"
test_dir = "data/test/"

***

# Training set

### Obtain labels: For each different category:

In [7]:
class_names = [
"Max Speed 20 km/h",
"Max Speed 30 km/h",
"Max Speed 50 km/h",
"Max Speed 60 km/h",
"Max Speed 70 km/h",
"Max Speed 80 km/h",
"End of 80 km/h zone",
"Max Speed 100 km/h",
"Max Speed 120 km/h",
"No passing",
"No passing for vehicles over 3.5 tonnes",
"Priority",
"Priority road",
"Yield",
"Stop",
"Road closed",
"Vehicles over 3.5 tonnes prohibited",
"Do not enter",
"General danger",
"Left curve",
"Right curve",
"Double curve",
"Uneven road surface",
"Slippery when wet or dirty",
"Road narrows",
"Roadworks",
"Traffic signals ahead",
"Pedestrians",
"Watch for children",
"Bicycle crossing",
"Ice - snow",
"Wild animal crossing",
"End of all restrictions",
"Turn right ahead",
"Turn left ahead",
"Ahead only",
"Ahead or turn right only",
"Ahead or turn left only",
"Pass by on right",
"Pass by on left",
"Roundabout",
"End of no passing zone",
"End of no passing zone for trucks"
]

### Label all training pictures (y_train)

In [8]:
y_train = []

for folder in os.listdir(train_dir):
    
    pictures = [pic for pic in os.listdir(train_dir + "/" + folder) if pic.split(".")[-1] == "ppm" ] # filter out non-ppm files
    
    for picture in pictures:
        y_train.append(class_names[int(folder)]) # translates ClassId figure into label name, for example class_names[int(000000)] = 'Max Speed 20 km/h'

        
y_train = np.array(y_train)

### Create a function to decode all ppm pictures from a given directory into a numpy array;

In [10]:
#Create a filtered list of .ppm files, then call plt.imread on the resulting list.

def PpmToNumpy(directory):
    
    '''
    Given a host directory containing ppm pictures, converts the pictures into a numpy array, returning an array with the whole list of pictures.

    Params:
    String containing a directory address.

    Returns:
    numpy array whose items are decoded ppm pictures (np.array)

    '''

    files = os.listdir(directory)
    files = [file for file in files if file.split(".")[1] == "ppm"] # to ensure only ppm images are called
    
    return np.array([plt.imread(directory + file) for file in files], dtype=object)
                       

### Build training dataset (x_train)

In [11]:
all_images = [ PpmToNumpy(train_dir + "/" + folder + "/") for folder in os.listdir(train_dir)]

x_train = np.concatenate(all_images, axis=0, out=None, dtype=None, casting="same_kind")

### Show pictures - trial cell:

In [31]:
type(x_train[0].shape[0])

int

In [35]:
# Train summary file:

train_summary = dict.fromkeys(["Width", "Height", "ClassId"], [])
widths = []
heights = []

for picture in x_train:
    widths.append(picture.shape[0])
    heights.append(picture.shape[1])

train_summary["Width"] = widths
train_summary["Height"] = heights
train_summary["ClassId"] = y_train
 
train_summary_df = pd.DataFrame( train_summary)
train_summary_df.head(50)


Unnamed: 0,Width,Height,ClassId
0,30,29,Max Speed 20 km/h
1,30,30,Max Speed 20 km/h
2,30,30,Max Speed 20 km/h
3,31,31,Max Speed 20 km/h
4,32,30,Max Speed 20 km/h
5,31,31,Max Speed 20 km/h
6,34,33,Max Speed 20 km/h
7,35,34,Max Speed 20 km/h
8,34,33,Max Speed 20 km/h
9,36,36,Max Speed 20 km/h


***

# Test set

In [55]:
test_meta = pd.read_csv("data/Test.csv")
test_meta.head(20)

Unnamed: 0,Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path
0,53,54,6,5,48,49,16,Test/00000.png
1,42,45,5,5,36,40,1,Test/00001.png
2,48,52,6,6,43,47,38,Test/00002.png
3,27,29,5,5,22,24,33,Test/00003.png
4,60,57,5,5,55,52,11,Test/00004.png
5,52,56,5,5,47,51,38,Test/00005.png
6,147,130,12,12,135,119,18,Test/00006.png
7,32,33,5,5,26,28,12,Test/00007.png
8,45,50,6,5,40,45,25,Test/00008.png
9,81,86,7,7,74,79,35,Test/00009.png


In [63]:
y_test = np.array(test_meta["ClassId"].apply(lambda x: class_names[x]))

y_test


array(['Vehicles over 3.5 tonnes prohibited', 'Max Speed 30 km/h',
       'Pass by on right', ..., 'End of 80 km/h zone',
       'Max Speed 100 km/h', 'No passing for vehicles over 3.5 tonnes'],
      dtype=object)

In [64]:
x_test = PpmToNumpy(test_dir)

## Show pictures - trial cell: