In [14]:
import pandas as pd
import numpy as np
import cv2

from time import time
import logging
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

In [2]:
# Read list of images, select a subset of around 3642 images
image_list = pd.read_csv('driver_imgs_list.csv')
subject_subset = image_list.subject.value_counts()[-6:].index.values
#image_subset = image_list[image_list.subject.isin(subject_subset)].reset_index(drop=True)
image_subset = image_list

In [4]:
# Try converting image to greyscale
first_image = image_list.values[0]
first_image_path = './train/'+first_image[1]+'/'+first_image[2]
image = cv2.imread(first_image_path, 0)
image = cv2.resize(image, (0,0), fx=0.25, fy=0.25) # resize the images since they're too large
h, w = image.shape

In [5]:
image_subset.index.values

array([    0,     1,     2, ..., 22421, 22422, 22423])

In [6]:
greyscale_image_matrix = np.zeros((h*w, image_subset.shape[0]), dtype=np.uint8)

In [7]:
greyscale_image_matrix.shape

(19200, 22424)

In [8]:
from IPython.core.display import clear_output

nitems = image_subset.shape[0]
for i, n in image_subset.iterrows():
    img_path = './train/' + n.classname + '/' + n.img
    clear_output()
    print('Current index: {}'.format(i))
    print('Starting image {}'.format(img_path))
    img = cv2.imread(first_image_path, 0) # Read as greyscale
    img = cv2.resize(img, (0,0), fx=0.25, fy=0.25) # resize the images since they're too large
    greyscale_image_matrix[:, i] = img.reshape(h*w)
    print('Completed {} of {} images.'.format(i+1, nitems))
    

Current index: 22423
Starting image ./train/c9/img_9684.jpg
Completed 22424 of 22424 images.


In [9]:
X = greyscale_image_matrix.T

n_components = 200

t0 = time()
pca = PCA(n_components=n_components, svd_solver='randomized').fit(X) #,whiten=True)
print("done in {}s".format(time() - t0))


done in 183.678696871s


In [10]:
print("Projecting the input data on the PCA stuff")
t0 = time()
X_train_pca = pca.transform(X)
# X_test_pca = pca.transform(X_test)
print("done in {}s".format(time() - t0))

Projecting the input data on the PCA stuff
done in 12.4533851147s


In [11]:
mapping = pca.components_.reshape((n_components, h, w))

In [12]:
mapping.shape

(200, 120, 160)

In [13]:
mapping[199]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])