# Imports

In [32]:
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import time

from numpy import genfromtxt
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import mode
from sklearn.decomposition import PCA



# Side length, in pixels, of the square images produced by the resize step
# further down (it is passed to cv2.resize as both target width and height).
num_pixels = 24
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Read the training set from disk: x holds the pixel rows, y the labels.
# The first line of each CSV is skipped (skip_header=1).
x_train = np.genfromtxt(
    fname='Data/Training/x_train_gr_smpl.csv',
    delimiter=',',
    skip_header=1,
)
y_train = np.genfromtxt(
    fname='Data/Training/y_train_smpl.csv',
    delimiter=',',
    skip_header=1,
)

In [3]:
# Read the testing set from disk: x holds the pixel rows, y the labels.
# The first line of each CSV is skipped (skip_header=1).
x_test = np.genfromtxt(
    fname='Data/Testing/x_test_gr_smpl.csv',
    delimiter=',',
    skip_header=1,
)
y_test = np.genfromtxt(
    fname='Data/Testing/y_test_smpl.csv',
    delimiter=',',
    skip_header=1,
)

In [4]:
# Sanity check on the import: print the shape of each array, one per line,
# in the order x_train, y_train, x_test, y_test.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep='\n')

(12660, 2304)
(12660,)
(4170, 2304)
(4170,)


# Preprocessing

In [5]:
# Combining data with labels
#
# Each label vector is turned into a single column and appended to the right
# of its feature matrix, so a row of the combined array carries one sample's
# pixels followed by its label.

# reshape(-1, 1) lets NumPy infer the number of rows, so this cell no longer
# breaks when the CSV files contain a different number of samples.
y_train = y_train.reshape(-1, 1)
train_data_combined = np.concatenate((x_train, y_train), axis=1)

y_test = y_test.reshape(-1, 1)
test_data_combined = np.concatenate((x_test, y_test), axis=1)


In [6]:
# Randomises the row order in a reproducible way.
# Seeding NumPy's global RNG makes both shuffles below repeat exactly on re-run.
np.random.seed(0)
# np.random.shuffle reorders the rows in place; because the label is the last
# column of each combined array, pixels and label move together.
# Both calls draw from the same global RNG, so swapping their order would
# change the permutation each set receives.
np.random.shuffle(train_data_combined)
np.random.shuffle(test_data_combined)

In [7]:
# Split data and labels that have undergone the same randomization process.
# The last column of each combined array is the label; every column before it
# is pixel data.
x_train = train_data_combined[:, :-1]
# Indexing column -1 directly yields a 1-D label vector for any number of
# rows (no hard-coded dataset size). .copy() keeps the labels independent of
# the combined array, as the previous fancy-indexing form did.
y_train = train_data_combined[:, -1].copy()

x_test = test_data_combined[:, :-1]
y_test = test_data_combined[:, -1].copy()

In [8]:
# Normalisation of pixels: cast to float64 and divide every value by 255
# (presumably the raw values are 8-bit intensities in 0-255 -- TODO confirm).
x_train_norm = x_train.astype(np.float64) / 255.0
x_test_norm = x_test.astype(np.float64) / 255.0

In [9]:
# Reducing the number of attributes by downscaling every image with OpenCV's
# resize, as shown in the second Python lecture:
# https://vision.hw.ac.uk/webapps/blackboard/content/listContent.jsp?course_id=_94419_1&content_id=_3391865_1

def _downscale_rows(flat_images, target_side, source_side=48):
    """Resize every flattened square image stored as a row of ``flat_images``.

    Parameters
    ----------
    flat_images : 2-D array
        One flattened ``source_side`` x ``source_side`` image per row.
    target_side : int
        Side length, in pixels, of each resized image.
    source_side : int, optional
        Side length, in pixels, of each input image. Defaults to 48, the
        size this notebook previously hard-coded in the reshape.

    Returns
    -------
    2-D array
        One flattened ``target_side`` x ``target_side`` image per row.
    """
    resized = np.apply_along_axis(
        func1d=lambda img: cv2.resize(img.reshape((source_side, source_side)),
                                      (target_side, target_side)),
        axis=1, arr=flat_images)
    # apply_along_axis yields (n_images, target_side, target_side);
    # flatten each image back into a single row of attributes.
    return resized.reshape((-1, target_side * target_side))


# The same helper is used for both sets so train and test cannot drift apart.
x_train_reduced = _downscale_rows(x_train_norm, num_pixels)
x_test_reduced = _downscale_rows(x_test_norm, num_pixels)

In [22]:
# pixel_data_combined = np.concatenate((x_train_reduced, x_test_reduced), axis=0)

In [23]:
# pixel_data_combined.shape

(16830, 576)

In [24]:
# label_data_combined = np.concatenate((y_train, y_test), axis=0)

In [25]:
# # Selecting the k best attributes
# kBest = SelectKBest(score_func=chi2, k=100)
# kBest = kBest.fit(pixel_data_combined,label_data_combined)
# x_comb_100 = kBest.transform(pixel_data_combined)

In [26]:
# x_comb_100.shape

(16830, 100)

In [36]:
# pca = PCA(n_components=3)
# pca.fit(pixel_data_combined)
# pixel_data_combined_3 = pca.transform(pixel_data_combined)

In [38]:
# pixel_data_combined_3.shape

(16830, 3)

In [64]:
# x_train= pixel_data_combined_3[0:12660,:]
# x_test= pixel_data_combined_3[12660:16830,:]

In [67]:
# print(x_train.shape)
# print(x_test.shape)
# print(y_train.shape)
# print(y_test.shape)

(12660, 3)
(4170, 3)
(12660,)
(4170,)
