# P1

In [1]:
import warnings

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, SparsePCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import shuffle

from skimage.exposure import equalize_hist
from skimage.filters import gaussian

%matplotlib inline

warnings.filterwarnings('ignore')

Let's show some images from the dataset.

In [None]:
# Each row of X is displayed as a square `size` x `size` image.
size = 80
shape = 2 * (size,)

# Shuffle samples and labels together. The fixed seed keeps the sample order
# (and everything computed from it later) identical across full re-runs.
X, y = shuffle(np.load('x_train.npy'), np.load('y_train.npy'), random_state=0)

# Preview grid: `width` columns by `height` rows.
width, height = 8, 8

plt.figure(figsize=(16, 20))
# Slicing limits the preview to one full grid (fewer if the dataset is smaller).
for n, (image, name) in enumerate(zip(X[:width * height], y[:width * height]), 1):
    plt.subplot(height, width, n)
    plt.title(name)
    plt.imshow(image.reshape(shape), cmap='gray')

Define a simple image preprocessing step.

In [None]:
def prepare(img):
    """Preprocess one flattened image.

    The pixels are reshaped to the notebook-level ``shape``, histogram-equalized,
    smoothed with a Gaussian filter (``sigma=1``) and shifted to zero mean.

    Parameters
    ----------
    img : numpy.ndarray
        Flat pixel vector that can be reshaped to ``shape``.

    Returns
    -------
    numpy.ndarray
        The processed image, flattened back to one dimension.
    """
    img = img.reshape(shape)
    img = equalize_hist(img)
    img = gaussian(img, sigma=1)
    # The bare expression `img - img.mean()` used to discard its result, so the
    # image was never centered; the result has to be assigned back.
    img = img - img.mean()

    return img.flatten()
    
def transform(X):
    """Run ``prepare`` over every row of a 2-D array, in place.

    Each row is overwritten with its prepared version, so the caller's array
    is modified and the new values are stored in ``X``'s existing dtype.
    The same array object is returned.
    """
    n_rows, _ = X.shape  # unpacking also rejects anything that is not 2-D
    for row_idx in range(n_rows):
        X[row_idx] = prepare(X[row_idx])

    return X

In [None]:
# No visible cell defines `X_train` / `y_train`, yet this cell and later ones
# use them; bind them to the shuffled arrays loaded earlier so the notebook
# runs top to bottom on a fresh kernel.
X_train, y_train = X, y
# `transform` overwrites its argument in place and returns it, so `X` and
# `X_train` refer to the same prepared array afterwards.
X = transform(X_train)


Use PCA decomposition to reduce the dimensionality.

Show some components.

In [None]:
width, height = 8, 8

# `pca` is not created in any visible earlier cell, so it is fitted here on `X`.
# One component per grid slot is kept, capped by what the data allow.
# NOTE(review): the number of components is an assumption -- tune as needed.
pca = PCA(n_components=min(width * height, *X.shape), random_state=0)
pca.fit(X)

plt.figure(figsize=(16, 20))
# Every principal axis has one weight per pixel, so it can be drawn as an image.
for n, component in enumerate(pca.components_[:width * height], 1):
    plt.subplot(height, width, n)
    plt.imshow(component.reshape(shape), cmap='gray')

In [None]:
from sklearn.metrics import roc_auc_score, roc_curve
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Reshape
from keras.layers import Conv2D, Conv2DTranspose, UpSampling2D, MaxPooling2D 
from keras.layers import LeakyReLU, Dropout
from keras.layers import BatchNormalization
from keras.optimizers import Adam, RMSprop
from keras.utils import to_categorical

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the PCA-projected samples; the fixed seed makes the split repeatable.
a_train, a_test, b_train, b_test = train_test_split(
    pca.transform(X_train),
    y_train,
    random_state=0,
    test_size=0.4,
)

In [None]:
def createModel(input_shape=(10, 20, 6)):
    """Build the (uncompiled) convolutional network with a single sigmoid output.

    The network stacks three convolution / max-pooling / dropout stages,
    flattens the result and passes it through two dense ReLU layers (512 and
    256 units, each followed by dropout) into one sigmoid unit.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input sample, forwarded to the first ``Conv2D`` layer.
        Defaults to ``(10, 20, 6)``, the value that used to be hard-coded.

    Returns
    -------
    Sequential
        The assembled model; compiling it is left to the caller.
    """
    layers = [
        # Stage 1: 3x3 convolution with 'same' padding; ReLU is kept as its own layer.
        Conv2D(64, (3, 3), padding='same', input_shape=input_shape),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),

        # Stage 2: 2x3 convolution, no explicit padding argument.
        Conv2D(128, (2, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),

        # Stage 3: 2x2 convolution with 'same' padding and heavier dropout.
        Conv2D(256, (2, 2), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.5),

        # Dense head ending in one sigmoid unit.
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]

    return Sequential(layers)

In [None]:
# Instantiate the network with the default input shape; it is compiled in a later cell.
my_network = createModel()

In [None]:
# Training hyper-parameters.
batch_size = 256
epochs = 10

# Binary cross-entropy pairs with the single sigmoid output of the network.
my_network.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['acc'])
print('The crash is right after this')
# NOTE(review): trainX / trainY / testX / testY are not assigned in any visible
# cell -- confirm where they are supposed to come from.
history = my_network.fit(
    trainX,
    trainY,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(testX, testY),
)
my_network.evaluate(testX, testY)

In [None]:
from sklearn.model_selection import GridSearchCV

weight_options = ['uniform', 'distance']

# Search space: neighbour count, vote weighting and the `p` parameter of the classifier.
param_grid = {
    'n_neighbors': [1, 2, 3],
    'weights': weight_options,
    'p': [1, 1.5, 2],
}

# Exhaustive search over the grid, scored by accuracy with cv=10.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
)

# Fit every parameter combination on the training data.
grid.fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_params_)

Finally, we use nearest neighbors to classify the faces.

In [None]:
import pandas as pd  # `pd` is used below but is not imported in any visible cell

# Read test data
# NOTE(review): `data_path` and `create_images` are not defined in any visible
# cell -- confirm where they are supposed to come from.
data_test = pd.read_csv(data_path + '/data_test.csv.gz')

# Create images (the bin counts match the network's default (10, 20, 6) input shape)
images_test = create_images(data_test,
                            n_theta_bins=10,
                            n_phi_bins=20,
                            n_time_bins=6)

# Scale images
X_test = images_test / 10.

In [None]:
# `predict_proba` was deprecated and then removed from Keras `Sequential`
# models; `predict` returns the same sigmoid outputs and exists in every
# Keras version. The model ends in one sigmoid unit, so each row of
# `prediction` holds a single value.
prediction = my_network.predict(X_test)

In [None]:
# Write the predictions as CSV: a header line, then one "id,value" row per
# prediction with ids counted from 1.
with open('prediction.csv', 'w') as out:
    print('Id,Name', file=out)
    for row_id, label in enumerate(prediction, start=1):
        print('%i,%s' % (row_id, label), file=out)