In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import time

In [3]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [4]:
train = pd.read_csv('/content/drive/MyDrive/DeepLearningCourse/persons_pics_train.csv')
test = pd.read_csv('/content/drive/MyDrive/DeepLearningCourse/persons_pics_reserved.csv')

In [5]:
# Split data into training and testing sets
X = train.drop('label', axis=1)
y = train['label']

In [6]:
pca1 = PCA(svd_solver='full')
pca1.fit(X)

In [7]:
round(sum(pca1.explained_variance_ratio_[0:350]), 3)

0.982

In [8]:
pca1 = PCA(n_components=350, svd_solver='full')
pca1.fit(X)

train_pca1 = pca1.transform(X)
test_pca1 = pca1.transform(test)

tuned_parameters = [{'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000], 'class_weight': [None, 'balanced'], 'random_state':[17]}]

start_time = time.time()
cv = GridSearchCV(SVC(), tuned_parameters, refit=True, verbose=3)
cv.fit(train_pca1, y)
end_time = time.time()

Fitting 5 folds for each of 64 candidates, totalling 320 fits
[CV 1/5] END C=1, class_weight=None, gamma=0.001, kernel=linear, random_state=17;, score=0.801 total time=   0.2s
[CV 2/5] END C=1, class_weight=None, gamma=0.001, kernel=linear, random_state=17;, score=0.792 total time=   0.2s
[CV 3/5] END C=1, class_weight=None, gamma=0.001, kernel=linear, random_state=17;, score=0.792 total time=   0.2s
[CV 4/5] END C=1, class_weight=None, gamma=0.001, kernel=linear, random_state=17;, score=0.789 total time=   0.2s
[CV 5/5] END C=1, class_weight=None, gamma=0.001, kernel=linear, random_state=17;, score=0.785 total time=   0.2s
[CV 1/5] END C=1, class_weight=None, gamma=0.001, kernel=poly, random_state=17;, score=0.338 total time=   0.2s
[CV 2/5] END C=1, class_weight=None, gamma=0.001, kernel=poly, random_state=17;, score=0.340 total time=   0.2s
[CV 3/5] END C=1, class_weight=None, gamma=0.001, kernel=poly, random_state=17;, score=0.340 total time=   0.2s
[CV 4/5] END C=1, class_weight=N

In [11]:
print(list(cv.predict(test_pca1)))

['George W Bush', 'Ariel Sharon', 'Tony Blair', 'Jacques Chirac', 'Tony Blair', 'Colin Powell', 'Donald Rumsfeld', 'Colin Powell', 'Tony Blair', 'Gerhard Schroeder', 'Donald Rumsfeld', 'Hugo Chavez', 'George W Bush', 'Hugo Chavez', 'Colin Powell', 'George W Bush', 'Ariel Sharon', 'Colin Powell', 'John Ashcroft', 'Gerhard Schroeder', 'Ariel Sharon', 'Donald Rumsfeld', 'Ariel Sharon', 'George W Bush', 'George W Bush', 'Donald Rumsfeld', 'Donald Rumsfeld', 'Tony Blair', 'Serena Williams', 'Jean Chretien', 'George W Bush', 'George W Bush', 'George W Bush', 'George W Bush', 'George W Bush', 'Colin Powell', 'Donald Rumsfeld', 'Jacques Chirac', 'George W Bush', 'Gerhard Schroeder', 'Colin Powell', 'Donald Rumsfeld', 'Gerhard Schroeder', 'George W Bush', 'Tony Blair', 'George W Bush', 'George W Bush', 'Gerhard Schroeder', 'Colin Powell', 'Tony Blair', 'Serena Williams', 'Jean Chretien', 'Tony Blair', 'George W Bush', 'Colin Powell', 'George W Bush', 'George W Bush', 'George W Bush', 'George W 