In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import scipy.stats as sstats
from sklearn.model_selection import train_test_split
from sklearn.cluster import Birch
from sklearn.metrics import confusion_matrix
from sklearn import (datasets, decomposition, ensemble, manifold, random_projection)
from sklearn.naive_bayes import MultinomialNB
import tqdm

from KNN import KNN

In [2]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

data_batch_1 = unpickle('cifar10/data_batch_1')


db1_labels = np.array(data_batch_1[b'labels'])
db1_data = np.array(data_batch_1[b'data'])

trainx, testx, trainy, testy = train_test_split(db1_data, db1_labels, test_size=0.33)

In [3]:
def to_grayscale(x):
    new_x = x[:,:,0] * 0.3 + x[:, :, 1] * 0.59 + x[:, :, 2] * 0.11
    return new_x.astype(int)

train_x = trainx.reshape(-1, 32*32, 3)
test_x = testx.reshape(-1, 32*32, 3)
gray_train_x = to_grayscale(train_x)
gray_test_x = to_grayscale(test_x)

In [5]:
trainx.shape

(6700, 3072)

# tSNE

## perplexity check  - related to the number of nearest neighbors used

In [6]:
def tSNE():
    print("tSNE")
    OS = np.arange(5,55,15)

    accs = []
    for oss in tqdm.tqdm(OS):
        tsne = manifold.TSNE(n_components=2, init='pca', perplexity=oss)
        x_proj = tsne.fit_transform(np.vstack((gray_train_x, gray_test_x)))
        knn = KNN(x_proj[:gray_train_x.shape[0]], trainy, x_proj[gray_train_x.shape[0]:], [7])

        temp_acc = np.round(np.count_nonzero(knn[7] == testy)/len(testy),2)*100
        accs.append(temp_acc)
        print(f"KNN accuracy: {temp_acc} %  for {oss} perplexity")
    return OS, accs
    
tSNE_perplexity = tSNE()

  0%|                                                                                            | 0/4 [00:00<?, ?it/s]

tSNE


 25%|████████████████████▊                                                              | 1/4 [04:03<12:11, 243.68s/it]

KNN accuracy: 17.0 %  for 5 perplexity


 50%|█████████████████████████████████████████▌                                         | 2/4 [08:54<08:35, 257.69s/it]

KNN accuracy: 17.0 %  for 20 perplexity


 75%|██████████████████████████████████████████████████████████████▎                    | 3/4 [13:43<04:27, 267.16s/it]

KNN accuracy: 16.0 %  for 35 perplexity


100%|███████████████████████████████████████████████████████████████████████████████████| 4/4 [18:32<00:00, 278.11s/it]

KNN accuracy: 16.0 %  for 50 perplexity





## n_componenets - dimension of the embedded space

In [8]:
def tSNE_dimensions():
    print("tSNE")
    OS = np.arange(1,4,1)

    accs = []
    for oss in tqdm.tqdm(OS):
        tsne = manifold.TSNE(n_components=oss, init='pca')
        x_proj = tsne.fit_transform(np.vstack((gray_train_x, gray_test_x)))
        knn = KNN(x_proj[:gray_train_x.shape[0]], trainy, x_proj[gray_train_x.shape[0]:], [7])

        temp_acc = np.round(np.count_nonzero(knn[7] == testy)/len(testy),2)*100
        accs.append(temp_acc)
        print(f"KNN accuracy: {temp_acc} %  for {oss} dimensions")
    return OS, accs
    
tSNE_dimensions = tSNE_dimensions()


  0%|                                                                                            | 0/3 [00:00<?, ?it/s][A

tSNE



 33%|███████████████████████████▋                                                       | 1/3 [04:15<08:30, 255.14s/it][A

KNN accuracy: 15.0 %  for 1 dimensions



 67%|███████████████████████████████████████████████████████▎                           | 2/3 [09:06<04:25, 265.89s/it][A

KNN accuracy: 17.0 %  for 2 dimensions



100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [17:06<00:00, 342.02s/it][A

KNN accuracy: 20.0 %  for 3 dimensions



