In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import torch
from torch.nn.functional import softmax, interpolate
from PIL import Image
from torchvision import models, transforms
from torchvision.transforms.functional import resize, to_pil_image
from torchinfo import summary

from torchcam.methods import LayerCAM
from torchcam.utils import overlay_mask

from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

from skimage import color

import os

In [2]:
# model import
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

mc_model = models.resnet50(pretrained=True).to(device)
    
mc_model.fc = torch.nn.Sequential(
               torch.nn.Linear(2048, 128),
               torch.nn.ReLU(),
               torch.nn.Linear(128, 7)).to(device)

mc_model.load_state_dict(torch.load("../models/mc_model.pth", map_location=torch.device('cpu')))

<All keys matched successfully>

In [3]:
mc_model.eval()

tsfm = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.CenterCrop(112),
    transforms.ToTensor() 
])

In [4]:
# class activation map extractor
cam_extractor = LayerCAM(mc_model)



In [2]:
styles = pd.read_csv("../data/styles_subset.csv")
styles = styles.dropna()
styles

Unnamed: 0,id,masterCategory,subCategory,articleType,baseColour
0,15970,Apparel,Topwear,Shirts,Navy Blue
1,39386,Apparel,Bottomwear,Jeans,Blue
2,59263,Accessories,Watches,Watches,Silver
3,21379,Apparel,Bottomwear,Track Pants,Black
4,53759,Apparel,Topwear,Tshirts,Grey
...,...,...,...,...,...
44414,17036,Footwear,Shoes,Casual Shoes,White
44415,6461,Footwear,Flip Flops,Flip Flops,Red
44416,18842,Apparel,Topwear,Tshirts,Blue
44417,46694,Personal Care,Fragrance,Perfume and Body Mist,Blue


In [3]:
# 10% data for testing
train, test = train_test_split(styles, test_size=0.1, stratify=styles["baseColour"], random_state=42)

# 1. Using CIE for the KNNClassifier

In [4]:
def convert_rgb2lab(rgb):
    """
    convert a single rgb value to corresponding lab value
    """
    lab = color.rgb2lab(np.array([[rgb]])/255)
    return lab[0, 0]

In [5]:
# manual mappings
lab_df = pd.read_csv("../data/lab_color_map.csv")
lab_df

Unnamed: 0,l1,l2,l3,target_color
0,0.0,0.0,0.0,Black
1,100.0,-0.002455,0.004653,White
2,48.373518,7.713933,-53.080052,Blue
3,29.866009,10.324204,12.794989,Brown
4,35.724149,-0.001095,0.002075,Grey
5,53.240588,80.092308,67.202751,Red
6,87.735099,-86.18303,83.179703,Green
7,69.590784,51.670535,0.505159,Pink
8,50.104911,65.49662,-41.474739,Purple
9,97.139507,-21.554681,94.478122,Yellow


In [6]:
X = lab_df.drop(["target_color"], axis=1)
y = lab_df["target_color"]

# KNN classifier with custom metric
knn_classifier = KNeighborsClassifier(n_neighbors=1,
                                    metric=color.deltaE_cie76)

knn_classifier.fit(X, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30,
                     metric=<function deltaE_cie76 at 0x7f5b6d7564d0>,
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [24]:
def find_color(img):
    """
    returns the predicted color category, given a PIL Image object
    """
    input_tensor = resize(img, (112, 112))
    input_tensor = tsfm(input_tensor).to(device)

    out = mc_model(input_tensor.unsqueeze(0))
    cams = cam_extractor(out.squeeze(0).argmax().item(), out)

    scores = cams[0].cpu()
    resized_scores = resize(to_pil_image(scores.squeeze(0)), size=(112, 112))
    scores_array = np.array(resized_scores)
    norm_scores = scores_array/255
    seg_mask = np.where(norm_scores > 0.8, 1, 0)
    
    img_array = np.array(to_pil_image(input_tensor.cpu()))
    masked_image = img_array[seg_mask == 1]

    kmeans = KMeans(n_clusters=3)
    kmeans.fit(masked_image)
    colors = kmeans.cluster_centers_

    unique, counts = np.unique(kmeans.labels_, return_counts=True)

    idx = np.where(counts == counts.max())[0]

    # converting extracted rgb value to cie
    s = tuple(map(int,kmeans.cluster_centers_[idx][0]))
    s = convert_rgb2lab(s)

    # knnclassifier using cie values and distance metric
    predicted_class = knn_classifier.predict(np.array(s).reshape(1, -1))

    return predicted_class[0]

In [30]:
root_dir = "../data/myntradataset/images"
true = []
predicted = []
for i in test.id:
    try:
        img_path = os.path.join(root_dir, str(i)) + ".jpg"
        img = Image.open(img_path).convert("RGB")

    except Exception as e:
        print(e)
        continue
    
    predicted_label = find_color(img)
    true_label = test[test.id == i]["baseColour"]
    true.append(true_label)
    predicted.append(predicted_label)



[Errno 2] No such file or directory: '../data/myntradataset/images/12347.jpg'




[Errno 2] No such file or directory: '../data/myntradataset/images/39403.jpg'




In [31]:
accuracy_score(true, predicted)

0.2710069835548547

# 2. Using CIE for KMeans clustering

Previously, KMeans clustering to obtain the 3 most dominant colors in the product was done in the RGB color space. Now, trying out KMeans in the CIE space.

In [10]:
# lab color map
lab_df = pd.read_csv("../data/lab_color_map.csv")
lab_df

Unnamed: 0,l1,l2,l3,target_color
0,0.0,0.0,0.0,Black
1,100.0,-0.002455,0.004653,White
2,48.373518,7.713933,-53.080052,Blue
3,29.866009,10.324204,12.794989,Brown
4,35.724149,-0.001095,0.002075,Grey
5,53.240588,80.092308,67.202751,Red
6,87.735099,-86.18303,83.179703,Green
7,69.590784,51.670535,0.505159,Pink
8,50.104911,65.49662,-41.474739,Purple
9,97.139507,-21.554681,94.478122,Yellow


In [11]:
# fitting classifier
X = lab_df.drop(["target_color"], axis=1)
y = lab_df["target_color"]

knn_classifier = KNeighborsClassifier(n_neighbors=1,
                                    metric=color.deltaE_cie76)

knn_classifier.fit(X, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30,
                     metric=<function deltaE_cie76 at 0x7ff7e6fd64d0>,
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [12]:
def find_color(img):
    """
    returns the predicted color category, given a PIL Image object
    """
    input_tensor = resize(img, (112, 112))
    input_tensor = tsfm(input_tensor).to(device)

    out = mc_model(input_tensor.unsqueeze(0))
    cams = cam_extractor(out.squeeze(0).argmax().item(), out)

    scores = cams[0].cpu()
    resized_scores = resize(to_pil_image(scores.squeeze(0)), size=(112, 112))
    scores_array = np.array(resized_scores)
    norm_scores = scores_array/255
    seg_mask = np.where(norm_scores > 0.8, 1, 0)
    
    img_array = np.array(to_pil_image(input_tensor.cpu()))
    masked_image = img_array[seg_mask == 1]
    cie_img = color.rgb2lab(np.expand_dims(masked_image, 0))
    
    # Finding dominant color from the CIE space
    kmeans = KMeans(n_clusters=3, random_state=42)
    kmeans.fit(np.squeeze(cie_img))
    colors = kmeans.cluster_centers_

    unique, counts = np.unique(kmeans.labels_, return_counts=True)

    idx = np.where(counts == counts.max())[0]

    s = tuple(kmeans.cluster_centers_[idx][0])

    predicted_class = knn_classifier.predict(np.array(s).reshape(1, -1))

    return predicted_class[0]

In [16]:
root_dir = "../data/myntradataset/images"
true = []
predicted = []
for i in test.id:
    try:
        img_path = os.path.join(root_dir, str(i)) + ".jpg"
        img = Image.open(img_path).convert("RGB")

    except Exception as e:
        print(e)
        continue
    
    predicted_label = find_color(img)
    true_label = test[test.id == i]["baseColour"]
    true.append(true_label)
    predicted.append(predicted_label)



[Errno 2] No such file or directory: '../data/myntradataset/images/12347.jpg'




[Errno 2] No such file or directory: '../data/myntradataset/images/39403.jpg'




In [17]:
accuracy_score(true, predicted)

0.2716828114440189

# 3. Using extracted CIE values to train the KNNClassifier

In [67]:
cols = ["Black", "White", "Blue", "Brown", "Grey", "Red", "Green", "Pink", "Purple", "Yellow", "Maroon", "Orange"]
train_mask = train.baseColour.isin(cols)
train_sub = train[train_mask]
train_sub.baseColour.value_counts()

Black     8755
White     4984
Blue      4426
Brown     3145
Grey      2467
Red       2209
Green     1903
Pink      1674
Purple    1476
Yellow     700
Maroon     523
Orange     477
Name: baseColour, dtype: int64

In [72]:
# using 100 random samples from each major color to train the KNNClassifier
sample = train_sub.groupby("baseColour")[["id", "baseColour"]].apply(lambda x: x.sample(100))
sample = sample.reset_index(drop=True)
sample

Unnamed: 0,id,baseColour
0,29633,Black
1,35143,Black
2,15274,Black
3,16630,Black
4,29191,Black
...,...,...
1195,4897,Yellow
1196,35889,Yellow
1197,12079,Yellow
1198,25928,Yellow


In [73]:
sample.baseColour.value_counts()

Yellow    100
Maroon    100
Red       100
Blue      100
Grey      100
Black     100
White     100
Orange    100
Pink      100
Brown     100
Purple    100
Green     100
Name: baseColour, dtype: int64

In [82]:
# extracting cie values from sampled dataset
def find_color(img):
    """
    returns the predicted color category, given a PIL Image object
    """
    input_tensor = resize(img, (112, 112))
    input_tensor = tsfm(input_tensor).to(device)

    out = mc_model(input_tensor.unsqueeze(0))
    cams = cam_extractor(out.squeeze(0).argmax().item(), out)

    scores = cams[0].cpu()
    resized_scores = resize(to_pil_image(scores.squeeze(0)), size=(112, 112))
    scores_array = np.array(resized_scores)
    norm_scores = scores_array/255
    seg_mask = np.where(norm_scores > 0.8, 1, 0)
    
    img_array = np.array(to_pil_image(input_tensor.cpu()))
    masked_image = img_array[seg_mask == 1]

    kmeans = KMeans(n_clusters=3)
    kmeans.fit(masked_image)
    colors = kmeans.cluster_centers_

    unique, counts = np.unique(kmeans.labels_, return_counts=True)

    idx = np.where(counts == counts.max())[0]

    # converting extracted rgb value to cie
    s = tuple(map(int,kmeans.cluster_centers_[idx][0]))
    s = convert_rgb2lab(s)

    return s

In [83]:
# new dataframe with only cie values and target baseColour
sample_cie = pd.DataFrame(columns=["l1", "l2", "l3", "baseColour"], dtype="object")
sample_cie

Unnamed: 0,l1,l2,l3,baseColour


In [86]:
root_dir = "../data/myntradataset/images"
for i in sample.id:
    try:
        img_path = os.path.join(root_dir, str(i)) + ".jpg"
        img = Image.open(img_path).convert("RGB")

    except Exception as e:
        print(e)
        continue
        
    c = extract_cie(img)
    row = list(c) + [sample[sample.id == i].baseColour.values[0]]
    
    sample_cie.loc[len(sample_cie)] = row

In [87]:
# Features and target
X = sample_cie.drop(["baseColour"], axis=1)
y = sample_cie["baseColour"]

# KNN classifier with custom metric
knn_classifier = KNeighborsClassifier(n_neighbors=1,
                                    metric=color.deltaE_cie76)

# sample_cie the classifier
knn_classifier.fit(X, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30,
                     metric=<function deltaE_cie76 at 0x7ff7e6fd64d0>,
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [88]:
# evaluating on test data
def find_color(img):
    """
    returns the predicted color category, given a PIL Image object
    """
    input_tensor = resize(img, (112, 112))
    input_tensor = tsfm(input_tensor).to(device)

    out = mc_model(input_tensor.unsqueeze(0))
    cams = cam_extractor(out.squeeze(0).argmax().item(), out)

    scores = cams[0].cpu()
    resized_scores = resize(to_pil_image(scores.squeeze(0)), size=(112, 112))
    scores_array = np.array(resized_scores)
    norm_scores = scores_array/255
    seg_mask = np.where(norm_scores > 0.8, 1, 0)
    
    img_array = np.array(to_pil_image(input_tensor.cpu()))
    masked_image = img_array[seg_mask == 1]
    cie_img = color.rgb2lab(np.expand_dims(masked_image, 0))
    
    # Finding dominant color from the CIE space
    kmeans = KMeans(n_clusters=3, random_state=42)
    kmeans.fit(np.squeeze(cie_img))
    colors = kmeans.cluster_centers_

    unique, counts = np.unique(kmeans.labels_, return_counts=True)

    idx = np.where(counts == counts.max())[0]

    s = tuple(kmeans.cluster_centers_[idx][0])

    predicted_class = knn_classifier.predict(np.array(s).reshape(1, -1))

    return predicted_class[0]

In [92]:
root_dir = "../data/myntradataset/images"
true = []
predicted = []
for i in test.id:
    try:
        img_path = os.path.join(root_dir, str(i)) + ".jpg"
        img = Image.open(img_path).convert("RGB")

    except Exception as e:
        print(e)
        continue
    
    predicted_label = find_color(img)
    true_label = test[test.id == i]["baseColour"]
    true.append(true_label)
    predicted.append(predicted_label)



[Errno 2] No such file or directory: '../data/myntradataset/images/12347.jpg'




[Errno 2] No such file or directory: '../data/myntradataset/images/39403.jpg'




In [93]:
accuracy_score(true, predicted)

0.31989186753773374

# 4. Finding best value of k

In [136]:
from sklearn.model_selection import GridSearchCV

In [137]:
krange = [1, 5, 10, 20, 50, 70, 100, 130, 150, 170, 200]
krange

[1, 5, 10, 20, 50, 70, 100, 130, 150, 170, 200]

In [138]:
X = sample_cie.drop(["baseColour"], axis=1)
y = sample_cie["baseColour"]

In [139]:
# evaluating on test data
def find_color(img):
    """
    returns the predicted color category, given a PIL Image object
    """
    input_tensor = resize(img, (112, 112))
    input_tensor = tsfm(input_tensor).to(device)

    out = mc_model(input_tensor.unsqueeze(0))
    cams = cam_extractor(out.squeeze(0).argmax().item(), out)

    scores = cams[0].cpu()
    resized_scores = resize(to_pil_image(scores.squeeze(0)), size=(112, 112))
    scores_array = np.array(resized_scores)
    norm_scores = scores_array/255
    seg_mask = np.where(norm_scores > 0.8, 1, 0)
    
    img_array = np.array(to_pil_image(input_tensor.cpu()))
    masked_image = img_array[seg_mask == 1]
    cie_img = color.rgb2lab(np.expand_dims(masked_image, 0))
    
    # Finding dominant color from the CIE space
    kmeans = KMeans(n_clusters=3, random_state=42)
    kmeans.fit(np.squeeze(cie_img))
    colors = kmeans.cluster_centers_

    unique, counts = np.unique(kmeans.labels_, return_counts=True)

    idx = np.where(counts == counts.max())[0]

    s = tuple(kmeans.cluster_centers_[idx][0])

    predicted_class = knn_classifier.predict(np.array(s).reshape(1, -1))

    return predicted_class[0]

In [None]:
import pickle
acc = 0
path = "../models/best_knn.pkl"
for k in krange:
    knn_classifier = KNeighborsClassifier(n_neighbors=k,
                                         metric=color.deltaE_cie76)
    knn_classifier.fit(X, y)
    
    root_dir = "../data/myntradataset/images"
    true = []
    predicted = []
    for i in test.id:
        try:
            img_path = os.path.join(root_dir, str(i)) + ".jpg"
            img = Image.open(img_path).convert("RGB")

        except Exception as e:
            continue

        predicted_label = find_color(img)
        true_label = test[test.id == i]["baseColour"]
        true.append(true_label)
        predicted.append(predicted_label)
        
    curr_acc = accuracy_score(true, predicted)
    print(f"Accuracy with {k} neighbours: {curr_acc}")
    if curr_acc > acc:
        pickle.dump(knn_classifier, open(path, 'wb'))
        acc = curr_acc



Accuracy with 1 neighbours: 0.31989186753773374




Accuracy with 5 neighbours: 0.4000901103852219




Accuracy with 10 neighbours: 0.4219418788015319




Accuracy with 20 neighbours: 0.4120297364271232




In [143]:
krange = [50, 70, 100, 130, 150, 170, 200]
path = "../models/best_knn.pkl"
for k in krange:
    knn_classifier = KNeighborsClassifier(n_neighbors=k,
                                         metric=color.deltaE_cie76)
    knn_classifier.fit(X, y)
    
    root_dir = "../data/myntradataset/images"
    true = []
    predicted = []
    for i in test.id:
        try:
            img_path = os.path.join(root_dir, str(i)) + ".jpg"
            img = Image.open(img_path).convert("RGB")

        except Exception as e:
            continue

        predicted_label = find_color(img)
        true_label = test[test.id == i]["baseColour"]
        true.append(true_label)
        predicted.append(predicted_label)
        
    curr_acc = accuracy_score(true, predicted)
    print(f"Accuracy with {k} neighbours: {curr_acc}")
    if curr_acc > acc:
        pickle.dump(knn_classifier, open(path, 'wb'))
        acc = curr_acc



Accuracy with 50 neighbours: 0.3908537958999775




Accuracy with 70 neighbours: 0.38161748141473306




Accuracy with 100 neighbours: 0.3640459562964632




Accuracy with 130 neighbours: 0.3737328227078171




Accuracy with 150 neighbours: 0.3710295111511602




Accuracy with 170 neighbours: 0.36697454381617484




Accuracy with 200 neighbours: 0.3651723361117369


In [144]:
acc

0.4219418788015319

## Evaluation on major colors only

In [148]:
# using the best model
knn_classifier = pickle.load(open("../models/best_knn.pkl", "rb"))
knn_classifier

KNeighborsClassifier(algorithm='auto', leaf_size=30,
                     metric=<function deltaE_cie76 at 0x7ff7e6fd64d0>,
                     metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                     weights='uniform')

In [152]:
# using only test data with major colors
cols = ["Black", "White", "Blue", "Brown", "Grey", "Red", "Green", "Pink", "Purple", "Yellow", "Maroon", "Orange"]
test_mask = test.baseColour.isin(cols)
test_sub = test[test_mask]
test_sub.baseColour.value_counts()

Black     973
White     554
Blue      492
Brown     349
Grey      274
Red       246
Green     212
Pink      186
Purple    164
Yellow     78
Maroon     58
Orange     53
Name: baseColour, dtype: int64

In [153]:
# finding accuracy 
root_dir = "../data/myntradataset/images"
true = []
predicted = []
for i in test_sub.id:
    try:
        img_path = os.path.join(root_dir, str(i)) + ".jpg"
        img = Image.open(img_path).convert("RGB")

    except Exception as e:
        print(e)
        continue
    
    predicted_label = find_color(img)
    true_label = test_sub[test_sub.id == i]["baseColour"]
    true.append(true_label)
    predicted.append(predicted_label)
    
print(accuracy_score(true, predicted))



[Errno 2] No such file or directory: '../data/myntradataset/images/12347.jpg'




[Errno 2] No such file or directory: '../data/myntradataset/images/39403.jpg'




0.5149848776464119


# 5. Finding k for full training dataset, all colors

In [156]:
train

Unnamed: 0,id,masterCategory,subCategory,articleType,baseColour
19396,45629,Apparel,Innerwear,Briefs,Black
41118,33267,Apparel,Bottomwear,Shorts,Blue
10913,32849,Footwear,Shoes,Heels,Brown
24585,33642,Footwear,Flip Flops,Flip Flops,Navy Blue
22821,30058,Accessories,Watches,Watches,Brown
...,...,...,...,...,...
4356,22596,Apparel,Topwear,Tshirts,Red
15481,38163,Accessories,Eyewear,Sunglasses,White
2978,47197,Footwear,Sandal,Sandals,Black
34603,6728,Footwear,Shoes,Sports Shoes,White


In [157]:
# extracting cie values for entire training data
def extract_cie(img):
    """
    returns the extracted CIE value, given a PIL Image object
    """
    input_tensor = resize(img, (112, 112))
    input_tensor = tsfm(input_tensor).to(device)

    out = mc_model(input_tensor.unsqueeze(0))
    cams = cam_extractor(out.squeeze(0).argmax().item(), out)

    scores = cams[0].cpu()
    resized_scores = resize(to_pil_image(scores.squeeze(0)), size=(112, 112))
    scores_array = np.array(resized_scores)
    norm_scores = scores_array/255
    seg_mask = np.where(norm_scores > 0.8, 1, 0)
    
    img_array = np.array(to_pil_image(input_tensor.cpu()))
    masked_image = img_array[seg_mask == 1]
    cie_img = color.rgb2lab(np.expand_dims(masked_image, 0))
    
    kmeans = KMeans(n_clusters=3)
    kmeans.fit(np.squeeze(cie_img))
    colors = kmeans.cluster_centers_

    unique, counts = np.unique(kmeans.labels_, return_counts=True)

    idx = np.where(counts == counts.max())[0]

    s = tuple(map(int,kmeans.cluster_centers_[idx][0]))
    
    return s

In [167]:
cie_df = pd.DataFrame(columns=["id", "l1", "l2", "l3", "baseColour"], dtype="object")
cie_df

Unnamed: 0,id,l1,l2,l3,baseColour


In [168]:
root_dir = "../data/myntradataset/images"
for i in train[train.baseColour != "Multi"].id:
    try:
        img_path = os.path.join(root_dir, str(i)) + ".jpg"
        img = Image.open(img_path).convert("RGB")

    except Exception as e:
        print(e)
        continue
        
    c = extract_cie(img)
    row = [i] + list(c) + [train[train.id == i].baseColour.values[0]]
    
    cie_df.loc[len(cie_df)] = row



[Errno 2] No such file or directory: '../data/myntradataset/images/39401.jpg'




[Errno 2] No such file or directory: '../data/myntradataset/images/39425.jpg'




[Errno 2] No such file or directory: '../data/myntradataset/images/39410.jpg'




In [169]:
cie_df.to_csv("../data/cie_df.csv", index=False)

In [170]:
cie_df

Unnamed: 0,id,l1,l2,l3,baseColour
0,45629,74,14,24,Black
1,33267,54,-5,-31,Blue
2,32849,94,0,0,Brown
3,33642,16,3,-8,Navy Blue
4,30058,18,10,13,Brown
...,...,...,...,...,...
39605,22596,47,62,39,Red
39606,38163,97,0,0,White
39607,47197,18,0,0,Black
39608,6728,92,0,0,White


In [172]:
import warnings
warnings.filterwarnings("ignore")

In [173]:
# search for best value of k on full training data
X = cie_df.drop(["id", "baseColour"], axis=1)
y = cie_df["baseColour"]

acc = 0
krange = [1, 10, 20, 40, 60, 80, 100, 150, 200, 250, 300, 350, 400]
path = "../models/best_knn_full.pkl"
for k in krange:
    knn_classifier = KNeighborsClassifier(n_neighbors=k,
                                         metric=color.deltaE_cie76)
    knn_classifier.fit(X, y)
    
    root_dir = "../data/myntradataset/images"
    true = []
    predicted = []
    for i in test.id:
        try:
            img_path = os.path.join(root_dir, str(i)) + ".jpg"
            img = Image.open(img_path).convert("RGB")

        except Exception as e:
            continue

        predicted_label = find_color(img)
        true_label = test[test.id == i]["baseColour"]
        true.append(true_label)
        predicted.append(predicted_label)
        
    curr_acc = accuracy_score(true, predicted)
    print(f"Accuracy with {k} neighbours: {curr_acc}")
    if curr_acc > acc:
        pickle.dump(knn_classifier, open(path, 'wb'))
        acc = curr_acc

Accuracy with 1 neighbours: 0.3421941878801532
Accuracy with 10 neighbours: 0.46677179544942554
Accuracy with 20 neighbours: 0.475107006082451
Accuracy with 40 neighbours: 0.4847938724938049
Accuracy with 60 neighbours: 0.4845685965307502
Accuracy with 80 neighbours: 0.4802883532327101
Accuracy with 100 neighbours: 0.47961252534354587
Accuracy with 150 neighbours: 0.4748817301193963
Accuracy with 200 neighbours: 0.4697003829691372
Accuracy with 250 neighbours: 0.47645866186077945
Accuracy with 300 neighbours: 0.4733047983780131
Accuracy with 350 neighbours: 0.47127731471052037
Accuracy with 400 neighbours: 0.4669970714124803


In [174]:
acc

0.4847938724938049