In [1]:
# Re-import modules before each cell executes, so edits to the project's
# own modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from os.path import join
import numpy as np
from tqdm import tqdm

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

from constants import DATA_DIR, idx_to_class
from classify import STL, BoWClassifier, relevant_classes

sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)


### Dataset paths

In [3]:
# Files under DATA_DIR that are later handed to the classifier as the
# training data path and training label path.
TRAIN_X_PATH, TRAIN_y_PATH = (
    join(DATA_DIR, file_name) for file_name in ("train_X.bin", "train_y.bin")
)

# Same pair of files for the test split.
TEST_X_PATH, TEST_y_PATH = (
    join(DATA_DIR, file_name) for file_name in ("test_X.bin", "test_y.bin")
)

### Run experiment

In [4]:
# Bag-of-words classifier built with n_clusters=500; no `svm_args` are passed,
# so whatever defaults the class defines apply.
# NOTE(review): the grid-search cell further down rebinds `bow` to freshly
# constructed instances, so this object is not the one evaluated there.
bow = BoWClassifier(n_clusters=500)

In [10]:
# Hyper-parameter grid for the SVM: every C below is combined with every kernel.
C_values = [0.1, 1.0, 5.0]
# Single-value variant for a quick run: C_values = [0.1]
kernel_values = ["poly", "rbf", "linear"]
# Single-value variant for a quick run: kernel_values = ["poly"]

In [11]:
# Grid search over the SVM hyper-parameters (C x kernel).
# `df` is indexed by C (rows) and kernel (columns); each cell receives the
# "mean" entry of the class-wise average precision returned by `evaluate`.
#
# The frame is allocated with an explicit float dtype. `pd.DataFrame(None, ...)`
# creates object-dtype columns that stay object-typed after the element-wise
# assignments below, so pandas treats the scores as generic objects rather
# than numbers (e.g. `to_latex` left-aligns them and skips float padding).
df = pd.DataFrame(index=C_values, columns=kernel_values, dtype=float)

for C in C_values:
    for kernel in kernel_values:
        svm_args = dict(C=C, kernel=kernel)

        # A new classifier is constructed for every configuration.
        bow = BoWClassifier(n_clusters=500, svm_args=svm_args)

        bow.fit(
            train_data_path=TRAIN_X_PATH,
            train_label_path=TRAIN_y_PATH,
            show_steps=False,
        )
        # NOTE(review): configurations are scored on the test files, so picking
        # the best cell of `df` tunes on test data; consider a validation split.
        # All five return values stay bound (values of the last configuration)
        # in case later cells read them.
        class_wise_ap, accuracy, svm_features, svm_labels, svm_pred_labels = bow.evaluate(
            test_data_path=TEST_X_PATH,
            test_label_path=TEST_y_PATH,
            show_steps=False,
        )
        # Store a plain float so the column keeps its float64 dtype.
        df.at[C, kernel] = float(class_wise_ap["mean"])

::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 538.52it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:01<00:00, 970.81it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=0.1, kernel='poly', probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |   bird |      car |   horse |     ship |     mean |
|:------------------|-----------:|-------:|---------:|--------:|---------:|---------:|
| Average Precision |   0.997983 |      1 | 0.999952 |       1 | 0.992624 | 0.998112 |
...... Accuracy: 0.9984
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:14<00:00, 537.21it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:07<00:00, 531.36it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=0.1, kernel='poly', probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.563556 | 0.263398 | 0.336816 | 0.308961 | 0.512073 | 0.396961 |
...... Accuracy: 0.5095
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 540.47it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:01<00:00, 844.53it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=0.1, probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.985854 | 0.996045 | 0.994222 | 0.977967 | 0.994971 | 0.989812 |
...... Accuracy: 0.9712
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:14<00:00, 537.49it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:09<00:00, 422.72it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=0.1, probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |    ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|--------:|---------:|
| Average Precision |   0.617142 | 0.473688 | 0.619896 | 0.607088 | 0.59924 | 0.583411 |
...... Accuracy: 0.54275
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 508.34it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:02<00:00, 493.68it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=0.1, kernel='linear', probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.722024 | 0.886087 | 0.855632 | 0.841195 | 0.686604 | 0.798309 |
...... Accuracy: 0.7024
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:14<00:00, 540.24it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:04<00:00, 834.35it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=0.1, kernel='linear', probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |    bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|--------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.569612 | 0.40642 | 0.541845 | 0.394998 | 0.552011 | 0.492977 |
...... Accuracy: 0.481
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 541.35it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:01<00:00, 964.54it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(kernel='poly', probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |   bird |      car |   horse |   ship |     mean |
|:------------------|-----------:|-------:|---------:|--------:|-------:|---------:|
| Average Precision |          1 |      1 | 0.999984 |       1 |      1 | 0.999997 |
...... Accuracy: 0.9992
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:15<00:00, 532.27it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:04<00:00, 921.40it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(kernel='poly', probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |     car |   horse |     ship |     mean |
|:------------------|-----------:|---------:|--------:|--------:|---------:|---------:|
| Average Precision |   0.546609 | 0.291055 | 0.34642 | 0.32363 | 0.544501 | 0.410443 |
...... Accuracy: 0.53125
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 534.62it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:01<00:00, 955.29it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.998664 | 0.996061 | 0.994529 | 0.978707 | 0.995155 | 0.992623 |
...... Accuracy: 0.9744
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:15<00:00, 507.89it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:05<00:00, 760.67it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |     car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|--------:|---------:|---------:|---------:|
| Average Precision |   0.623879 | 0.473548 | 0.61977 | 0.606844 | 0.599587 | 0.584726 |
...... Accuracy: 0.5485
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 517.71it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:02<00:00, 613.06it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(kernel='linear', probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.721971 | 0.894924 | 0.858537 | 0.848622 | 0.687267 | 0.802264 |
...... Accuracy: 0.7072
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:15<00:00, 517.72it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:06<00:00, 653.27it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(kernel='linear', probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.569405 | 0.412724 | 0.538556 | 0.399037 | 0.552845 | 0.494513 |
...... Accuracy: 0.48475
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:10<00:00, 498.39it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:02<00:00, 434.73it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=5.0, kernel='poly', probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |   bird |   car |   horse |   ship |   mean |
|:------------------|-----------:|-------:|------:|--------:|-------:|-------:|
| Average Precision |          1 |      1 |     1 |       1 |      1 |      1 |
...... Accuracy: 0.9992
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:15<00:00, 524.92it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:05<00:00, 679.14it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=5.0, kernel='poly', probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |    bird |      car |   horse |     ship |     mean |
|:------------------|-----------:|--------:|---------:|--------:|---------:|---------:|
| Average Precision |    0.53642 | 0.34554 | 0.405161 | 0.40179 | 0.560548 | 0.449892 |
...... Accuracy: 0.45375
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 516.43it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:02<00:00, 624.63it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=5.0, probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |   bird |   car |   horse |   ship |   mean |
|:------------------|-----------:|-------:|------:|--------:|-------:|-------:|
| Average Precision |          1 |      1 |     1 |       1 |      1 |      1 |
...... Accuracy: 0.9992
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:15<00:00, 528.19it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:06<00:00, 623.72it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=5.0, probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |    mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|--------:|
| Average Precision |   0.607973 | 0.456794 | 0.620636 | 0.580245 | 0.570204 | 0.56717 |
...... Accuracy: 0.54225
::::::::: Loaded dataset with images ((5000, 96, 96, 3)) and labels ((5000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 5000/5000 [00:09<00:00, 512.31it/s]                                


::::: Loading pre-saved k-means clustering model from ./checkpoints/kmeans_sift_500.pkl


Encoding features: 100%|██████████| 1250/1250 [00:02<00:00, 496.62it/s]                                                                     


............... SVM Trained with following results on the training set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=5.0, kernel='linear', probability=True))
..... Dataset: kMeans: X (93203, 128) SVM: X ((1250, 500))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |     car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|--------:|---------:|---------:|---------:|
| Average Precision |   0.734972 | 0.896284 | 0.85821 | 0.847561 | 0.738848 | 0.815175 |
...... Accuracy: 0.7248
::::::::: Loaded dataset with images ((8000, 96, 96, 3)) and labels ((8000,)) ::::::::::


Extracting image features with SIFTDescriptorExtractor: 100%|██████████| 8000/8000 [00:15<00:00, 510.58it/s]                                
Encoding features: 100%|██████████| 4000/4000 [00:07<00:00, 547.38it/s]                                                                     


............... SVM Trained with following results on the test set ...............
..... Model: OneVsRestClassifier(estimator=SVC(C=5.0, kernel='linear', probability=True))
..... Hyperparameters: Number of clusters 500
|                   |   airplane |     bird |      car |    horse |     ship |     mean |
|:------------------|-----------:|---------:|---------:|---------:|---------:|---------:|
| Average Precision |   0.572696 | 0.412456 | 0.538236 | 0.399172 | 0.574708 | 0.499454 |
...... Accuracy: 0.49125


In [12]:
# Mean average precision per configuration: rows are C values, columns are kernels.
df

Unnamed: 0,poly,rbf,linear
0.1,0.396961,0.583411,0.492977
1.0,0.410443,0.584726,0.494513
5.0,0.449892,0.56717,0.499454


In [14]:
# Emit the results table as LaTeX source. `print` is used on purpose: a bare
# expression would display the string's repr with escaped newlines.
print(df.to_latex())

\begin{tabular}{llll}
\toprule
{} &      poly &       rbf &    linear \\
\midrule
0.1 &  0.396961 &  0.583411 &  0.492977 \\
1.0 &  0.410443 &  0.584726 &  0.494513 \\
5.0 &  0.449892 &   0.56717 &  0.499454 \\
\bottomrule
\end{tabular}

