In [80]:
import torch
import numpy as np
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
from scipy.spatial.distance import mahalanobis, euclidean
from sklearn.metrics import accuracy_score
from scipy.linalg import inv
import scipy
from copy import deepcopy

In [81]:
# Load the train and eval splits of dataset 1.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only use it on .npz files from a trusted source.
# Each archive is opened as a context manager so its file handle is closed as
# soon as the arrays have been read; ds1_train / ds1_eval stay bound afterwards
# but refer to closed archives.
with np.load("ds1_train.npz", allow_pickle=True) as ds1_train:
    x1_train = ds1_train['x']  # samples used to fit the first model
    y1_train = ds1_train['y']  # their class labels

with np.load("ds1_eval.npz", allow_pickle=True) as ds1_eval:
    x1_test = ds1_eval['x']  # samples used to score the first model
    y1_test = ds1_eval['y']  # their class labels

In [82]:
# Per-class statistics, addressed as [class][dataset slot]. Object arrays are
# used so that a slot can hold a whole vector/matrix; unfilled slots are None.
GRID_SHAPE = (20, 20)
class_means = np.empty(GRID_SHAPE, dtype=object)
class_cov = np.empty(GRID_SHAPE, dtype=object)

# scores[i][j]: accuracy of the model obtained after dataset i+1, measured on
# the eval split of dataset j+1. Cells that are never evaluated stay at 0.0.
scores = np.zeros(GRID_SHAPE)

# Number of distinct labels seen in the first training set.
num_classes = np.unique(y1_train).size

In [85]:
def predict(X_test, mean):
    """Assign each sample to the class whose mean vector is closest (Euclidean).

    Parameters
    ----------
    X_test : array-like, shape (n_samples, n_features)
        Samples to classify.
    mean : array-like, shape (n_classes, n_features)
        One mean vector per class; the row index is the class label. A single
        1-D vector is accepted and treated as a one-class model.

    Returns
    -------
    numpy.ndarray
        Predicted class index for every row of ``X_test``.
    """
    # The number of classes comes from `mean` itself, so the function does not
    # depend on a notebook-level global being defined first.
    means = np.atleast_2d(np.asarray(mean, dtype=float))
    predictions = []
    for x in X_test:
        # Distance from this sample to every class mean at once.
        distances = np.linalg.norm(means - np.asarray(x, dtype=float), axis=1)
        predictions.append(np.argmin(distances))
    return np.array(predictions)

In [86]:
def predict2(x_test):
    """Classify samples by their nearest stored class prototype.

    For every class, the distance of a sample to that class is the smallest
    Euclidean distance to any mean stored in ``class_means[cls]`` (slots that
    are still ``None`` are skipped). The sample receives the label of the class
    with the smallest such distance.

    Reads the notebook-level ``class_means`` and ``num_classes``.

    Parameters
    ----------
    x_test : iterable of 1-D vectors
        Samples to classify.

    Returns
    -------
    numpy.ndarray
        Predicted class index per sample.

    Raises
    ------
    ValueError
        From ``min`` when a class has no stored mean at all.
    """
    def distance_to_class(sample, label):
        # Closest of the prototypes recorded so far for this class.
        slots = class_means[label]
        return min([
            euclidean(sample, slots[idx])
            for idx in range(len(slots))
            if slots[idx] is not None
        ])

    labels = [
        np.argmin([distance_to_class(sample, label) for label in range(num_classes)])
        for sample in x_test
    ]
    return np.array(labels)

In [87]:
# Empty scratch list; no active code in the visible notebook reads or fills it.
meanvecs = list()

In [88]:

# Fit the first model on dataset 1: one mean vector and one regularized
# covariance matrix per class, stored in slot 0 of class_means / class_cov.
for cls in range(num_classes):
    class_samples = x1_train[y1_train == cls]
    mean_vec = np.mean(class_samples, axis=0)
    class_means[cls][0] = mean_vec

    # Regularized covariance matrix: a small constant is added to the diagonal.
    # The identity is sized from the estimate itself rather than a hard-coded
    # 768, so the cell works for any feature dimension.
    cov_matrix = np.cov(class_samples, rowvar=False)
    cov_matrix = cov_matrix + 1e-6 * np.eye(cov_matrix.shape[0])
    class_cov[cls][0] = cov_matrix

# Score the first model on the eval split of dataset 1.
y1_pred = predict2(x1_test)
accuracy = accuracy_score(y1_test, y1_pred)
scores[0][0] = round(accuracy, 4)
print(f"Accuracy : {accuracy: .2%}")


Accuracy :  95.84%


In [89]:
# Placeholders that no active code in the visible notebook reads or updates.
last_cov = last_tmp_count = None
last_i = last_class = 0

In [90]:
k = 0.5  # NOTE(review): not referenced anywhere in the visible notebook

SYNTHETIC_PER_DATASET = 250  # points replayed per class from each earlier dataset
COV_RIDGE = 1e-6             # constant added to the diagonal of every covariance estimate
# Seeded generator so that re-running the cell draws the same synthetic samples
# and therefore reproduces the same score table.
rng = np.random.default_rng(0)

for i in range(2, 21):
    # Only the inputs of dataset i are read; its labels come from the current
    # prototypes (pseudo-labelling).
    # NOTE(review): allow_pickle=True can execute arbitrary code when the file
    # is untrusted. The archive is closed as soon as the array is read.
    with np.load(f"ds{i}_train.npz", allow_pickle=True) as ds_train:
        x_train = ds_train['x']
    y_train = predict2(x_train)

    for cls in range(num_classes):
        class_samples = x_train[y_train == cls]

        # Replay: draw synthetic points from the Gaussian stored for this class
        # in every earlier dataset slot (datasets 1 .. i-1).
        synthetic_samples = [
            rng.multivariate_normal(
                class_means[cls][ds - 1], class_cov[cls][ds - 1], SYNTHETIC_PER_DATASET
            )
            for ds in range(1, i)
        ]

        # Stack pseudo-labelled and synthetic points into one float64 matrix
        # (the same values the former nested-list conversion produced, without
        # building Python lists of every feature).
        good_samples = np.vstack(
            [np.asarray(class_samples, dtype=np.float64), *synthetic_samples]
        )

        curr_mean_vec = np.mean(good_samples, axis=0)
        class_means[cls][i-1] = curr_mean_vec

        # Feature dimension is taken from the data instead of a hard-coded 768.
        curr_cov_matrix = (
            np.cov(good_samples, rowvar=False)
            + COV_RIDGE * np.eye(good_samples.shape[1])
        )
        class_cov[cls][i-1] = curr_cov_matrix

    # Score the updated model on the eval split of every dataset seen so far.
    for eval_set_no in range(1, i+1):
        with np.load(f"ds{eval_set_no}_eval.npz", allow_pickle=True) as eval_set:
            eval_x = eval_set['x']
            eval_y = eval_set['y']

        pred_y = predict2(eval_x)
        accuracy = accuracy_score(eval_y, pred_y)
        scores[i-1][eval_set_no-1] = accuracy

        



In [102]:
import pandas as pd

# Accuracy matrix in percent: one row per successive model (f_1..f_20), one
# column per evaluation set (D1..D20).
model_labels = [f"f_{n}" for n in range(1, 21)]
dataset_labels = [f"D{n}" for n in range(1, 21)]
df = pd.DataFrame(scores * 100, index=model_labels, columns=dataset_labels)

# Show the first ten models against the first ten evaluation sets.
display(df.iloc[:10, :10])
print("Accuracy of models for task 1.1")

Unnamed: 0,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10
f_1,95.84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_2,95.96,95.64,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_3,96.0,95.76,95.36,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_4,95.88,95.68,95.36,95.68,0.0,0.0,0.0,0.0,0.0,0.0
f_5,95.84,95.68,95.36,95.68,95.72,0.0,0.0,0.0,0.0,0.0
f_6,95.84,95.64,95.36,95.76,95.72,95.96,0.0,0.0,0.0,0.0
f_7,95.76,95.64,95.4,95.84,95.76,96.0,95.4,0.0,0.0,0.0
f_8,95.76,95.64,95.4,95.84,95.76,96.0,95.36,95.28,0.0,0.0
f_9,95.8,95.68,95.44,95.84,95.8,96.0,95.36,95.32,96.16,0.0
f_10,95.8,95.68,95.44,95.84,95.8,96.0,95.36,95.36,96.16,96.0


Accuracy of models for task 1.1


In [103]:
# display(pd.DataFrame(scores[10:20, 10:20]))

In [104]:
# Rebuild the percentage table: rows are models f_1..f_20, columns are
# evaluation sets D1..D20.
df = pd.DataFrame(
    scores * 100,
    index=[f"f_{n}" for n in range(1, 21)],
    columns=[f"D{n}" for n in range(1, 21)],
)

# Show models f_11..f_20 against the first twenty evaluation sets.
display(df.iloc[10:, :20])
print("accuracy of models for task 1.2")

Unnamed: 0,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D20
f_11,95.84,95.64,95.28,95.8,95.64,95.96,95.36,95.48,96.04,95.96,82.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_12,95.72,95.6,95.24,95.76,95.64,95.84,95.36,95.48,96.08,95.92,82.04,73.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_13,95.72,95.6,95.24,95.76,95.64,95.84,95.36,95.4,96.04,95.96,82.04,73.6,88.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_14,95.72,95.6,95.24,95.76,95.64,95.84,95.36,95.4,96.04,95.96,82.04,73.6,88.08,92.52,0.0,0.0,0.0,0.0,0.0,0.0
f_15,95.72,95.6,95.24,95.76,95.64,95.84,95.36,95.4,96.04,95.96,82.04,73.6,88.08,92.52,94.4,0.0,0.0,0.0,0.0,0.0
f_16,95.72,95.6,95.28,95.76,95.68,95.88,95.36,95.4,96.0,95.96,82.04,73.6,88.08,92.52,94.52,87.84,0.0,0.0,0.0,0.0
f_17,95.72,95.6,95.28,95.64,95.64,95.88,95.36,95.44,95.96,95.96,82.0,73.6,88.08,92.56,94.48,87.84,83.84,0.0,0.0,0.0
f_18,95.76,95.64,95.28,95.64,95.64,95.88,95.36,95.44,95.92,95.96,82.0,73.6,88.04,92.56,94.44,87.84,83.84,84.44,0.0,0.0
f_19,95.76,95.64,95.2,95.64,95.64,95.88,95.36,95.4,95.88,95.96,82.0,73.6,88.04,92.56,94.44,87.92,83.72,84.44,78.04,0.0
f_20,95.76,95.64,95.2,95.64,95.64,95.88,95.36,95.4,95.88,95.96,81.96,73.6,88.04,92.56,94.44,87.88,83.72,84.44,78.04,92.32


accuracy of models for task 1.2
