In [None]:
# Mount Google Drive at /content/drive so the project files referenced by
# REPO_LOC (set in the next cell) are reachable from this runtime.
from google.colab import drive

drive.mount("/content/drive")
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed.
%load_ext autoreload
%autoreload 2

In [None]:
# Project root on the mounted Drive; the import cell below appends
# "/Trajectory_Classification" to this path and puts it on sys.path.
REPO_LOC = "/content/drive/MyDrive/Projects/rhexis-trajectory"

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import sys

# Put the project's Trajectory_Classification directory first on sys.path so
# that the `utils` import below resolves to it.
sys.path.insert(0, f"{REPO_LOC}/Trajectory_Classification")
# NOTE(review): the star import hides where names come from. Helpers used later
# (e.g. get_pupil_std_data_traj, grid_search_img) are not defined in this
# notebook and presumably come from here -- confirm and import them explicitly.
from utils import *

# NOTE(review): autoreload is already loaded in the Drive-mount cell above;
# this repeat looks redundant.
%load_ext autoreload
%autoreload 2

# Exploratory Visualization

In [None]:
# Load the trajectory data and render every sample in X as an image, one
# matplotlib figure per sample.
X, y = get_pupil_std_data_traj((100, 100), False)
for sample_idx, traj_img in enumerate(X):
    plt.figure()
    plt.imshow(traj_img)
    # Only the sample at index 4 is exported to disk; all others are just drawn.
    if sample_idx != 4:
        continue
    plt.savefig("TrajectoryPaths.svg", format="svg", dpi=1200)

## (OLD) Full Trajectories

In [None]:
# Overlay the whole PGY2 trajectory on one set of axes: one scatter series per
# pull index in 0..max(pull).
# NOTE(review): `pgy2` is not defined in the visible cells (it may come from
# `from utils import *` or from an earlier version of this notebook) -- confirm.
pgy2_pull_ids = pgy2["pull"]
for pull_id in range(max(pgy2_pull_ids) + 1):
    pull_points = pgy2[pgy2_pull_ids == pull_id]
    plt.scatter(pull_points["x"], pull_points["y"])

In [None]:
# Overlay the whole PGY4 trajectory on one set of axes: one scatter series per
# pull index in 0..max(pull).
# NOTE(review): `pgy4` is not defined in the visible cells (it may come from
# `from utils import *` or from an earlier version of this notebook) -- confirm.
pgy4_pull_ids = pgy4["pull"]
for pull_id in range(max(pgy4_pull_ids) + 1):
    pull_points = pgy4[pgy4_pull_ids == pull_id]
    plt.scatter(pull_points["x"], pull_points["y"])

In [None]:
# Overlay the whole expert trajectory on one set of axes: one scatter series
# per pull index in 0..max(pull).
# NOTE(review): `expert` is not defined in the visible cells (it may come from
# `from utils import *` or from an earlier version of this notebook) -- confirm.
expert_pull_ids = expert["pull"]
for pull_id in range(max(expert_pull_ids) + 1):
    pull_points = expert[expert_pull_ids == pull_id]
    plt.scatter(pull_points["x"], pull_points["y"])

## (OLD) Individual Pulls

In [None]:
# Draw each pull of the expert trajectory in its own figure (the original note
# says there are 4 pulls; the loop covers indices 0..max(pull)).
# NOTE(review): `expert` is not defined in the visible cells -- confirm where
# it is loaded.
expert_pull_ids = expert["pull"]
for pull_id in range(max(expert_pull_ids) + 1):
    plt.figure()
    pull_points = expert[expert_pull_ids == pull_id]
    plt.scatter(pull_points["x"], pull_points["y"])
plt.show()

In [None]:
# Draw each pull of the PGY4 trajectory in its own figure, for pull indices
# 0..max(pull).
# NOTE(review): `pgy4` is not defined in the visible cells -- confirm where it
# is loaded.
pgy4_pull_ids = pgy4["pull"]
for pull_id in range(max(pgy4_pull_ids) + 1):
    plt.figure()
    pull_points = pgy4[pgy4_pull_ids == pull_id]
    plt.scatter(pull_points["x"], pull_points["y"])
plt.show()

In [None]:
# Draw each pull of the PGY2 trajectory in its own figure, for pull indices
# 0..max(pull).
# NOTE(review): `pgy2` is not defined in the visible cells -- confirm where it
# is loaded.
pgy2_pull_ids = pgy2["pull"]
for pull_id in range(max(pgy2_pull_ids) + 1):
    plt.figure()
    pull_points = pgy2[pgy2_pull_ids == pull_id]
    plt.scatter(pull_points["x"], pull_points["y"])
plt.show()

In [None]:
from sklearn.decomposition import PCA

In [None]:
def pca(n_components):
    """Fit a whitened, randomized-solver PCA on the notebook-level ``X_train``.

    NOTE(review): a two-argument ``pca(data, n_components)`` is defined later in
    this notebook and rebinds the same name, so this version is only in effect
    until that cell runs.

    Args:
        n_components: number of principal components to keep.

    Returns:
        The fitted ``PCA`` estimator.
    """
    # Relies on the global X_train, which is only created by the train/test
    # split cell further down; calling this before that cell raises NameError.
    model = PCA(n_components=n_components, svd_solver="randomized", whiten=True)
    # Return the fitted estimator: without this the fit result is dropped and
    # the function yields None.
    return model.fit(X_train)

# Model Fitting/Prediction

In [None]:
# Load the data used for model fitting (same call as in the exploratory
# visualization cell above).
# NOTE(review): (100, 100) presumably is the per-sample image size -- it matches
# the 10000-feature flattening and the 100x100 reshape used below; the meaning
# of the False flag is not visible in this notebook.
X, y = get_pupil_std_data_traj((100, 100), False)

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Seed NumPy's global RNG right before splitting; the splitter itself is given
# no random_state of its own.
np.random.seed(40)
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
# A single stratified split with 20% held out; take its only index pair.
train_ind, test_ind = next(sss.split(X, y))
X_train, y_train = X[train_ind], y[train_ind]
X_test, y_test = X[test_ind], y[test_ind]
# Show both label arrays to check class balance.
y_train, y_test

## PCA

In [None]:
def process_for_pca(data, n_features=10000):
    """Flatten image-like samples into a 2-D array with one row per sample.

    Args:
        data: array-like whose total number of elements is a multiple of
            ``n_features`` (e.g. a stack of 100x100 images with the default).
        n_features: length of each flattened row. Defaults to 10000
            (100 * 100), so existing one-argument calls behave as before.

    Returns:
        ``numpy.ndarray`` of shape ``(-1, n_features)``; the number of rows is
        inferred from the size of ``data``.

    Raises:
        ValueError: if ``data`` cannot be reshaped into rows of ``n_features``.
    """
    return np.reshape(data, (-1, n_features))

In [None]:
from sklearn.decomposition import PCA, KernelPCA


def pca(data, n_components, image_shape=(100, 100)):
    """Fit a whitened PCA and return it together with its components as images.

    Args:
        data: 2-D array of flattened samples, one row per sample.
        n_components: number of principal components to keep.
        image_shape: shape each principal component is reshaped to; its
            product must equal the number of columns in ``data``. Defaults to
            ``(100, 100)``, so existing two-argument calls behave as before.

    Returns:
        Tuple ``(model, components)``: the fitted ``PCA`` estimator and its
        ``components_`` reshaped to ``(n_fitted_components, *image_shape)``.
    """
    model = PCA(n_components=n_components, svd_solver="randomized", whiten=True)
    model.fit(data)
    # Let reshape infer the leading dimension from what was actually fitted
    # instead of trusting the requested n_components.
    return model, model.components_.reshape((-1, *image_shape))

In [None]:
# Flatten the training images once and reuse the result for both fitting the
# 4-component PCA and projecting the training set onto it.
X_train_flat = process_for_pca(X_train)
pca_obj, components = pca(X_train_flat, 4)
X_train_pca = pca_obj.transform(X_train_flat)
# (pca_obj.explained_variance_ can be inspected here if needed.)

# Show each principal component as an image.
for component_img in components:
    plt.imshow(component_img)
    plt.show()

In [None]:
# components = pca(process_for_pca(X_train), 5)
# for i in range(len(components)):
#   plt.imshow(components[i])
#   plt.show()

## Logistic Model (Baseline)

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier: multinomial logistic regression.
clf = LogisticRegression(random_state=0, multi_class="multinomial", max_iter=1000)
# Hyper-parameter grid; the "kernelpca__" prefix presumably targets a KernelPCA
# step that grid_search_img places in front of the classifier -- confirm.
param_grid = {
    # Use the None object, not the string "None": KernelPCA only accepts an
    # int or None for n_components (None keeps all non-zero components).
    "kernelpca__n_components": list(range(2, 12)) + [None],
    # NOTE(review): "precomputed" expects a kernel matrix as input -- confirm
    # grid_search_img supplies one, otherwise those candidates cannot be fitted.
    "kernelpca__kernel": ["linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"],
}
search = grid_search_img(clf, True, param_grid, X_train, y_train)

In [None]:
# Best score recorded by the logistic-regression search (presumably a mean
# cross-validated score -- confirm what grid_search_img returns).
search.best_score_

In [None]:
# Parameter combination that achieved the best score in the logistic search.
search.best_params_

In [None]:
# Score of the logistic-regression search result on the held-out test split.
search.score(X_test, y_test)

## Quadratic GDA

Note: the quadratic GDA model does not perform well on this data.

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Quadratic discriminant analysis with its regularisation strength tuned below.
clf = QuadraticDiscriminantAnalysis()
# Hyper-parameter grid; the "<step>__<param>" prefixes presumably target the
# steps of a pipeline built by grid_search_img -- confirm.
param_grid = {
    # Use the None object, not the string "None": KernelPCA only accepts an
    # int or None for n_components (None keeps all non-zero components).
    "kernelpca__n_components": list(range(2, 12)) + [None],
    # NOTE(review): "precomputed" expects a kernel matrix as input -- confirm
    # grid_search_img supplies one, otherwise those candidates cannot be fitted.
    "kernelpca__kernel": ["linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"],
    "quadraticdiscriminantanalysis__reg_param": [
        1e-6,
        1e-3,
        1e-2,
        1e-1,
        2e-1,
        3e-1,
        4e-1,
        5e-1,
    ],
}
search = grid_search_img(clf, True, param_grid, X_train, y_train)

In [None]:
# Best score recorded by the QDA search (presumably a mean cross-validated
# score -- confirm what grid_search_img returns).
search.best_score_

In [None]:
# Parameter combination that achieved the best score in the QDA search.
search.best_params_

In [None]:
# Score of the QDA search result on the held-out test split.
search.score(X_test, y_test)

## Neural Network

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with three hidden layers (1000, 100, 100 units).
clf = MLPClassifier(
    solver="adam", alpha=1e-5, hidden_layer_sizes=(1000, 100, 100), random_state=1
)
# Hyper-parameter grid; the "<step>__<param>" prefixes presumably target the
# steps of a pipeline built by grid_search_img -- confirm.
param_grid = {
    # Use the None object, not the string "None": KernelPCA only accepts an
    # int or None for n_components (None keeps all non-zero components).
    "kernelpca__n_components": list(range(2, 12)) + [None],
    # NOTE(review): "precomputed" expects a kernel matrix as input -- confirm
    # grid_search_img supplies one, otherwise those candidates cannot be fitted.
    "kernelpca__kernel": ["linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"],
    "mlpclassifier__learning_rate_init": [1e-9, 1e-6, 1e-3, 1e-2, 1e-1],
}
search = grid_search_img(clf, True, param_grid, X_train, y_train)

In [None]:
# Best score recorded by the neural-network search (presumably a mean
# cross-validated score -- confirm what grid_search_img returns).
search.best_score_

In [None]:
# Parameter combination that achieved the best score in the neural-network search.
search.best_params_

In [None]:
# Score of the neural-network search result on the held-out test split.
search.score(X_test, y_test)

## Unsupervised clustering

In [None]:
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score

BATCH_SIZE = 3
kmeans = MiniBatchKMeans(n_clusters=3, random_state=0, batch_size=BATCH_SIZE)

# MiniBatchKMeans needs 2-D (n_samples, n_features) input, while the samples in
# X_train / X_test are image-shaped elsewhere in this notebook (they are drawn
# with imshow and flattened by process_for_pca). Flatten each sample to one row;
# this is a no-op if the arrays are already 2-D.
X_train_2d = np.reshape(X_train, (len(X_train), -1))
X_test_2d = np.reshape(X_test, (len(X_test), -1))

# Feed the training data incrementally, BATCH_SIZE samples per call.
for i in range(0, len(X_train_2d), BATCH_SIZE):
    kmeans.partial_fit(X_train_2d[i : i + BATCH_SIZE])

y_pred = kmeans.predict(X_test_2d)
# NOTE(review): cluster ids are assigned arbitrarily and are not aligned with
# the class labels in y_test, so this accuracy is only meaningful after mapping
# clusters to labels -- confirm intent.
accuracy_score(y_test, y_pred)