In [None]:
# Mount Google Drive at /content/drive (Colab only). DATA_LOC and REPO_LOC,
# defined in a later cell, both point inside this mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install wandb -qqq
import wandb

# Login to wandb
wandb.login()
use_wandb = True

In [None]:
# Folder on the mounted Drive that is handed to load_all_pulls in the data-loading cell
DATA_LOC = "/content/drive/MyDrive/Rhexis/datasets/test_pulls"
# Base folder whose Trajectory_Classification subdirectory is added to sys.path
# so that `utils` can be imported
REPO_LOC = "/content/drive/MyDrive/Projects/rhexis-trajectory"

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
# Put the repo's Trajectory_Classification folder first on the module search
# path so `utils` is looked up there before anywhere else.
sys.path.insert(0,f"{REPO_LOC}/Trajectory_Classification")
# NOTE(review): star import — load_all_pulls and featurize_pull, used in later
# cells, are not defined in this notebook and presumably come from here; confirm.
from utils import *
# Reload modules automatically before each cell runs, so edits to the repo's
# .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Load data

In [None]:
# Load everything under DATA_LOC. Of the four returned values, later cells use
# path_dfs (iterated pull by pull into featurize_pull) and labels (converted to
# the target array y). load_all_pulls is presumably provided by `utils` — confirm.
names, path_dfs, labels, sizes = load_all_pulls(DATA_LOC)

In [None]:
# Display the first loaded pull as a quick sanity check of the load step
path_dfs[0]

In [None]:
# Run featurize_pull with its default settings on every pull and stack the
# per-pull results along a new leading axis.
data = np.stack(list(map(featurize_pull, path_dfs)), axis=0)
# Show the features computed for the first pull
data[0]

In [None]:
# Feature array: one entry per pull along axis 0, featurized with a second
# argument of 15 (the sweep cell further down logs that argument as
# "num_angle_bins").
X = np.stack([featurize_pull(pull, 15) for pull in path_dfs], axis=0)
# Target array built from the labels returned by load_all_pulls
y = np.array(labels)
# Show the features of the first pull
X[0]

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Single stratified split holding out 20% of the pulls for testing.
# Keep deterministic: the seed is passed to the splitter itself so the split
# does not depend on the state of the global NumPy RNG when this cell runs.
# The global seed is still set for any other code that draws from np.random.
np.random.seed(13)
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=13)
train_ind, test_ind = next(sss.split(X, y))
X_train, X_test = X[train_ind], X[test_ind]
y_train, y_test = y[train_ind], y[test_ind]
# Print to check class balance
y_train, y_test

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Sweep the second argument of featurize_pull (logged as "num_angle_bins") and
# record one W&B run per setting: a standardized multinomial logistic
# regression is fitted on the training split and evaluated on the held-out split.
#
# NOTE(review): the loop rebinds the notebook-level X, y, X_train, X_test,
# y_train and y_test, so every cell below sees the split built for the LAST
# value in NUM_ANGLE_BINS rather than the one from the earlier split cell.
# Kept as-is for compatibility; confirm this is intended.
NUM_ANGLE_BINS = list(range(3, 30))
for num_bins in NUM_ANGLE_BINS:
  # Using the run as a context manager finishes it on exit, including when an
  # iteration raises, so a failure cannot leave a run open for the next
  # wandb.init call.
  with wandb.init(
    # Set the project where this run will be logged
    project="rhexis-classification-temp-logreg",
    entity="rhexis-trajectory",
    # We pass a run name (otherwise it'll be randomly assigned, like sunshine-lollypop-10)
    name="temporal_classification",
    # Track hyperparameters and run metadata
    config={
      "num_angle_bins":num_bins,
    }):

    X, y = np.stack([featurize_pull(pull, num_bins) for pull in path_dfs], axis=0), np.array(labels)
    # Same seed for every setting so all runs are compared on the same split;
    # the seed is given to the splitter directly instead of relying only on
    # the global NumPy RNG.
    np.random.seed(13)
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=13)
    train_ind, test_ind = next(sss.split(X, y))
    X_train, X_test = X[train_ind], X[test_ind]
    y_train, y_test = y[train_ind], y[test_ind]
    clf = LogisticRegression(random_state=0, multi_class="multinomial", max_iter=1000)
    pipe = make_pipeline(StandardScaler(), clf)
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    # Record held-out accuracy so the runs can be compared across bin counts
    wandb.log({"test_accuracy": float(np.mean(y_pred == y_test))})
    # Plot the learning curve of the full pipeline: passing the bare clf would
    # evaluate the classifier on unscaled features, i.e. a different model
    # from the one fitted above.
    wandb.sklearn.plot_learning_curve(pipe, X_train, y_train)

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Quadratic discriminant analysis on standardized features, scored by 5-fold
# cross-validated accuracy on the current training split.
clf = QuadraticDiscriminantAnalysis()
pipe = make_pipeline(StandardScaler(), clf)
cross_val_score(
  estimator=pipe,
  X=X_train,
  y=y_train,
  scoring='accuracy',
  cv=5,
)

In [None]:
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score

# Cluster the pulls into 3 groups with mini-batch k-means on standardized features.
BATCH_SIZE = 3
scaler = StandardScaler()
kmeans = MiniBatchKMeans(n_clusters=3,
                          random_state=0,
                          batch_size=BATCH_SIZE)

# sklearn's Pipeline does not implement partial_fit, so the two steps are
# driven by hand: fit the scaler on the whole training split first (so every
# batch is scaled identically), then stream the scaled rows to k-means.
X_train_scaled = scaler.fit_transform(X_train)
for i in range(0, len(X_train_scaled), BATCH_SIZE):
  kmeans.partial_fit(X_train_scaled[i:i+BATCH_SIZE])

# Both steps are already fitted; the pipeline is assembled only so `pipe`
# stays available under the same name as in the other cells.
pipe = make_pipeline(scaler, kmeans)

# Apply the same scaling to the test split before assigning clusters
y_pred = kmeans.predict(scaler.transform(X_test))
# Cluster indices are arbitrary and need not line up with the label values,
# so comparing them directly is not meaningful without a cluster-to-label mapping:
# accuracy_score(y_test, y_pred)

In [None]:
from sklearn.neural_network import MLPClassifier

# MLP with two hidden layers (50 and 25 units) trained with L-BFGS on
# standardized features.
clf = MLPClassifier(
  hidden_layer_sizes=(50, 25),
  solver='lbfgs',
  alpha=1e-5,
  random_state=1,
)
pipe = make_pipeline(StandardScaler(), clf)
# Fit on the whole training split so `pipe` is left fitted after this cell
pipe.fit(X_train, y_train)
# 5-fold cross-validated accuracy on the training split
cross_val_score(
  estimator=pipe,
  X=X_train,
  y=y_train,
  scoring='accuracy',
  cv=5,
)
