In [39]:
import sys
import os
# Put the sibling ``diffpy`` directory at the front of the import path.
# The path is assembled with os.path.join so the separator is correct on every
# OS; a hard-coded '..\\diffpy' only resolves on Windows, because on POSIX the
# backslash is an ordinary filename character.
sys.path.insert(0, os.path.abspath(os.path.join('..', 'diffpy')))
import numpy as np
import matplotlib.pyplot as plt
from features import *
import pandas as pd
import trajectories as traj
import msds as msds
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [55]:
# Array passed to calculateFeatures through `binned`: values from -300 to 300
# in steps of 30, wrapped in a one-element list.
bins = [np.arange(-300, 301, 30)]

# Keyword arguments that every walks2D call below has in common.
walk_common = dict(steps=100, N=1000, startrange=(-200, 200))

# Label 1: only the common arguments.
x1, y1 = traj.walks2D(**walk_common)
M1, ft1, mft1, sft1 = calculateFeatures(x1, y1, 0.01, labelled=1, binned=bins)

# Label 2: drift=(1.0, 1.0), theta=30.
x2, y2 = traj.walks2D(drift=(1.0, 1.0), theta=30, **walk_common)
M2, ft2, mft2, sft2 = calculateFeatures(x2, y2, 0.01, labelled=2, binned=bins)

# Label 3: scale=(5.0, 5.0).
x3, y3 = traj.walks2D(scale=(5.0, 5.0), **walk_common)
M3, ft3, mft3, sft3 = calculateFeatures(x3, y3, 0.01, labelled=3, binned=bins)

# Label 4: drift=(2.0, 2.0), theta=30.
x4, y4 = traj.walks2D(drift=(2.0, 2.0), theta=30, **walk_common)
M4, ft4, mft4, sft4 = calculateFeatures(x4, y4, 0.01, labelled=4, binned=bins)

# Label 5: circles=(0.4, 8).
x5, y5 = traj.walks2D(circles=(0.4, 8), **walk_common)
M5, ft5, mft5, sft5 = calculateFeatures(x5, y5, 0.01, labelled=5, binned=bins)

# Label 6: circles=(0.7, 8).
x6, y6 = traj.walks2D(circles=(0.7, 8), **walk_common)
M6, ft6, mft6, sft6 = calculateFeatures(x6, y6, 0.01, labelled=6, binned=bins)

In [56]:
# Stack the six per-label frames row-wise, renumbering the index from 0.
# mfts collects the mft* frames and sfts the sft* frames.
mfts = pd.concat([mft1, mft2, mft3, mft4, mft5, mft6], ignore_index=True)
sfts = pd.concat([sft1, sft2, sft3, sft4, sft5, sft6], ignore_index=True)

In [57]:
# Classification target: the 'Label' column of mfts as a NumPy array.
label_column = mfts['Label']
y = label_column.to_numpy()

In [58]:
# Strip the 'x', 'y' and 'Label' columns so only feature columns remain
# ('Label' has already been copied into `y`).
# The names are rebound to the frames returned by drop() instead of using
# inplace=True, so the frames built by the concat cell are never mutated.
non_feature_cols = ['x', 'y', 'Label']
mfts = mfts.drop(columns=non_feature_cols)
sfts = sfts.drop(columns=non_feature_cols)

In [59]:
# Place the two frames side by side; `keys` adds an outer column level
# ('Mean' for mfts, 'Std' for sfts) so equally named columns stay distinct.
fts = pd.concat([mfts, sfts], axis=1, keys=['Mean', 'Std'])

# Plain NumPy view of the same table. The train/test split further down in
# this notebook is given `fts` itself rather than this array.
X = fts.to_numpy()

In [60]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [61]:
# Seed handed to every estimator that has internal randomness, so that
# re-running the notebook reproduces the same scores.
RANDOM_STATE = 42

# Each display name sits next to its estimator, so the two cannot drift out of
# step when an entry is added, removed or commented out.
named_classifiers = [
    ("Nearest Neighbors", KNeighborsClassifier(3)),
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("RBF SVM", SVC(gamma=2, C=1)),
    # ("Gaussian Process", GaussianProcessClassifier(1.0 * RBF(1.0))),
    ("Decision Tree", DecisionTreeClassifier(max_depth=5, random_state=RANDOM_STATE)),
    (
        "Random Forest",
        RandomForestClassifier(
            max_depth=5, n_estimators=10, max_features=1, random_state=RANDOM_STATE
        ),
    ),
    ("Neural Net", MLPClassifier(alpha=1, max_iter=1000, random_state=RANDOM_STATE)),
    ("AdaBoost", AdaBoostClassifier(random_state=RANDOM_STATE)),
    ("Naive Bayes", GaussianNB()),
    ("QDA", QuadraticDiscriminantAnalysis()),
]

# Parallel lists consumed by the evaluation loops; derived from the pairs above
# so they always have matching order and length.
names = [label for label, _ in named_classifiers]
classifiers = [estimator for _, estimator in named_classifiers]

In [62]:
# Hold out 40% of the rows of the combined Mean/Std feature table for testing;
# the fixed random_state makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    fts,
    y,
    test_size=0.4,
    random_state=42,
)

In [63]:
# Fit every classifier on the training split and print its score on the
# held-out split. Each one is wrapped in the same preprocessing pipeline.
for name, clf in zip(names, classifiers):
    steps = [
        # NaN entries are replaced by the column mean.
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
        ('scaler', StandardScaler()),
        # ('pca', PCA(n_components=5)),
        (name, clf),
    ]
    pipe = Pipeline(steps)
    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test)
    print('{}: {}'.format(name, score))

Nearest Neighbors: 0.9026915113871635
Linear SVM: 0.9627329192546584
RBF SVM: 0.494824016563147
Decision Tree: 0.9979296066252588
Random Forest: 0.9813664596273292
Neural Net: 0.9875776397515528
AdaBoost: 0.4865424430641822
Naive Bayes: 0.9875776397515528
QDA: 0.9958592132505176




In [64]:
# Stack the six ft* frames row-wise. No ignore_index here, so each frame's
# original index values are carried over unchanged.
fts1 = pd.concat([ft1, ft2, ft3, ft4, ft5, ft6])

In [65]:
# Feature matrix: every column except the first two and the last two, chosen
# by position.
# NOTE(review): presumably the skipped columns are coordinates and label-type
# columns. Confirm against the column order of the ft* frames.
X = fts1.to_numpy()[:, 2:-2]

# Target: the 'Label' column as a NumPy array.
y = fts1['Label'].to_numpy()

# Same 60/40 partition settings as the earlier split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [66]:
# Repeat the classifier comparison on the current train/test split: impute
# NaNs with the column mean, standardize, fit, then print the test score.
for name, clf in zip(names, classifiers):
    preprocessing = [
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
        ('scaler', StandardScaler()),
        # ('pca', PCA(n_components=5)),
    ]
    pipe = Pipeline(preprocessing + [(name, clf)])
    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test)
    print('{}: {}'.format(name, score))

Nearest Neighbors: 0.8383333333333334
Linear SVM: 0.8733333333333333
RBF SVM: 0.8579166666666667
Decision Tree: 0.9895833333333334
Random Forest: 0.89625
Neural Net: 0.9766666666666667
AdaBoost: 0.4845833333333333
Naive Bayes: 0.9854166666666667
QDA: 0.9979166666666667


