In [242]:
import torch
import numpy as np
from pathlib import Path
import logging
from torch.utils.data import DataLoader
from dataset import ViTacDataset
from torch import nn
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

### Prepare data

In [28]:
class FLAGS():
    def __init__(self):
        self.data_dir = '/home/tasbolat/some_python_examples/data_VT_SNN/'
        self.batch_size = 8
        self.sample_file = 1
        self.lr = 0.01
        self.epochs = 100
args = FLAGS()

In [29]:
train_dataset = ViTacDataset(
    path=args.data_dir, sample_file=f"train_80_20_{args.sample_file}.txt"
)
train_loader = DataLoader(
    dataset=train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4
)
test_dataset = ViTacDataset(
    path=args.data_dir, sample_file=f"test_80_20_{args.sample_file}.txt"
)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4
)

In [30]:
big_train_dataset = []#torch.zeros(240, 156, 325)

labels = []#torch.zeros(240)

for i, (in_tact, _, _, label) in enumerate(train_loader, 0):

    big_train_dataset.append(in_tact.squeeze())

    labels.append(label)

    

big_test_dataset = []#torch.zeros(240, 156, 325)

labels_test = []#torch.zeros(240)

for i, (in_tact, _, _, label) in enumerate(test_loader, 0):

    big_test_dataset.append(in_tact.squeeze())

    labels_test.append(label)
    
big_train_dataset = torch.cat(big_train_dataset,0)

big_test_dataset = torch.cat(big_test_dataset,0)

big_train_dataset.shape, big_test_dataset.shape

(torch.Size([240, 156, 325]), torch.Size([60, 156, 325]))

In [406]:
### Get length

In [442]:
lengths_ = np.linspace(10,320,32, dtype=int)
lengths = lengths_.tolist()
lengths.append(325)
len(lengths)

33

In [444]:

length = lengths_[-1]

X = torch.sum(big_train_dataset[...,:length], dim=2).cpu().numpy()
X_test = torch.sum(big_test_dataset[...,:length], dim=2).cpu().numpy()
y = torch.cat(labels).cpu().numpy()
y_test = torch.cat(labels_test).cpu().numpy()

In [245]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.08, stratify=y, random_state=42)

In [246]:
X_train.shape, X_val.shape, X_test.shape

((220, 156), (20, 156), (60, 156))

In [None]:
svc = SVC(tol=0.00001, max_iter=5000)

In [271]:
svc = SVC(C=0.00004,random_state=21, kernel='linear', max_iter=3000, tol=0.0001) # 0.00004
svc.fit(X_train, y_train)

SVC(C=4e-05, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=3000, probability=False, random_state=21,
  shrinking=True, tol=0.0001, verbose=False)

In [272]:
y_train_pred = svc.predict(X_train)
y_val_pred = svc.predict(X_val)
y_test_pred = svc.predict(X_test)

In [273]:
accuracy_score(y_train, y_train_pred), accuracy_score(y_val, y_val_pred), accuracy_score(y_test, y_test_pred)

(0.9681818181818181, 0.85, 0.8333333333333334)

In [286]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

In [310]:
X.shape

(240, 156)

In [338]:
skf = StratifiedKFold(n_splits=12)

In [347]:
param_grid = {'C':np.linspace(0.000001, 3, 10000),}

In [348]:
svc = SVC(tol=0.00001, max_iter=3000, kernel='linear')
search = GridSearchCV(svc, param_grid, cv=skf,n_jobs=-1, scoring='accuracy')

In [349]:
search.fit(X, y)

GridSearchCV(cv=StratifiedKFold(n_splits=12, random_state=None, shuffle=False),
       error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=3000, probability=False, random_state=None,
  shrinking=True, tol=1e-05, verbose=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'C': array([1.0000e-06, 3.0103e-04, ..., 2.9997e+00, 3.0000e+00])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [350]:
search.cv

StratifiedKFold(n_splits=12, random_state=None, shuffle=False)

In [364]:
search.bgirs

{'C': 0.00030102990299029904}

In [373]:
search.best_index_

1

In [446]:
search.best_estimator_.C

0.00030102990299029904

In [376]:
search.cv_results_['mean_test_score'][search.best_index_]
search.cv_results_['std_test_score'][search.best_index_]

0.08281086214313246

In [235]:
#np.linspace(0.000001, 3, 100)

In [240]:
max_val_accuracy = 0
current_C = 0
for C in np.linspace(0.000001, 3, 10000):
    svc = SVC(C=C,random_state=21, kernel='linear', max_iter=3000, tol=0.0001) # 0.00004
    svc.fit(X_train, y_train)
    y_val_pred = svc.predict(X_val)
    
    val_accuracy = accuracy_score(y_val, y_val_pred)
    if val_accuracy > max_val_accuracy:
        current_C = C
        max_val_accuracy = val_accuracy

In [241]:
max_val_accuracy, current_C

(0.75, 0.00030102990299029904)

In [None]:
y = torch.cat(labels).cpu().numpy()
y_test = torch.cat(labels_test).cpu().numpy()

# prepare parameters
skf = StratifiedKFold(n_splits=12)
param_grid = {'C':np.linspace(0.000001, 3, 10),} #10000

# collect statistics
cv_std = []
cv_mean = []
test_accs = []
C_params = []

for length in lengths:
    
    print(length)
    # get data
    X = torch.sum(big_train_dataset[...,:length], dim=2).cpu().numpy()
    X_test = torch.sum(big_test_dataset[...,:length], dim=2).cpu().numpy()
    
    # define clf
    svc = SVC(tol=0.00001, max_iter=5000, kernel='linear')
    search = GridSearchCV(svc, param_grid, cv=skf,n_jobs=-1, scoring='accuracy')
    search.fit(X, y)
    
    # collect statistics
    cv_std.append( search.cv_results_['std_test_score'][search.best_index_] )
    cv_mean.append(search.cv_results_['mean_test_score'][search.best_index_])
    C_params.append(search.best_estimator_.C)
    
    # get test result based on the best estimator
    y_test_pred = search.best_estimator_.predict(X_test)
    test_accs.append(accuracy_score(y_test, y_test_pred))