In [1]:
import pandas as pd
import numpy as np

# Load data

In [32]:
TASK_TYPES = ["indifference", "range"]
BASE_DIR = "narpsdata/agh"

def load_csv(filename):
    df = pd.read_csv(filename, delimiter=" ", header=None)
    df = df.transpose() # row = subject
    df = df.drop([0, 1, 2], axis='index') # drop rows with x,y,z
    df = df.reset_index(drop=True)
    return df

def train_test_split(data, test_size=0.2):
    msk = np.random.rand(len(data)) < (1 - test_size)
    train = data[msk]
    test = data[~msk]

    return train.drop("label", axis='columns').to_numpy(), train["label"].to_numpy(), test.drop("label", axis='columns').to_numpy(), test["label"].to_numpy()

def load_task_data(task, test_size=0.2):
    if task not in TASK_TYPES:
        raise Exception(f"Available tasks: {TASK_TYPES}")

    gain = load_csv(f"{BASE_DIR}/gain_{task}.csv")
    gain["label"] = 1
    loss = load_csv(f"{BASE_DIR}/loss_{task}.csv")
    loss["label"] = 0
    df = pd.concat([gain, loss])
    return train_test_split(df.reset_index(drop=True), test_size=test_size)

# Prepare model

In [33]:
import pandas as pd
import numpy as np
BASE_DIR = "narpsdata/agh"
common_cols = [0, 1, 2]

def load_data(filename):
    df = pd.read_csv(filename, delimiter=" ", header=None)
    return df

def merge_and_fillna(reference_df, target_df):
    merged_df = pd.merge(reference_df, target_df, on=common_cols, how='left')
    merged_df = merged_df.fillna(0)
    return merged_df

def clean_data(gain_range, loss_range, gain_indifference, loss_indifference):
    ind_xyz = set(zip(gain_indifference[0].tolist(), gain_indifference[1].tolist(), gain_indifference[2].tolist()))
    rng_xyz = set(zip(gain_range[0].tolist(), gain_range[1].tolist(), gain_range[2].tolist()))
    xyz = ind_xyz | rng_xyz
    all_coords = pd.DataFrame(list(xyz), columns=common_cols)
    
    new_gain_range = merge_and_fillna(all_coords, gain_range)
    new_loss_range = merge_and_fillna(all_coords, loss_range)
    new_gain_indifference = merge_and_fillna(all_coords, gain_indifference)
    new_loss_indifference = merge_and_fillna(all_coords, loss_indifference)
    return new_gain_range, new_loss_range, new_gain_indifference, new_loss_indifference

def reset_data_frame(df):
    df = df.transpose() # row = subject
    df = df.drop(common_cols, axis='index') # drop rows with x,y,z
    df = df.reset_index(drop=True)
    return df

def train_test_split_merged(data, test_size=0.2):
    msk = np.random.rand(len(data)) < (1 - test_size)
    train = data[msk]
    test = data[~msk]

    return train.drop("gain_or_loss", axis='columns').to_numpy(), train["gain_or_loss"].to_numpy(), test.drop("gain_or_loss", axis='columns').to_numpy(), test["gain_or_loss"].to_numpy()

def load_task_data_merged(test_size=0.2):
    gain_range = load_data(f"{BASE_DIR}/gain_range.csv")
    loss_range = load_data(f"{BASE_DIR}/loss_range.csv")
    gain_indifference = load_data(f"{BASE_DIR}/gain_indifference.csv")
    loss_indifference = load_data(f"{BASE_DIR}/loss_indifference.csv")
    
    gain_range, loss_range, gain_indifference, loss_indifference = clean_data(gain_range, loss_range, gain_indifference, loss_indifference)
    gain_range = reset_data_frame(gain_range)
    loss_range = reset_data_frame(loss_range)
    gain_indifference = reset_data_frame(gain_indifference)
    loss_indifference = reset_data_frame(loss_indifference)

    gain_range["gain_or_loss"] = 1
    loss_range["gain_or_loss"] = 0
    gain_indifference["gain_or_loss"] = 1
    loss_indifference["gain_or_loss"] = 0
    
    df = pd.concat([gain_range, loss_range, gain_indifference, loss_indifference])
    split_data =  train_test_split_merged(df.reset_index(drop=True), test_size=test_size)
    return split_data


In [4]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [34]:
X_train, y_train, X_test, y_test = load_task_data_merged(0.2)

In [35]:
X_train.shape

(152, 465551)

## Linear

In [36]:
clf = SVC(kernel='linear', C=1.0, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Number of support vectors: ", len(clf.support_vectors_))

Accuracy: 0.9
Number of support vectors:  151


In [37]:
clf = SVC(kernel='linear', C=0.001, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Number of support vectors: ", len(clf.support_vectors_))

Accuracy: 0.9
Number of support vectors:  151


In [38]:
clf = SVC(kernel='linear', C=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Number of support vectors: ", len(clf.support_vectors_))

Accuracy: 0.9
Number of support vectors:  151


## RBF

In [39]:
clf = SVC(kernel='rbf', C=1.0, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Number of support vectors: ", len(clf.support_vectors_))

Accuracy: 0.76
Number of support vectors:  152


In [40]:
clf = SVC(kernel='rbf', C=0.001, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Number of support vectors: ", len(clf.support_vectors_))

Accuracy: 0.46
Number of support vectors:  148


In [41]:
clf = SVC(kernel='rbf', C=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Number of support vectors: ", len(clf.support_vectors_))

Accuracy: 0.74
Number of support vectors:  152


## PCA

In [42]:
from sklearn.decomposition import PCA

pca = PCA()
pca.fit(X_train)

pca.explained_variance_ratio_

array([3.36170229e-02, 1.86061246e-02, 1.39463851e-02, 1.04874338e-02,
       1.00899073e-02, 9.90239129e-03, 9.69366337e-03, 9.42622259e-03,
       9.25913786e-03, 9.18191340e-03, 9.08612320e-03, 8.85415569e-03,
       8.76130067e-03, 8.50121470e-03, 8.48193135e-03, 8.27263244e-03,
       8.24196429e-03, 8.16955807e-03, 8.05972293e-03, 7.93511924e-03,
       7.91195464e-03, 7.86153315e-03, 7.82523918e-03, 7.74311068e-03,
       7.67076625e-03, 7.58921506e-03, 7.53722855e-03, 7.48465512e-03,
       7.42629438e-03, 7.40149342e-03, 7.33146440e-03, 7.24550814e-03,
       7.23450410e-03, 7.22403273e-03, 7.13217471e-03, 7.07072830e-03,
       7.04683991e-03, 7.02166597e-03, 6.99615871e-03, 6.95404475e-03,
       6.91853627e-03, 6.88451964e-03, 6.85530184e-03, 6.84609900e-03,
       6.78130707e-03, 6.73602934e-03, 6.68064122e-03, 6.66611690e-03,
       6.61543144e-03, 6.57749764e-03, 6.57647221e-03, 6.53896752e-03,
       6.51586090e-03, 6.49695540e-03, 6.45387533e-03, 6.42847835e-03,
      

## ICA

In [21]:
from sklearn.decomposition import FastICA

ica = FastICA()
ica.fit(X_train)



## Random Forest

In [52]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.64


## Neural Network

In [53]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(50, 20), random_state=42, verbose=True)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =     23278641     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  7.29167D-01    |proj g|=  7.19568D-02

At iterate    1    f=  2.07987D-01    |proj g|=  1.92164D-01

At iterate    2    f=  2.01430D-02    |proj g|=  1.03691D-01

At iterate    3    f=  1.09389D-02    |proj g|=  5.33114D-02

At iterate    4    f=  6.38001D-03    |proj g|=  3.23691D-02

At iterate    5    f=  2.71995D-03    |proj g|=  1.50777D-02

At iterate    6    f=  1.02483D-03    |proj g|=  5.91290D-03

At iterate    7    f=  5.41048D-04    |proj g|=  3.18645D-03

At iterate    8    f=  2.18303D-04    |proj g|=  1.29439D-03

At iterate    9    f=  1.05503D-04    |proj g|=  6.35375D-04

At iterate   10    f=  5.08626D-05    |proj g|=  3.02062D-04

At iterate   11    f=  2.54628D-05    |proj g|=  1.40260D-04

At iterate   12    f=  1.40264D-05    |proj g|=  6.68200D-05

           * * *

Tit   = 