In [32]:
import numpy as np
import pandas as pd
import sklearn as sk
import convertDictionaries as cd

#import classifiers
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GroupShuffleSplit
from sklearn.svm import SVC

#import metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, f1_score, recall_score, precision_score

In [33]:
data = pd.read_csv("./finalfinalDataset.csv")

In [34]:
y = pd.read_csv('./output_tidy.csv').drop('Unnamed: 0', axis=1)

In [35]:
data.columns

Index(['Unnamed: 0', '0', '1', '2', '3', '4', '5', '6', '7', '8',
       ...
       'wnioskowania', 'wolicjonalny', 'wolitywny', 'zdarzeniowy', 'o1+',
       'o1-', 'o1o', 'o2+', 'o2-', 'o2o'],
      dtype='object', length=325)

Create test and train indices

In [36]:
train_inds, test_inds = next(GroupShuffleSplit(test_size=.20, n_splits=2).split(data, groups=data['verb']))

In [37]:
data = data.drop(["index", 'Unnamed: 0', 'verb'], axis=1)

In [38]:
y_onehot = data.iloc[:, -6:]

In [39]:
X = data.iloc[:, :-6]

In [40]:
X_train = X.iloc[train_inds] 
X_test = X.iloc[test_inds] 
y_train = y_onehot.iloc[train_inds]  
y_test = y_onehot.iloc[test_inds] 

In [41]:
estimators = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier(), 
    MLPClassifier(),
    AdaBoostClassifier(),
    GaussianProcessClassifier(),
    GradientBoostingClassifier(),
    SVC(kernel="linear"),
    SVC(kernel="rbf")
]

In [42]:
cross_val_score(estimators[0], X, y_onehot, cv=5)

array([0.64347826, 0.80701754, 0.73684211, 0.73684211, 0.71929825])

In [43]:
model = estimators[0].fit(X_train, y_train)

In [44]:
model.score(X_test, y_test)

0.8440366972477065

In [30]:
prediction = model.predict(X_test)

In [15]:
classNames = data.columns[-6:].to_numpy()
precision= precision_score(y_test.to_numpy(),prediction, average=None)
recall=recall_score(y_test.to_numpy(),prediction, average=None)
print("class names: {0}".format(classNames))
print("precision: {0}".format(precision))
print("recall: {0}".format(recall))

class names: ['o1+' 'o1-' 'o1o' 'o2+' 'o2-' 'o2o']
precision: [0.85294118 0.         0.70588235 0.84615385 0.         0.67391304]
recall: [0.53703704 0.         0.92307692 0.42307692 0.         0.88571429]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [115]:
prediction[:,:3].sum()

58

In [116]:
prediction[:,3:].sum()

58

In [117]:
y_test.to_numpy()[:,3:].sum()

58

In [118]:
y_test.to_numpy()[:,:3].sum()

58

In [113]:
y_test.to_numpy()[:,:3] - y_test.to_numpy()[:,3:]

array([[ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  1, -1],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 1,  0, -1],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [-1,  0,  1],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  

In [114]:
prediction[:,3:] - prediction[:,:3] 

array([[ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 1,  0, -1],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  