## Moving average filter classifiers results


In [1]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import preprocessing

In [2]:
# Training data: the first row is the header, the first column becomes the
# index, and pandas is asked to parse that index as dates.
dataset = pd.read_csv(
    '../Data/imputedWQ.csv',
    header=0,
    index_col=0,
    parse_dates=True,
)

In [3]:
# Test data (2018, mean-imputed per the file name), read with the same layout
# as the training file: header in row 0, first column as a date-parsed index.
testset = pd.read_csv(
    '../Data/testing2018-mean_imputed.csv',
    header=0,
    index_col=0,
    parse_dates=True,
)

In [4]:
# Split the training frame: EVENT is the label, every other column is a feature.
y = dataset['EVENT']
X = dataset.drop(columns='EVENT')

## Classifiers

In [5]:
# Same feature/label split for the test frame: EVENT is the label,
# the remaining columns are the features.
y_test = testset['EVENT']
X_test = testset.drop(columns='EVENT')

### Random Forest

In [6]:
# Random forest: 300 trees limited to depth 4, with a fixed random_state.
# RandomForestClassifier is already imported in the top import cell, so it is
# not re-imported here.
# NOTE(review): class_weight="balanced" is passed here, but the recorded repr
# below this cell shows class_weight=None. The stored outputs and metrics were
# therefore apparently produced by a different version of this cell; re-run the
# notebook (Restart & Run All) before trusting the reported numbers.
rf_clf = RandomForestClassifier(
    n_estimators=300,
    max_depth=4,
    class_weight="balanced",
    random_state=0,
)

# Fit on the training features/labels. As the last expression of the cell,
# the fitted estimator's repr is displayed.
rf_clf.fit(X, y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=4, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=300, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [7]:
# Predict EVENT labels for the test features with the fitted random forest.
predicted = rf_clf.predict(X_test)

In [8]:
# Confusion matrix for the random forest on the test set.
# Rows are the true classes and columns the predicted classes.
# confusion_matrix is already imported in the top import cell.
confusion_matrix(y_test, predicted)

array([[115411,    226],
       [  1566,    763]])

In [9]:
# Random forest test scores, one per line, in the order: F1, recall, precision.
print(
    *(score(y_test, predicted) for score in (f1_score, recall_score, precision_score)),
    sep="\n"
)

0.4599156118143459
0.3276084156290253
0.7714863498483316


### AdaBoost

In [63]:
# Seed handed to AdaBoost as its random_state.
seed = 7

# AdaBoost over depth-4 decision trees: 203 boosting rounds, algorithm="SAMME".
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=4),
    n_estimators=203,
    algorithm="SAMME",
    random_state=seed,
)

In [64]:
# Fit AdaBoost on the training features X and labels y; as the cell's last
# expression, the fitted estimator's repr is displayed.
ada_clf.fit(X,y)

AdaBoostClassifier(algorithm='SAMME',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=1.0, n_estimators=203, random_state=7)

In [65]:
# Predict EVENT labels for the test features with the fitted AdaBoost model.
ada_pred = ada_clf.predict(X_test)

In [66]:
# Confusion matrix for AdaBoost on the test set.
# Rows are the true classes and columns the predicted classes.
# confusion_matrix is already imported in the top import cell.
confusion_matrix(y_test, ada_pred)

array([[115514,    123],
       [  1505,    824]])

In [67]:
# AdaBoost test scores, one per line, in the order: F1, recall, precision.
print(
    *(score(y_test, ada_pred) for score in (f1_score, recall_score, precision_score)),
    sep="\n"
)

0.503052503052503
0.35379991412623446
0.870116156282999


### SVM

In [68]:
# Standardise the training features. The scaler is fitted here, on the
# training data, and kept so it can be applied to other data afterwards.
scaler = preprocessing.StandardScaler()
X_train_standard = scaler.fit_transform(X)

# Linear-kernel SVM with C=1000.
# NOTE: per the scikit-learn docs, gamma is the kernel coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels only, so gamma=0.01 has no effect with
# kernel='linear'. It is kept so the estimator's recorded repr stays the same.
# A class_weight={1: 10} argument was commented out in this call and is
# therefore not passed.
svm_clf = SVC(
    kernel='linear',
    C=1000,
    gamma=0.01,
)

In [69]:
# Scale the test features with the existing scaler (transform only, no refit).
X_test_standard = scaler.transform(X_test)

In [70]:
# Train the SVM on the standardised training features; as the cell's last
# expression, the fitted estimator's repr is displayed.
svm_clf.fit(X_train_standard, y)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [71]:
# Predict on the standardised test features (X_test_standard), not the raw
# X_test, so the inputs are on the same scale the SVM was trained on.
svm_pred = svm_clf.predict(X_test_standard)

In [72]:
# Confusion matrix for the SVM on the test set.
# Rows are the true classes and columns the predicted classes.
# confusion_matrix is already imported in the top import cell.
confusion_matrix(y_test, svm_pred)

array([[115637,      0],
       [  1863,    466]])

In [73]:
# SVM test scores, one per line, in the order: F1, recall, precision.
print(
    *(score(y_test, svm_pred) for score in (f1_score, recall_score, precision_score)),
    sep="\n"
)

0.33345259391771015
0.2000858737655646
1.0
