## Moving average filter: classifier results


In [74]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import preprocessing

In [75]:
# Training data: the first CSV column becomes the index and is parsed as dates.
dataset = pd.read_csv(
    '../Data/imputedWQ.csv',
    header=0,
    index_col=0,
    parse_dates=True,
)

In [76]:
# Hold-out data used for prediction and scoring; read with the same options as
# the training file (first column is the index, parsed as dates).
testset = pd.read_csv(
    '../Data/testing2018-mean_imputed.csv',
    header=0,
    index_col=0,
    parse_dates=True,
)

In [77]:
# Split the training frame: EVENT is the label, every other column is a feature.
y = dataset['EVENT']
X = dataset.drop(labels='EVENT', axis='columns')

## Classifiers

In [78]:
# Hold-out split: EVENT is the label, every other column is a feature.
X_test = testset.drop('EVENT', axis=1)
y_test = testset['EVENT']

# The estimators below are fitted on X. scikit-learn releases that do not
# validate column names consume features purely by position, so make sure the
# hold-out frame exposes the same feature columns in the same order, and fail
# loudly instead of silently scoring misaligned data.
if not X_test.columns.equals(X.columns):
    missing_cols = X.columns.difference(X_test.columns)
    extra_cols = X_test.columns.difference(X.columns)
    if len(missing_cols) or len(extra_cols):
        raise ValueError(
            "Train/test feature mismatch: missing from test {}, unexpected in test {}".format(
                list(missing_cols), list(extra_cols)
            )
        )
    # Same columns, different order: realign to the training order.
    X_test = X_test[X.columns]

### Random Forest

In [100]:
# Repeat of the import in the notebook's first cell; harmless.
from sklearn.ensemble import RandomForestClassifier

# 300 depth-4 trees with a fixed seed; class_weight="balanced" reweights the
# classes inversely to their frequency in y.
rf_clf = RandomForestClassifier(
    n_estimators=300,
    max_depth=4,
    class_weight="balanced",
    random_state=0,
)

# Last expression of the cell, so the fitted estimator's repr is displayed.
rf_clf.fit(X, y)

RandomForestClassifier(bootstrap=True, class_weight='balanced',
            criterion='gini', max_depth=4, max_features='auto',
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=300, n_jobs=1, oob_score=False, random_state=0,
            verbose=0, warm_start=False)

In [101]:
# Label the hold-out rows with the fitted random forest.
predicted = rf_clf.predict(X=X_test)

In [102]:
# Repeat of the import in the notebook's first cell; harmless.
from sklearn.metrics import confusion_matrix

# Rows are the true labels, columns are the random-forest predictions.
confusion_matrix(y_true=y_test, y_pred=predicted)

array([[105806,   9831],
       [  1353,    976]])

In [103]:
# Random-forest F1, recall and precision on the hold-out set, one per line in that order.
for metric in (f1_score, recall_score, precision_score):
    print(metric(y_test, predicted))

0.14859926918392208
0.41906397595534567
0.09031183492180994


### AdaBoost

In [104]:
seed = 7

# Boost 250 depth-4 decision trees with the SAMME algorithm; the seed makes the
# ensemble reproducible.
weak_learner = DecisionTreeClassifier(max_depth=4)
ada_clf = AdaBoostClassifier(
    weak_learner,
    n_estimators=250,
    algorithm="SAMME",
    random_state=seed,
)

In [105]:
# Train AdaBoost on the full training set; as the last expression, the fitted
# estimator's repr is displayed.
ada_clf.fit(X, y)

AdaBoostClassifier(algorithm='SAMME',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=1.0, n_estimators=250, random_state=7)

In [106]:
# Label the hold-out rows with the fitted AdaBoost ensemble.
ada_pred = ada_clf.predict(X=X_test)

In [107]:
# Repeat of the import in the notebook's first cell; harmless.
from sklearn.metrics import confusion_matrix

# Rows are the true labels, columns are the AdaBoost predictions.
confusion_matrix(y_true=y_test, y_pred=ada_pred)

array([[115497,    140],
       [  1504,    825]])

In [108]:
# AdaBoost F1, recall and precision on the hold-out set, one per line in that order.
for metric in (f1_score, recall_score, precision_score):
    print(metric(y_test, ada_pred))

0.5009107468123862
0.35422928295405753
0.8549222797927462


### SVM

In [213]:
# Standardise the features; the scaler's statistics are learned from the
# training data only.
scaler = preprocessing.StandardScaler()
X_train_standard = scaler.fit_transform(X)

# RBF-kernel SVM with balanced class weights.
# NOTE(review): max_iter=3000 is a hard cap on solver iterations, so the fit may
# stop before converging -- confirm the cap is intended before trusting the scores.
svm_clf = SVC(
    kernel='rbf',
    C=10,
    class_weight='balanced',
    tol=0.01,
    max_iter=3000,
)

In [214]:
# Apply the scaling fitted on the training data to the hold-out features (no refit).
X_test_standard = scaler.transform(X=X_test)

In [215]:
# Train the SVM on the standardised training features; as the last expression,
# the fitted estimator's repr is displayed.
svm_clf.fit(X=X_train_standard, y=y)



SVC(C=10, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=3000, probability=False, random_state=None, shrinking=True,
  tol=0.01, verbose=False)

In [216]:
# Predict on the standardised hold-out features (the SVM was fitted on scaled
# inputs, so the raw X_test must not be used here).
svm_pred = svm_clf.predict(X=X_test_standard)

In [217]:
# Repeat of the import in the notebook's first cell; harmless.
from sklearn.metrics import confusion_matrix

# Rows are the true labels, columns are the SVM predictions.
confusion_matrix(y_true=y_test, y_pred=svm_pred)

array([[114304,   1333],
       [  2135,    194]])

In [218]:
# SVM F1, recall and precision on the hold-out set, one per line in that order.
for metric in (f1_score, recall_score, precision_score):
    print(metric(y_test, svm_pred))

0.10062240663900417
0.08329755259768142
0.12704649639816634


### Gradient Boosting

In [110]:
from sklearn.ensemble import GradientBoostingClassifier

In [127]:
n_est = 100

# Gradient-boosted trees: n_est boosting stages of depth-3 trees, fixed seed.
clf = GradientBoostingClassifier(
    max_depth=3,
    n_estimators=n_est,
    random_state=0,
)

In [128]:
# Train the gradient-boosting model on the full training set; as the last
# expression, the fitted estimator's repr is displayed.
clf.fit(X, y)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=0, subsample=1.0, verbose=0,
              warm_start=False)

In [129]:
# Label the hold-out rows with the fitted gradient-boosting model.
# NOTE(review): this rebinds `predicted`, which the Random Forest section also
# assigns and scores. Re-running the Random Forest confusion-matrix/metric cells
# after this cell would evaluate these predictions instead. A dedicated name
# would avoid that, but check for later cells that read `predicted` first.
predicted = clf.predict(X=X_test)

In [130]:
# Gradient-boosting F1, recall and precision on the hold-out set, one per line
# in that order (`predicted` holds the gradient-boosting output at this point).
for metric in (f1_score, recall_score, precision_score):
    print(metric(y_test, predicted))

0.06571040453111689
0.6127093173035638
0.034716815881666016
