## Moving average filter classifiers results


In [1]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import preprocessing

In [2]:
dataset = pd.read_csv('../Data/imputedWQ.csv', parse_dates=True, header=0, index_col=0)

In [3]:
testset = pd.read_csv('../Data/testing2018-mean_imputed.csv', parse_dates=True, header=0, index_col=0)

In [4]:
cols = list(dataset.columns.values)
cols = cols[:-1]
dataset[cols] = dataset[cols].rolling(window=10).mean()
dataset = dataset.iloc[10:]

In [5]:
X = dataset.drop('EVENT', axis=1)  
y = dataset['EVENT']  

## Classifiers

In [6]:
X_test = testset.drop('EVENT', axis=1)  
y_test = testset['EVENT']  

In [7]:
#moving average on test set
cols = list(testset.columns.values)
cols = cols[:-1]
testset[cols] = testset[cols].rolling(window=10).mean()
testset = testset.iloc[10:]

### Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier

rf_clf = RandomForestClassifier(max_depth=4, n_estimators= 300, random_state=0)

rf_clf.fit(X,y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=4, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=300, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [9]:
predicted = rf_clf.predict(X_test)

In [10]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,predicted)

array([[115637,      0],
       [  1652,    677]])

In [11]:
print(f1_score(y_test,predicted))
print(recall_score(y_test,predicted))
print(precision_score(y_test,predicted))

0.4504324683965402
0.29068269643623873
1.0


### Ada boost

In [12]:
seed=7

ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=4), algorithm="SAMME", n_estimators=50, random_state=seed)

In [13]:
ada_clf.fit(X,y)

AdaBoostClassifier(algorithm='SAMME',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=1.0, n_estimators=50, random_state=7)

In [14]:
ada_pred = ada_clf.predict(X_test)

In [15]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,ada_pred)

array([[114776,    861],
       [  1707,    622]])

In [16]:
print(f1_score(y_test,ada_pred))
print(recall_score(y_test,ada_pred))
print(precision_score(y_test,ada_pred))

0.32633788037775446
0.26706741090596825
0.4194200944032367


### SVM

In [17]:
scaler = preprocessing.StandardScaler()
X_train_standard = scaler.fit_transform(X)

svm_clf = SVC(C=1000, gamma=0.01 ) 

In [18]:
X_test_standard = scaler.transform(X_test) 

In [19]:
svm_clf.fit(X_train_standard, y)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [20]:
svm_pred = svm_clf.predict(X_test_standard)

In [21]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,svm_pred)

array([[108994,   6643],
       [  1628,    701]])

In [22]:
print(f1_score(y_test,svm_pred))
print(recall_score(y_test,svm_pred))
print(precision_score(y_test,svm_pred))

0.14493952238188773
0.30098754830399316
0.0954520697167756
