# Evaluate *tsfresh* selected features with Random Forest and Extra Trees classifiers

### Cross validation

In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier

def create_rf(n_trees):
    """Build an unfitted Random Forest classifier.

    Args:
        n_trees: Number of trees, forwarded as ``n_estimators``.

    Returns:
        A new ``RandomForestClassifier``; all other hyper-parameters keep
        their library defaults.
    """
    forest = RandomForestClassifier(n_estimators=n_trees)
    return forest

In [13]:
# Load the label table (columns `number` and `ill`, per the summary printed
# below) and show its schema.
ids = pd.read_csv(filepath_or_buffer='csv/ids.csv')
ids.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   number  55 non-null     object
 1   ill     55 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1008.0+ bytes


In [14]:
# Feature-table variants; each has a plain and an "hourly" CSV on disk.
names = ['all', 'night', 'day']

# Map variant name -> feature table loaded from the matching CSV.
selected = {n: pd.read_csv(f'csv/selected_{n}.csv') for n in names}
hourly_selected = {n: pd.read_csv(f'csv/selected_hourly_{n}.csv') for n in names}

In [15]:
from sklearn.model_selection import cross_val_score

# Fold counts and ensemble sizes swept by the evaluation helpers.
FOLDS = [5, 10]
TREES = [100, 500, 1000]
# Target labels: the `ill` column of the ids table.
# NOTE(review): assumes every feature table has its rows in the same order
# as `ids` -- confirm against the code that produced the CSVs.
y = ids['ill']

def cross_validate(name: str, data: pd.DataFrame, create_clf=None,
                   clf_label: str = 'Random Forest Classifier', random_state=None):
    """Cross-validate a tree ensemble for every (TREES, FOLDS) combination.

    For each ensemble size in ``TREES`` and each fold count in ``FOLDS`` a
    fresh classifier is built, scored with ``cross_val_score`` against the
    module-level labels ``y``, and the min / max / mean / std of the fold
    scores are printed.

    Args:
        name: Label of the feature table, used only in the printed header.
        data: Feature table passed to ``cross_val_score`` as ``X``.
        create_clf: Callable ``n_trees -> estimator``. Defaults to
            ``create_rf``. Passing another factory avoids redefining this
            function for a different classifier.
        clf_label: Classifier name shown in the printed header.
        random_state: Optional seed applied to each classifier via
            ``set_params`` so repeated runs give the same scores. ``None``
            (default) leaves the classifier exactly as the factory built it.

    Returns:
        None. Results are printed.
    """
    if create_clf is None:
        # Resolved at call time so a re-run of the `create_rf` cell is picked up.
        create_clf = create_rf
    for t in TREES:
        for f in FOLDS:
            clf = create_clf(t)
            if random_state is not None:
                clf.set_params(random_state=random_state)
            print(f'[{name}] {f}-fold cross validation with {clf_label} ({t} trees):')
            # An integer `cv` lets scikit-learn pick its default splitter.
            scores = cross_val_score(clf, data, y, cv=f)
            print(f'Results:\n\tMin: {min(scores)}\n\tMax: {max(scores)}\n\tMean: {np.mean(scores)}\n\tStd: {np.std(scores)}')

In [16]:
# Cross-validate every table in `selected` (keys: 'all', 'night', 'day').
for subset, features in selected.items():
    cross_validate(name=subset, data=features)

[all] 5-fold cross validation with Random Forest Classifier (100 trees):
Results:
	Min: 0.7272727272727273
	Max: 0.9090909090909091
	Mean: 0.8181818181818181
	Std: 0.05749595745760688
[all] 10-fold cross validation with Random Forest Classifier (100 trees):
Results:
	Min: 0.6666666666666666
	Max: 1.0
	Mean: 0.8733333333333334
	Std: 0.11234866364235145
[all] 5-fold cross validation with Random Forest Classifier (500 trees):
Results:
	Min: 0.7272727272727273
	Max: 0.9090909090909091
	Mean: 0.8545454545454545
	Std: 0.0727272727272727
[all] 10-fold cross validation with Random Forest Classifier (500 trees):
Results:
	Min: 0.6
	Max: 1.0
	Mean: 0.8533333333333333
	Std: 0.13840359661351131
[all] 5-fold cross validation with Random Forest Classifier (1000 trees):
Results:
	Min: 0.7272727272727273
	Max: 0.9090909090909091
	Mean: 0.8363636363636363
	Std: 0.06803013430498073
[all] 10-fold cross validation with Random Forest Classifier (1000 trees):
Results:
	Min: 0.6
	Max: 1.0
	Mean: 0.8533333333

In [24]:
# Cross-validate every table in `hourly_selected` (keys: 'all', 'night', 'day').
for subset, features in hourly_selected.items():
    cross_validate(name=subset, data=features)

[all] 5-fold cross validation with Random Forest Classifier (100 trees):
Results:
	Min: 0.5454545454545454
	Max: 0.9090909090909091
	Mean: 0.7454545454545455
	Std: 0.12060453783110546
[all] 10-fold cross validation with Random Forest Classifier (100 trees):
Results:
	Min: 0.5
	Max: 1.0
	Mean: 0.78
	Std: 0.15790292376436016
[all] 5-fold cross validation with Random Forest Classifier (500 trees):
Results:
	Min: 0.6363636363636364
	Max: 0.9090909090909091
	Mean: 0.7636363636363637
	Std: 0.09270944570168699
[all] 10-fold cross validation with Random Forest Classifier (500 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.8
	Std: 0.19321835661585918
[all] 5-fold cross validation with Random Forest Classifier (1000 trees):
Results:
	Min: 0.6363636363636364
	Max: 0.9090909090909091
	Mean: 0.7636363636363637
	Std: 0.09270944570168699
[all] 10-fold cross validation with Random Forest Classifier (1000 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.78
	Std: 0.18147543451754933
[night] 5-fold cross val

### Test/train split validation

In [18]:
from sklearn.model_selection import train_test_split

# Fraction of the rows held out as the test set.
TEST_RATIO = .2

def validate(name: str, data: pd.DataFrame, random_state=None):
    """Score Random Forests of each size in ``TREES`` on one hold-out split.

    The data is split once into train and test parts (``TEST_RATIO`` of the
    rows go to the test set). Then, for every ensemble size, a forest is fitted
    on the train part and its accuracy on the test part is printed.

    Args:
        name: Label of the feature table, used only in the printed header.
        data: Feature table; split together with the module-level labels ``y``.
        random_state: Optional seed used for both the split and the forests so
            a run can be reproduced. ``None`` (default) keeps everything
            unseeded.

    Returns:
        None. Results are printed.
    """
    # Split once, outside the loop: every forest size is then scored on the
    # same hold-out rows, so score differences reflect the number of trees
    # rather than a different random split per iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        data, y, test_size=TEST_RATIO, random_state=random_state)
    for t in TREES:
        rf = create_rf(t)
        if random_state is not None:
            rf.set_params(random_state=random_state)
        rf.fit(X_train, y_train)
        score = rf.score(X_test, y_test)
        print(f'[{name}] validation with Random Forest Classifier ({t} trees):')
        print(f'\tTrain size: {X_train.shape}\n\tTest size: {X_test.shape}')
        print(f'\tAccuracy score: {score}')

In [22]:
# Hold-out validation for every table in `selected`.
for subset, features in selected.items():
    validate(name=subset, data=features)

[all] validation with Random Forest Classifier (100 trees):
	Train size: (44, 14)
	Test size: (11, 14)
	Accuracy score: 0.9090909090909091
[all] validation with Random Forest Classifier (500 trees):
	Train size: (44, 14)
	Test size: (11, 14)
	Accuracy score: 0.7272727272727273
[all] validation with Random Forest Classifier (1000 trees):
	Train size: (44, 14)
	Test size: (11, 14)
	Accuracy score: 0.9090909090909091
[night] validation with Random Forest Classifier (100 trees):
	Train size: (44, 47)
	Test size: (11, 47)
	Accuracy score: 0.8181818181818182
[night] validation with Random Forest Classifier (500 trees):
	Train size: (44, 47)
	Test size: (11, 47)
	Accuracy score: 0.6363636363636364
[night] validation with Random Forest Classifier (1000 trees):
	Train size: (44, 47)
	Test size: (11, 47)
	Accuracy score: 0.8181818181818182
[day] validation with Random Forest Classifier (100 trees):
	Train size: (44, 2)
	Test size: (11, 2)
	Accuracy score: 0.9090909090909091
[day] validation with

In [23]:
# Hold-out validation for every table in `hourly_selected`.
for subset, features in hourly_selected.items():
    validate(name=subset, data=features)

[all] validation with Random Forest Classifier (100 trees):
	Train size: (44, 24)
	Test size: (11, 24)
	Accuracy score: 0.7272727272727273
[all] validation with Random Forest Classifier (500 trees):
	Train size: (44, 24)
	Test size: (11, 24)
	Accuracy score: 0.8181818181818182
[all] validation with Random Forest Classifier (1000 trees):
	Train size: (44, 24)
	Test size: (11, 24)
	Accuracy score: 0.9090909090909091
[night] validation with Random Forest Classifier (100 trees):
	Train size: (44, 61)
	Test size: (11, 61)
	Accuracy score: 0.7272727272727273
[night] validation with Random Forest Classifier (500 trees):
	Train size: (44, 61)
	Test size: (11, 61)
	Accuracy score: 0.6363636363636364
[night] validation with Random Forest Classifier (1000 trees):
	Train size: (44, 61)
	Test size: (11, 61)
	Accuracy score: 0.7272727272727273
[day] validation with Random Forest Classifier (100 trees):
	Train size: (44, 8)
	Test size: (11, 8)
	Accuracy score: 0.9090909090909091
[day] validation with

### ExtraTreesClassifier

In [27]:
from sklearn.ensemble import ExtraTreesClassifier

def create_et(n_trees):
    """Build an unfitted Extra Trees classifier.

    Args:
        n_trees: Number of trees, forwarded as ``n_estimators``.

    Returns:
        A new ``ExtraTreesClassifier``; all other hyper-parameters keep
        their library defaults.
    """
    extra_trees = ExtraTreesClassifier(n_estimators=n_trees)
    return extra_trees

def cross_validate(name: str, data: pd.DataFrame, create_clf=None,
                   clf_label: str = 'Extra Trees Classifier', random_state=None):
    """Cross-validate a tree ensemble for every (TREES, FOLDS) combination.

    NOTE: this definition replaces the ``cross_validate`` defined in the
    Random Forest section. After this cell has run, re-running the earlier
    call cells evaluates Extra Trees unless ``create_clf`` / ``clf_label``
    are passed explicitly.

    For each ensemble size in ``TREES`` and each fold count in ``FOLDS`` a
    fresh classifier is built, scored with ``cross_val_score`` against the
    module-level labels ``y``, and the min / max / mean / std of the fold
    scores are printed.

    Args:
        name: Label of the feature table, used only in the printed header.
        data: Feature table passed to ``cross_val_score`` as ``X``.
        create_clf: Callable ``n_trees -> estimator``. Defaults to
            ``create_et``.
        clf_label: Classifier name shown in the printed header.
        random_state: Optional seed applied to each classifier via
            ``set_params`` so repeated runs give the same scores. ``None``
            (default) leaves the classifier exactly as the factory built it.

    Returns:
        None. Results are printed.
    """
    if create_clf is None:
        # Resolved at call time so a re-run of the `create_et` cell is picked up.
        create_clf = create_et
    for t in TREES:
        for f in FOLDS:
            clf = create_clf(t)
            if random_state is not None:
                clf.set_params(random_state=random_state)
            print(f'[{name}] {f}-fold cross validation with {clf_label} ({t} trees):')
            # An integer `cv` lets scikit-learn pick its default splitter.
            scores = cross_val_score(clf, data, y, cv=f)
            print(f'Results:\n\tMin: {min(scores)}\n\tMax: {max(scores)}\n\tMean: {np.mean(scores)}\n\tStd: {np.std(scores)}')

In [28]:
# Extra Trees cross-validation for every table in `selected`.
for subset, features in selected.items():
    cross_validate(name=subset, data=features)

[all] 5-fold cross validation with Extra Trees Classifier (100 trees):
Results:
	Min: 0.7272727272727273
	Max: 0.9090909090909091
	Mean: 0.7818181818181819
	Std: 0.07272727272727271
[all] 10-fold cross validation with Extra Trees Classifier (100 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.7966666666666666
	Std: 0.1929018172830705
[all] 5-fold cross validation with Extra Trees Classifier (500 trees):
Results:
	Min: 0.6363636363636364
	Max: 0.9090909090909091
	Mean: 0.7818181818181819
	Std: 0.092709445701687
[all] 10-fold cross validation with Extra Trees Classifier (500 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.8133333333333332
	Std: 0.17269111795984826
[all] 5-fold cross validation with Extra Trees Classifier (1000 trees):
Results:
	Min: 0.6363636363636364
	Max: 0.9090909090909091
	Mean: 0.7636363636363637
	Std: 0.09270944570168699
[all] 10-fold cross validation with Extra Trees Classifier (1000 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.7966666666666666
	Std: 0.17792008193443

In [29]:
# Extra Trees cross-validation for every table in `hourly_selected`.
for subset, features in hourly_selected.items():
    cross_validate(name=subset, data=features)

[all] 5-fold cross validation with Extra Trees Classifier (100 trees):
Results:
	Min: 0.6363636363636364
	Max: 1.0
	Mean: 0.8181818181818181
	Std: 0.1149919149152138
[all] 10-fold cross validation with Extra Trees Classifier (100 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.7766666666666666
	Std: 0.17194960502038575
[all] 5-fold cross validation with Extra Trees Classifier (500 trees):
Results:
	Min: 0.6363636363636364
	Max: 1.0
	Mean: 0.8181818181818181
	Std: 0.1149919149152138
[all] 10-fold cross validation with Extra Trees Classifier (500 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.8133333333333332
	Std: 0.1944793619441976
[all] 5-fold cross validation with Extra Trees Classifier (1000 trees):
Results:
	Min: 0.6363636363636364
	Max: 1.0
	Mean: 0.8
	Std: 0.12060453783110545
[all] 10-fold cross validation with Extra Trees Classifier (1000 trees):
Results:
	Min: 0.4
	Max: 1.0
	Mean: 0.7933333333333333
	Std: 0.18427033281447006
[night] 5-fold cross validation with Extra Trees Classif

### SVM

In [None]:
# TODO: the SVM evaluation for this section has not been written yet.