In [79]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

import warnings; warnings.simplefilter('ignore')

In [80]:
# Load data
# The CSV files have no header row, so pandas labels the columns 0..N-1.
# Columns 0-35 are used as features and column 36 holds the class label.
N_FEATURES = 36
FEATURE_COLUMNS = list(range(N_FEATURES))
CLASS_DUMMY_COLUMNS = ['class1','class2','class3','class4','class5','class6']

train = pd.read_csv('../data/datosEntrenamiento.csv', header = None)
test = pd.read_csv('../data/datosPrueba.csv', header = None)
# Loaded for completeness; not used by the cells visible in this notebook section.
validation = pd.read_csv('../data/datosValidacion.csv', header = None)

train = train.rename(columns = {N_FEATURES: 'class'})
test = test.rename(columns = {N_FEATURES: 'class'})

# Append one-hot encoded copies of the label to each frame.
# Assigning the six names raises if a split does not contain exactly six classes.
train_class_dummies = pd.get_dummies(list(train['class']))
train_class_dummies.columns = CLASS_DUMMY_COLUMNS
test_class_dummies = pd.get_dummies(list(test['class']))
test_class_dummies.columns = CLASS_DUMMY_COLUMNS
train = pd.concat([train, train_class_dummies], axis = 1)
test = pd.concat([test, test_class_dummies], axis = 1)

# Feature matrices: the 36 original feature columns only.
x_train = train[FEATURE_COLUMNS].values
x_test = test[FEATURE_COLUMNS].values

# Targets: the raw label column, kept as an (n, 1) array because the model
# cells flatten it with .ravel(). Each split is indexed through its own frame
# (the original indexed `train` via `test.columns`).
y_train = train[['class']].values
y_test = test[['class']].values

In [81]:
# Random forest baseline with 400 trees, scored on x_test / y_test.
# No random_state is passed, so the printed accuracy can differ between runs.
clf = RandomForestClassifier(n_estimators=400).fit(X=x_train, y=y_train.ravel())
y_pred = clf.predict(X=x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9168609168609169


In [82]:
# Extra-trees baseline with 400 trees, scored on x_test / y_test.
# No random_state is passed, so the printed accuracy can differ between runs.
clf = ExtraTreesClassifier(n_estimators=400).fit(X=x_train, y=y_train.ravel())
y_pred = clf.predict(X=x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9176379176379177


In [83]:
# AdaBoost baseline with 400 boosting rounds, scored on x_test / y_test.
clf = AdaBoostClassifier(n_estimators=400).fit(X=x_train, y=y_train.ravel())
y_pred = clf.predict(X=x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.6961926961926962


In [84]:
# Gradient boosting baseline with 400 boosting stages, scored on x_test / y_test.
clf = GradientBoostingClassifier(n_estimators=400)
clf.fit(X=x_train, y=y_train.ravel())
y_pred = clf.predict(X=x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9168609168609169


In [85]:
# Hard-voting ensemble: random forest, extra trees and gradient boosting each
# cast one vote per sample and the majority label wins.
clf1, clf2, clf3 = (
    RandomForestClassifier(n_estimators=400),
    ExtraTreesClassifier(n_estimators=400),
    GradientBoostingClassifier(n_estimators=400),
)
eclf = VotingClassifier(
    estimators=[('rf', clf1), ('et', clf2), ('gbc', clf3)],
    voting='hard',
).fit(x_train, y_train.ravel())
y_pred = eclf.predict(x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9176379176379177


In [86]:
# Soft-voting ensemble: the same three models, combined through their
# predicted class probabilities instead of their hard labels.
clf1, clf2, clf3 = (
    RandomForestClassifier(n_estimators=400),
    ExtraTreesClassifier(n_estimators=400),
    GradientBoostingClassifier(n_estimators=400),
)
eclf = VotingClassifier(
    estimators=[('rf', clf1), ('et', clf2), ('gbc', clf3)],
    voting='soft',
).fit(x_train, y_train.ravel())
y_pred = eclf.predict(x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9230769230769231


In [87]:
# Weighted soft-voting ensemble. Weights follow the order of `estimators`:
# random forest = 1, extra trees = 3, gradient boosting = 2.
clf1, clf2, clf3 = (
    RandomForestClassifier(n_estimators=400),
    ExtraTreesClassifier(n_estimators=400),
    GradientBoostingClassifier(n_estimators=400),
)
eclf = VotingClassifier(
    estimators=[('rf', clf1), ('et', clf2), ('gbc', clf3)],
    voting='soft',
    weights=[1,3,2],
).fit(x_train, y_train.ravel())
y_pred = eclf.predict(x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9254079254079254


In [88]:
# Grid search over the number of trees of each member of the weighted
# soft-voting ensemble (weights follow the order rf, et, gbc).
clf1 = RandomForestClassifier(n_estimators=400)
clf2 = ExtraTreesClassifier(n_estimators=400)
clf3 = GradientBoostingClassifier(n_estimators=400)
eclf = VotingClassifier(estimators=[('rf', clf1), ('et', clf2), ('gbc', clf3)], voting='soft', weights=[1,3,2])

# `<name>__n_estimators` addresses the parameter of the estimator registered
# under <name>. Two values per model gives 2 * 2 * 2 = 8 candidates, each
# evaluated with 5-fold cross-validation on the training data.
params = {'rf__n_estimators': [20, 400], 'et__n_estimators': [20, 400], 'gbc__n_estimators': [20, 400]}

grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5)
# Flatten the (n, 1) label column to 1-D, as every other cell does; passing the
# column vector only ran quietly because warnings are globally suppressed.
grid = grid.fit(x_train, y_train.ravel())
# predict() uses the best candidate found by the search.
y_pred = grid.predict(x_test)
print(accuracy_score(y_test,y_pred))

0.9246309246309247
