In [1]:
import json
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    RandomForestRegressor,
    GradientBoostingRegressor
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVC

from scipy.stats import mode

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Split data into train and test sets (roughly 2:1 with the default test_size)
def split_data(data, test_size=0.33, random_state=42):
    """Split a 2-D array into train/test features and labels.

    The last column of ``data`` is taken as the label vector and every
    preceding column as a feature.

    Parameters
    ----------
    data : 2-D array
        Must support ``data[:, :-1]`` / ``data[:, -1]`` indexing
        (e.g. a ``numpy.ndarray``).
    test_size : float or int, default 0.33
        Forwarded unchanged to ``train_test_split``.
    random_state : default 42
        Forwarded unchanged to ``train_test_split``; the fixed default keeps
        the split identical across runs.

    Returns
    -------
    list
        The ``train_test_split`` result, unpacked by callers as
        ``X_train, X_test, y_train, y_test``.
    """
    features = data[:, :-1]
    labels = data[:, -1]
    return train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

In [4]:
# Try out several models and also ensemble their results by majority vote
def ensemble_voting(X_train, Y_train, X_test, y_test, random_state=42):
    """Fit three classifiers, report each accuracy, and return the majority vote.

    The models are a random forest (bagging), a linear SVM and AdaBoost
    (boosting). Each is fitted on the training data and evaluated on the
    test data; its accuracy is printed. The per-sample predictions are then
    combined with ``scipy.stats.mode`` across the three models, and the
    accuracy of that ensemble prediction is printed as well.

    Parameters
    ----------
    X_train, Y_train : training features and labels, passed to ``fit``.
    X_test : test features, passed to ``predict``.
    y_test : true test labels, compared element-wise with the predictions.
    random_state : default 42
        Seed handed to every estimator so that repeated runs give the same
        numbers. Pass ``None`` to get unseeded (non-deterministic) models.

    Returns
    -------
    numpy.ndarray
        1-D array with the ensemble (modal) prediction for each test sample.
        When all three models disagree, the value chosen is whatever
        ``scipy.stats.mode`` returns for that row.
    """
    # (label used in the printed report, estimator) in evaluation order.
    models = [
        # Bagging
        (
            "Random Forrest Classifer",
            RandomForestClassifier(
                n_estimators=100,
                bootstrap=True,
                max_features='sqrt',
                random_state=random_state,
            ),
        ),
        # Support Vector Machines
        ("Support Vector Machine", LinearSVC(random_state=random_state)),
        # Boosting
        (
            "Ada Boosting Classifer",
            AdaBoostClassifier(n_estimators=100, random_state=random_state),
        ),
    ]

    predictions = []
    for label, model in models:
        model.fit(X_train, Y_train)
        model_pred = model.predict(X_test)
        print(
            "Accuracy from " + label + ": ",
            (y_test == model_pred).sum() / len(model_pred),
        )
        predictions.append(model_pred)

    # One column per model, one row per test sample.
    votes = np.column_stack(predictions)

    # Depending on the SciPy version, .mode comes back with or without a
    # trailing length-1 axis; reshape(-1) flattens either form to 1-D.
    pred = np.array(mode(votes, axis=1).mode).reshape(-1)

    print("Accuracy from ensemble: ", (y_test == pred).sum() / len(pred))

    return pred

In [None]:
# Run the experiment: split the dataset, then fit the models and ensemble them.
# NOTE(review): `data` is not defined in any cell visible here -- confirm it is
# loaded earlier in the notebook so this cell survives Restart & Run All.
X_train, X_test, y_train, y_test = split_data(data)
pred = ensemble_voting(
    X_train=X_train,
    Y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)