# Ensemble Testing (Under Construction)

We have created a python module that makes the homogeneous ensemble callable.  The module is `homogeneous_ensemble.py`.

In [28]:
import homogeneous_ensemble as he

In [29]:
import numpy as np
import pandas as pd

import random
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder

import time

abalone = pd.read_csv("abalone.csv")
oe_style = OneHotEncoder()
oe_results = oe_style.fit_transform(abalone[["Sex"]])

abalone = pd.DataFrame(oe_results.toarray(), columns=oe_style.categories_).join(abalone)
abalone = abalone.drop("Sex", axis=1)
abalone.shape

(4174, 11)

In [3]:
training, valid = he.split_train_test(abalone, 0.2)
training.head()

Unnamed: 0,"(F,)","(I,)","(M,)",Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
916,1.0,0.0,0.0,0.625,0.475,0.175,1.1435,0.4755,0.2475,0.349,10
3041,0.0,0.0,1.0,0.455,0.345,0.125,0.44,0.169,0.1065,0.135,12
1997,1.0,0.0,0.0,0.505,0.39,0.175,0.692,0.267,0.15,0.215,12
2708,0.0,0.0,1.0,0.7,0.545,0.185,1.6135,0.75,0.4035,0.3685,11
1161,0.0,0.0,1.0,0.5,0.42,0.135,0.6765,0.302,0.1415,0.2065,9


In [4]:

weights, predictors = he.homog_ens(training, 1, 2)
print(weights)

[0.14608345047407156, 0.187294195358973]


In [5]:
print(predictors[0].intercept_)
print(predictors[1].intercept_)

[9.94557459]
[9.86049215]


In [6]:
X_valid = valid.iloc[:,0:-1]
Y_valid = valid.iloc[:,-1]

X_valid.iloc[:5,:]

Unnamed: 0,"(F,)","(I,)","(M,)",Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight
2292,0.0,1.0,0.0,0.275,0.195,0.07,0.0875,0.0345,0.022,0.0255
3738,0.0,1.0,0.0,0.44,0.35,0.11,0.3805,0.1575,0.0895,0.115
2363,0.0,1.0,0.0,0.22,0.16,0.05,0.049,0.0215,0.01,0.015
1596,0.0,1.0,0.0,0.455,0.325,0.135,0.82,0.4005,0.1715,0.211
3189,0.0,1.0,0.0,0.435,0.34,0.12,0.396,0.1775,0.081,0.125


In [7]:
predictions = []
for p in predictors:
    predictions.append(p.predict(X_valid))



In [8]:
predictions

[array([ 5.50633587,  7.61561421,  5.01621738,  8.76198637,  7.59715585,
        10.92600095,  7.29827734,  9.47242012,  8.58089361, 11.24412746,
         6.69276537,  8.86034126, 13.56981253, 11.55075657,  9.6105342 ,
         9.18354227,  4.84579041, 10.06287812, 11.04280062,  9.17181245,
         9.56064731,  6.98146468,  8.56082817, 11.81155907,  8.38705219,
         7.86377824, 10.36856364,  7.06311007, 11.33209935, 12.05175203,
        10.43456007, 10.59240983, 10.25893561, 11.61607206, 10.12541475,
        10.51142908, 10.2162187 ,  7.77865659,  9.28783028,  7.2170109 ,
         9.09472227,  6.53869211, 14.58421735, 10.71083131, 10.66877568,
         9.08920853,  6.73830909, 11.7473956 , 11.64838878,  9.24964118,
         6.66312745, 10.88697003,  7.33225056,  7.31720179, 10.57630058,
        10.68836063, 11.21344924,  8.829899  , 11.8258047 ,  6.49577897,
        10.84543537, 10.92154878, 10.0352472 ,  7.93494586,  9.43224702,
         7.87222023,  9.2755419 ,  9.81018181,  9.0

In [9]:
mean_squared_error(predictions[0], Y_valid)

5.117549186163923

In [10]:
mean_squared_error(predictions[1], Y_valid)

5.099338725270676

In [11]:
#weights[0]*predictions[0]
num = 0
j = 0
weight_sum = 0
while (j < len(weights)):
        num += weights[j]*predictions[j]
        weight_sum += weights[j]
        j+=1
print(weight_sum)

0.3333776458330445


In [18]:
guess = num / weight_sum
mean_squared_error(guess, Y_valid)

5.10496783245492

In [31]:
table_1 = [["DecisionTree",0,0],["SVR",0,0],["kNN",0,0]]
cols = ["Algorithm", "MSE", "Time"]
table_1 = pd.DataFrame(table_1, columns=cols)
table_1

Unnamed: 0,Algorithm,MSE,Time
0,DecisionTree,0,0
1,SVR,0,0
2,kNN,0,0


In [13]:
table_1.loc[2,"MSE"] = mean_squared_error(predictors[0].predict(X_valid), Y_valid)
table_1



Unnamed: 0,Algorithm,MSE
0,DecisionTree,0.0
1,SVR,0.0
2,kNN,5.159082


In [33]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning) # setting ignore as a parameter and further adding category

In [35]:
for i in range(0,3):
    
    #  Training
    t0 = time.time()
    weights, predictors = he.homog_ens(training, i, 256)
    
    predictions = []
    for p in predictors:
        predictions.append(p.predict(X_valid))
    
    
    num = 0
    j = 0
    weight_sum = 0
    while (j < len(weights)):
        num += weights[j]*predictions[j]
        weight_sum += weights[j]
        j+=1
    #print(weight_sum)
    
    guess = num / weight_sum
    
    t1 = time.time()
    
    # Predicting
    #predictions = []
    #for p in predictors:
    #   predictions.append(p.predict(X_valid))
        
        
    table_1.loc[i,"MSE"] = mean_squared_error(guess, Y_valid)
    table_1.loc[i,"Time"] = t1-t0
    
table_1

Unnamed: 0,Algorithm,MSE,Time
0,DecisionTree,6.099553,2.460581
1,SVR,5.098349,164.659896
2,kNN,6.343295,23.91686
