# Ensemble Testing (Under Construction)

We have created a python module that makes the homogeneous ensemble callable.  The module is `homogeneous_ensemble.py`.

In [23]:
import homogeneous_ensemble as he

In [24]:
import numpy as np
import pandas as pd

import random
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder

import time

abalone = pd.read_csv("abalone.csv")
oe_style = OneHotEncoder()
oe_results = oe_style.fit_transform(abalone[["Sex"]])

abalone = pd.DataFrame(oe_results.toarray(), columns=oe_style.categories_).join(abalone)
abalone = abalone.drop("Sex", axis=1)
abalone.shape

(4174, 11)

In [25]:
training, valid = he.split_train_test(abalone, 0.2)
training.head()

Unnamed: 0,"(F,)","(I,)","(M,)",Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
3597,0.0,1.0,0.0,0.455,0.35,0.135,0.5365,0.2855,0.0855,0.1325,7
2788,0.0,0.0,1.0,0.53,0.415,0.12,0.706,0.3355,0.1635,0.1345,9
19,0.0,0.0,1.0,0.355,0.28,0.095,0.2455,0.0955,0.062,0.075,11
1421,0.0,1.0,0.0,0.485,0.375,0.13,0.6025,0.2935,0.1285,0.16,7
2041,0.0,0.0,1.0,0.51,0.405,0.13,0.7175,0.3725,0.158,0.17,9


In [26]:

weights, predictors = he.homog_ens(training, 1, 2)
print(weights)

[0.20027419211681474, 0.16214390795638853]


In [27]:
print(predictors[0].intercept_)
print(predictors[1].intercept_)

[9.9907759]
[9.95509399]


In [28]:
X_valid = valid.iloc[:,0:-1]
Y_valid = valid.iloc[:,-1]

X_valid.iloc[:5,:]

Unnamed: 0,"(F,)","(I,)","(M,)",Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight
2206,0.0,1.0,0.0,0.325,0.2,0.08,0.0995,0.0395,0.0225,0.032
937,1.0,0.0,0.0,0.655,0.46,0.16,1.494,0.6895,0.331,0.1825
3929,0.0,1.0,0.0,0.535,0.385,0.18,1.0835,0.4955,0.2295,0.304
2391,0.0,0.0,1.0,0.545,0.42,0.12,0.7865,0.403,0.185,0.17
1242,1.0,0.0,0.0,0.615,0.475,0.165,1.023,0.4905,0.1955,0.3035


In [29]:
predictions = []
for p in predictors:
    predictions.append(p.predict(X_valid))



In [30]:
predictions

[array([ 5.71294444,  8.67196668, 10.62008391,  8.48843254,  9.86935956,
        11.91071423, 10.21408149, 12.93203477,  4.82793842, 12.94357416,
         9.77930567,  8.25452223,  9.71014471,  8.51887232, 10.54237752,
        11.79702577, 10.84028043,  6.34678133, 10.06599135,  9.34531416,
         8.04091898,  5.69077314, 10.02797812,  9.35444091,  5.91786244,
         7.70499286, 12.75718011, 12.26164338,  6.53553127, 10.06433303,
        11.79928561,  8.76887333,  8.89142476,  9.87371274, 11.549264  ,
         9.14446829,  6.80438948, 10.30374052,  9.5845806 ,  9.45466489,
        10.42074205,  9.63443705,  4.43858657,  6.02888451,  8.6602559 ,
         8.98063047,  7.36800639, 11.36919667,  9.55251167, 10.32894002,
         9.41371273,  9.21190397,  9.78060753, 10.49828905,  8.2649339 ,
        11.44928865, 10.81693359,  6.7478841 ,  7.4600539 ,  7.69482096,
        10.47357378,  5.50615392, 10.4056596 , 10.78494492,  6.75715931,
        11.32160588,  8.64012743,  8.10448781, 10.1

In [31]:
mean_squared_error(predictions[0], Y_valid)

5.502647640456086

In [32]:
mean_squared_error(predictions[1], Y_valid)

5.576742247791828

In [33]:
#weights[0]*predictions[0]
num = 0
j = 0
weight_sum = 0
while (j < len(weights)):
        num += weights[j]*predictions[j]
        weight_sum += weights[j]
        j+=1
print(weight_sum)

0.3624181000732033


In [34]:
guess = num / weight_sum
mean_squared_error(guess, Y_valid)

5.5330933713625825

In [35]:
table_1 = [["DecisionTree",0,0],["SVR",0,0],["kNN",0,0]]
cols = ["Algorithm", "MSE", "Time"]
table_1 = pd.DataFrame(table_1, columns=cols)
table_1

Unnamed: 0,Algorithm,MSE,Time
0,DecisionTree,0,0
1,SVR,0,0
2,kNN,0,0


In [13]:
table_1.loc[2,"MSE"] = mean_squared_error(predictors[0].predict(X_valid), Y_valid)
table_1



Unnamed: 0,Algorithm,MSE
0,DecisionTree,0.0
1,SVR,0.0
2,kNN,5.159082


In [40]:
for i in range(0,3):
    
    #  Training
    t0 = time.time()
    weights, predictors = he.homog_ens(training, i, 256)
    t1 = time.time()
    
    # Predicting
    predictions = []
    for p in predictors:
        predictions.append(p.predict(X_valid))
    table_1.loc[i,"MSE"] = mean_squared_error(predictors[0].predict(X_valid), Y_valid)
    table_1.loc[i,"Time"] = t1-t0

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mo

In [41]:
table_1

Unnamed: 0,Algorithm,MSE,Time
0,DecisionTree,7.577938,2.030542
1,SVR,5.510853,146.164683
2,kNN,8.069544,7.292482
