# Model Ensembling

In [53]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree, DecisionTreeRegressor
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score,r2_score
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
import warnings
warnings.simplefilter('ignore')
from sklearn.linear_model import Ridge, ElasticNet

## Wisconsin Dataset

In [3]:
wisc = pd.read_csv("./Cases/Wisconsin/BreastCancer.csv", index_col=0)
wisc.head()

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,Benign
63375,9,1,2,6,4,10,7,7,2,Malignant
76389,10,4,7,2,2,8,6,1,1,Malignant
95719,6,10,10,10,8,10,7,10,7,Malignant
128059,1,1,1,1,2,5,5,1,1,Benign


In [4]:
X = wisc.drop('Class',axis=1)
y = wisc['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.3, stratify=y)

In [5]:
dtc = DecisionTreeClassifier(random_state=24)
svm_l = SVC(kernel='linear')
svm_r = SVC(kernel='rbf')
voting = VotingClassifier([('TREE',dtc),('SVML',svm_l),('SVMR', svm_r)], verbose=True)
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(accuracy_score(y_test,y_pred))

[Voting] ..................... (1 of 3) Processing TREE, total=   0.0s
[Voting] ..................... (2 of 3) Processing SVML, total=   0.0s
[Voting] ..................... (3 of 3) Processing SVMR, total=   0.0s
0.9619047619047619


## Glass Dataset

In [6]:
glass = pd.read_csv("./Cases/Glass Identification/Glass.csv")
glass.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,building_windows_float_processed
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,building_windows_float_processed
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,building_windows_float_processed
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,building_windows_float_processed
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,building_windows_float_processed


In [7]:
X = glass.drop('Type', axis=1)
y = glass['Type']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)

In [8]:
dtc = DecisionTreeClassifier(random_state=24)
svm_l = SVC(kernel='linear')
svm_r = SVC(kernel='rbf')
voting = VotingClassifier([('TREE',dtc),('SVML',svm_l),('SVMR', svm_r)], verbose=True)
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(accuracy_score(y_test,y_pred))

[Voting] ..................... (1 of 3) Processing TREE, total=   0.0s
[Voting] ..................... (2 of 3) Processing SVML, total=   0.0s
[Voting] ..................... (3 of 3) Processing SVMR, total=   0.0s
0.6


## Image Segmentation

In [9]:
image = pd.read_csv("./Cases/Image Segmentation/Image_Segmentation.csv")
image.head()

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.0,0.0,0.333333,0.266667,0.5,0.077778,6.666666,8.333334,7.777778,3.888889,5.0,3.333333,-8.333333,8.444445,0.53858,-0.924817
1,BRICKFACE,105,139,9,0.0,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.0,0.0,0.5,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.0,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.0,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.0,0.0,0.5,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.0,3.333333,-7.333334,7.111111,0.561508,-0.985811


In [10]:
X = image.drop('Class',axis=1)
y = image['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)


In [None]:

lda = LinearDiscriminantAnalysis()
svm_l = SVC(kernel='linear')
svm_r = SVC(kernel='rbf')
lr = LogisticRegression()
voting = VotingClassifier([('LDA',lda),('SVML',svm_l),('SVMR', svm_r),('LR', lr)], verbose=True, voting='hard')    
# voting = 'hard' : highest frequency  
# voting = 'soft' : average of the values
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(f1_score(y_test,y_pred, average='macro'))

[Voting] ...................... (1 of 4) Processing LDA, total=   0.0s
[Voting] ..................... (2 of 4) Processing SVML, total=   0.0s
[Voting] ..................... (3 of 4) Processing SVMR, total=   0.0s
[Voting] ....................... (4 of 4) Processing LR, total=   0.1s
0.8854427736006684


### Checking individually

In [25]:
# Linear Discriminant Analysis
lda.fit(X_train, y_train)
y_pred = lda.predict(X_test)
print(f"f1 score : {f1_score(y_test, y_pred, average='macro')}")

f1 score : 0.8719347388078038


In [None]:
# SVM Linear
svm_l.fit(X_train, y_train)
y_pred = svm_l.predict(X_test)
print(f"f1 score : {f1_score(y_test, y_pred, average='macro')}")

f1 score : 0.8517126148705095


In [27]:
# SVM rbf
svm_r.fit(X_train, y_train)
y_pred = svm_r.predict(X_test)
print(f"f1 score : {f1_score(y_test, y_pred, average='macro')}")

f1 score : 0.6539682539682541


In [None]:
# Logistic Regression
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
print(f"f1 score : {f1_score(y_test, y_pred, average='macro')}")

f1 score : 0.8542229012817248


## Concrete Strength dataset

In [44]:
concrete = pd.read_csv("./Cases/Concrete Strength/Concrete_Data.csv")
concrete.head()

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [46]:
X = concrete.drop('Strength', axis=1)
y = concrete['Strength']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24)

In [72]:
ridge = Ridge(alpha=0.5)
elastic = ElasticNet()
dtc = DecisionTreeRegressor(random_state=24, max_depth=4)
voting = VotingRegressor([('RIDGE',ridge),('Elastic',elastic),('DTC', dtc)], verbose=True, weights=[1,1,3])    
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(r2_score(y_test,y_pred))

[Voting] .................... (1 of 3) Processing RIDGE, total=   0.0s
[Voting] .................. (2 of 3) Processing Elastic, total=   0.0s
[Voting] ...................... (3 of 3) Processing DTC, total=   0.0s
0.7509330302554483


In [57]:
ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)
print(r2_score(y_test,y_pred))


0.5771749099675625


In [49]:

elastic = ElasticNet()
elastic.fit(X_train, y_train)
y_pred = elastic.predict(X_test)
print(r2_score(y_test,y_pred))

0.5766806310401152


In [70]:

dtc = DecisionTreeRegressor(max_depth=4)
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print(r2_score(y_test,y_pred))

0.6615374648404437


## Boston Dataset

In [73]:
boston = pd.read_csv("./Cases/Boston Housing/boston.csv")
boston.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [74]:
X = boston.drop('medv', axis=1)
y = boston['medv']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24)


In [87]:
ridge = Ridge(alpha=0.5)
elastic = ElasticNet()
dtc = DecisionTreeRegressor(random_state=24, max_depth=8)
voting = VotingRegressor([('RIDGE',ridge),('Elastic',elastic),('DTC', dtc)], verbose=True, weights=[2,1,3])    
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(r2_score(y_test,y_pred))

[Voting] .................... (1 of 3) Processing RIDGE, total=   0.0s
[Voting] .................. (2 of 3) Processing Elastic, total=   0.0s
[Voting] ...................... (3 of 3) Processing DTC, total=   0.0s
0.7847836681863113


In [84]:
ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)
print(f'Ridge : {r2_score(y_test,y_pred)}')
elastic = ElasticNet()
elastic.fit(X_train, y_train)
y_pred = elastic.predict(X_test)
print(f'Elastic: {r2_score(y_test,y_pred)}')
dtc = DecisionTreeRegressor()
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print(f'Decision Tree: {r2_score(y_test,y_pred)}')

Ridge : 0.7032547843448866
Elastic: 0.6746117127668813
Decision Tree: 0.7413193832019378
