# ENSEMBLE WITH WEIGHTED VOTING

In [None]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn import metrics

#### Database read  

In [None]:
# Load the replicated acoustic-features Parkinson CSV into a DataFrame.
csv_path = 'ReplicatedAcousticFeatures-ParkinsonDatabase.csv'
df = pd.read_csv(csv_path)
# Trailing expression so the notebook displays (n_rows, n_columns).
df.shape

(240, 48)

In [None]:
# Show the first rows of the table as a quick sanity check of the columns.
preview = df.head()
preview

Unnamed: 0,ID,Recording,Status,Gender,Jitter_rel,Jitter_abs,Jitter_RAP,Jitter_PPQ,Shim_loc,Shim_dB,...,Delta3,Delta4,Delta5,Delta6,Delta7,Delta8,Delta9,Delta10,Delta11,Delta12
0,CONT-01,1,0,1,0.25546,1.5e-05,0.001467,0.001673,0.030256,0.26313,...,1.407701,1.417218,1.380352,1.42067,1.45124,1.440295,1.403678,1.405495,1.416705,1.35461
1,CONT-01,2,0,1,0.36964,2.2e-05,0.001932,0.002245,0.023146,0.20217,...,1.331232,1.227338,1.213377,1.352739,1.354242,1.365692,1.32287,1.314549,1.318999,1.323508
2,CONT-01,3,0,1,0.23514,1.3e-05,0.001353,0.001546,0.019338,0.1671,...,1.412304,1.324674,1.276088,1.429634,1.455996,1.368882,1.438053,1.38891,1.305469,1.305402
3,CONT-02,1,0,0,0.2932,1.7e-05,0.001105,0.001444,0.024716,0.20892,...,1.5012,1.53417,1.323993,1.496442,1.472926,1.643177,1.551286,1.638346,1.604008,1.621456
4,CONT-02,2,0,0,0.23075,1.5e-05,0.001073,0.001404,0.013119,0.11607,...,1.508468,1.334511,1.610694,1.685021,1.417614,1.574895,1.640088,1.533666,1.297536,1.382023


#### Drop the ID column and separate the Status label

In [None]:
# Keep the class label aside before it is removed from the feature table.
label = df['Status']
# Remove the subject identifier and the label in one in-place pass so that
# only feature columns remain.
df.drop(columns=['ID', 'Status'], inplace=True)
# From here on `df` is a plain NumPy array, no longer a DataFrame.
df = df.values
samples, features = df.shape
print(samples, features)

240 46


In [None]:
# Cast the feature matrix to 64-bit floats for the estimators.
# `np.float` was only an alias of the builtin `float` (deprecated in NumPy
# 1.20, removed in 1.24, where it raises AttributeError); `np.float64` is the
# dtype that alias resolved to, so the resulting array is unchanged.
data = df.astype(np.float64)

#### Import `train_test_split` and split the dataset into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is done per row (recording). The preview output
# shows several recordings per subject ID, so recordings of the same subject
# can presumably end up in both sets -- confirm whether a group-aware split
# is wanted.
split = train_test_split(data, label, test_size=0.2, random_state=100)
X_train, X_test, y_train, y_test = split

#### Base models 

In [None]:
# The three base learners that take part in the weighted vote.
model1 = DecisionTreeClassifier(random_state=1)
model2 = GaussianNB()
model3 = LogisticRegression(max_iter=500)

# NOTE(review): the recorded run of this cell reports "TOTAL NO. of
# ITERATIONS REACHED LIMIT" for the logistic regression, i.e. the solver
# stopped at max_iter=500 on the unscaled features. Scaling the inputs or
# raising max_iter would address it, but would also change the accuracies
# reported further down.

# Fit every learner on the training split, in order.
for _clf in (model1, model2, model3):
    _clf.fit(X_train, y_train)

# Predict the held-out split with each fitted learner.
pred1, pred2, pred3 = (
    _fitted.predict(X_test) for _fitted in (model1, model2, model3)
)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Show the configuration of each fitted estimator, one per line.
for fitted_model in (model1, model2, model3):
    print(fitted_model)

DecisionTreeClassifier(random_state=1)
GaussianNB()
LogisticRegression(max_iter=500)


In [None]:
# Print the test-set predictions of every base model under its own title.
for title, predicted in (
    ("decision tree classifier prediction:\n", pred1),
    ("naive bayes classifier prediction:\n", pred2),
    ("logistic regression classifier prediction:\n", pred3),
):
    print(title, predicted)

decision tree classifier prediction:
 [0 1 1 1 1 0 0 1 0 0 1 0 0 0 1 1 0 1 1 0 1 0 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1
 1 0 0 0 1 1 1 0 1 0 0]
naive bayes classifier prediction:
 [0 1 1 1 1 0 1 1 0 1 1 0 0 0 1 1 0 1 1 0 0 0 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1
 1 0 0 0 1 1 1 0 1 0 0]
logistic regression classifier prediction:
 [0 1 1 1 1 0 0 1 0 1 1 0 0 0 1 1 1 1 1 0 0 0 1 1 1 1 1 0 0 0 1 1 0 1 0 1 1
 1 0 0 0 1 1 1 0 0 0 0]


#### Accuracy of each base model

In [None]:
# Score every base model against the held-out labels.
acc1, acc2, acc3 = (
    metrics.accuracy_score(y_test, predicted)
    for predicted in (pred1, pred2, pred3)
)

# Report the scores in the same order the models were defined.
for caption, score in (
    ("Accuracy of decision tree classifier:", acc1),
    ("Accuracy of naive bayes classifier:", acc2),
    ("Accuracy of logistic regression classifier:", acc3),
):
    print(caption, score)


Accuracy of decision tree classifier: 0.7708333333333334
Accuracy of naive bayes classifier: 0.7916666666666666
Accuracy of logistic regression classifier: 0.8125


#### Final prediction from weighted voting (accuracy as weight)

In [None]:
# Turn the three accuracies into voting weights that sum to 1: each model's
# weight is its share of the total accuracy.
w = acc1 + acc2 + acc3
w1, w2, w3 = acc1 / w, acc2 / w, acc3 / w


In [None]:
# Weighted vote over the three base models, computed for all test samples at
# once instead of growing the result with np.append inside a loop (np.append
# copies the whole array on every call).
#
# For each sample, a model adds its weight to the class-1 score when it
# predicted 1 and to the class-0 score otherwise. The terms are added in the
# order model1, model2, model3.
votes_for_one = (
    (np.asarray(pred1) == 1) * w1
    + (np.asarray(pred2) == 1) * w2
    + (np.asarray(pred3) == 1) * w3
)
votes_for_zero = (
    (np.asarray(pred1) != 1) * w1
    + (np.asarray(pred2) != 1) * w2
    + (np.asarray(pred3) != 1) * w3
)

# Class 1 wins only on a strict majority of weight; a tie goes to class 0.
# The result is a float array of 0./1. values, one entry per test sample.
final_pred_weighted_voting = np.where(votes_for_one > votes_for_zero, 1.0, 0.0)

In [None]:
# Display the ensemble's prediction for every test sample.
print(
    "final prediction using weighted voting:\n",
    final_pred_weighted_voting,
)

final prediction using weighted voting:
 [0. 1. 1. 1. 1. 0. 0. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1.
 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0.]


#### Accuracy of final prediction

In [None]:
# Score the weighted-voting prediction against the held-out labels.
ensemble_accuracy = metrics.accuracy_score(y_test, final_pred_weighted_voting)
print("Accuracy of weighted voting:", ensemble_accuracy)

Accuracy of weighted voting: 0.8125


# RANKING OF ALL MODELS BASED ON THEIR ACCURACY

In [None]:
# Rank the ensemble and the base models by accuracy.
# The scores are taken from the values computed earlier in the notebook
# rather than typed in as literals, so the ranking stays correct when the
# data, the split or the models change.
print("RANKING OF ALL MODELS BASED ON THEIR ACCURACY")
_ranking_scores = [
    ("Weighted voting Ensemble",
     metrics.accuracy_score(y_test, final_pred_weighted_voting)),
    ("Logistic regression classifier", acc3),
    ("Naive bayes classifier", acc2),
    ("Decision tree classifier", acc1),
]

# Group models that reached exactly the same accuracy so they share a rank.
_models_by_score = {}
for _model_name, _model_score in _ranking_scores:
    _models_by_score.setdefault(_model_score, []).append(_model_name)

# Best accuracy first; tied models are listed together, joined by " & ".
for _rank, _model_score in enumerate(sorted(_models_by_score, reverse=True), start=1):
    print("{}.{}:".format(_rank, " & ".join(_models_by_score[_model_score])),
          _model_score)

RANKING OF ALL MODELS BASED ON THEIR ACCURACY
1.Weighted voting Ensemble & logistic regression classifier: 0.8125
2.naive bayes classifier  0.7916666666666666
3.Accuracy of decision tree classifier 0.7708333333333334
