In [11]:
%load_ext nb_black

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [12]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb
from sklearn import metrics as m
from thundersvm import SVC as svmgpu
import calculateWeightUsingGa2 as aresult
import pandas as pd
import itertools
import random
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier

randomseed = 42
np.random.seed(randomseed)

<IPython.core.display.Javascript object>

1. Read Dataset 
===

In [13]:
data = pd.read_csv("../dataset/seeds_dataset.txt", sep="\t", header=None)
data = shuffle(data)

le = LabelEncoder()
data.iloc[:, -1] = le.fit_transform(data.iloc[:, -1])
x = np.array(data.iloc[:, :-1])
y = np.array(data.iloc[:, -1])

print(np.unique(y))

xtrain, xtest, ytrain_original, ytest_original = train_test_split(
    x, y, test_size=0.3, random_state=10
)

ytrain = ytrain_original.copy()
ytest = ytest_original.copy()

[0 1 2]


<IPython.core.display.Javascript object>

In [14]:
# member values
clf = []
acc = []
finalacc = []
ypredproba_all = []
ypredconfprob_all = []

<IPython.core.display.Javascript object>

In [15]:
# orginal score using random forest classifier
rf = RandomForestClassifier(random_state=randomseed, n_estimators=10)
rf.fit(xtrain, ytrain)
print("original score", m.f1_score(ytest, rf.predict(xtest), average="weighted"))

original score 0.9524645173581343


<IPython.core.display.Javascript object>

2. Generate baseline classifiers 
===

In [6]:
# generate three base classifers using RF,svm and XGBoost

#================================================= 

rf=RandomForestClassifier(random_state=randomseed, n_estimators=10)
rf.fit(xtrain,ytrain)
rfpred=rf.predict(xtest)
print('RF f1_score',m.f1_score(ytest,rfpred,average='weighted'))

clf.append(rf)
acc.append(m.f1_score(ytest,rfpred,average='weighted'))
ypredproba_all.append(rf.predict_proba(xtest))

confmat=m.confusion_matrix(ytest,rfpred)
confsumh=np.sum(confmat,axis=1)
propconfmat=confmat.copy()
for i in range(propconfmat.shape[0]):
    propconfmat[i]= 100*propconfmat[i]/confsumh[i] 
ypredconfprob_all.append(propconfmat/100)

#=================================================
svc=svmgpu(random_state=randomseed,probability=True,C=100,gamma=0.0001)
svc.fit(xtrain,ytrain)

svcpred=svc.predict(xtest)
print('svc f1_score',m.f1_score(ytest,svcpred,average='weighted'))

clf.append(svc)
acc.append(m.f1_score(ytest,svcpred,average='weighted'))
ypredproba_all.append(svc.predict_proba(xtest))

confmat=m.confusion_matrix(ytest,svcpred)
confsumh=np.sum(confmat,axis=1)
propconfmat=confmat.copy()
for i in range(propconfmat.shape[0]):
    propconfmat[i]= 100*propconfmat[i]/confsumh[i] 
ypredconfprob_all.append(propconfmat/100)

#=================================================
xgbc=xgb.XGBClassifier(random_state=randomseed,n_estimators=100)
xgbc.fit(xtrain,ytrain)

xgbpred=xgbc.predict(xtest)
print('xgbc f1_score',m.f1_score(ytest,xgbpred,average='weighted'))


clf.append(xgbc)
acc.append(m.f1_score(ytest,xgbpred,average='weighted'))
ypredproba_all.append(xgbc.predict_proba(xtest))

confmat=m.confusion_matrix(ytest,xgbpred)
confsumh=np.sum(confmat,axis=1)
propconfmat=confmat.copy()
for i in range(propconfmat.shape[0]):
    propconfmat[i]= 100*propconfmat[i]/confsumh[i] 
ypredconfprob_all.append(propconfmat/100)


RF f1_score 0.9524645173581343
svc f1_score 0.8897826937042623
xgbc f1_score 0.9520778669714839


<IPython.core.display.Javascript object>

In [7]:
# =================================================
# =================================================
# generate combinations of features 12,7
comb = list(itertools.combinations(np.arange(0, 7, 1), 4))

# generate 10 random numbers
randnums = []
for i in range(10):
    randnums.append(random.randrange(0, len(comb)))

print(randnums)

comb = np.array(comb)[randnums, :]


for i in range(len(comb)):
    print(i, " ==================== ", comb[i])

    rf = RandomForestClassifier(random_state=randomseed, n_estimators=50)
    rf.fit(xtrain[:, comb[i]], ytrain)
    rfpred = rf.predict(xtest[:, comb[i]])
    print(m.f1_score(ytest, rfpred, average="weighted"))

    clf.append(rf)
    acc.append(m.f1_score(ytest, rfpred, average="weighted"))
    ypredproba_all.append(rf.predict_proba(xtest[:, comb[i]]))

    confmat = m.confusion_matrix(ytest, rfpred)
    confsumh = np.sum(confmat, axis=0)
    propconfmat = confmat.copy()
    for i in range(propconfmat.shape[0]):
        propconfmat[:, i] = 100 * propconfmat[:, i] / confsumh[i]
    ypredconfprob_all.append(propconfmat / 100)

    xgbmodel = xgb.XGBClassifier(random_state=randomseed, n_estimators=50)
    xgbmodel.fit(xtrain, ytrain)
    xgbmodelpred = xgbmodel.predict(xtest)
    print(m.f1_score(ytest, xgbmodelpred, average="weighted"))

    clf.append(xgbmodel)
    acc.append(m.f1_score(ytest, xgbmodelpred, average="weighted"))
    ypredproba_all.append(xgbmodel.predict_proba(xtest))

    confmat = m.confusion_matrix(ytest, xgbmodelpred)
    confsumh = np.sum(confmat, axis=0)
    propconfmat = confmat.copy()
    for i in range(propconfmat.shape[0]):
        propconfmat[:, i] = 100 * propconfmat[:, i] / confsumh[i]
    ypredconfprob_all.append(propconfmat / 100)

# #=================================================

[21, 5, 29, 11, 2, 21, 14, 21, 0, 20]
0.9208012677400433
0.9520778669714839
0.9208342677730433
0.9520778669714839
0.9682539682539683
0.9520778669714839
0.9208342677730433
0.9520778669714839
0.9047619047619049
0.9520778669714839
0.9208012677400433
0.9520778669714839
0.9206973496706653
0.9520778669714839
0.9208012677400433
0.9520778669714839
0.8207484922499914
0.9520778669714839
0.8712476154336619
0.9520778669714839


<IPython.core.display.Javascript object>

3. Compute the weight using ga and compute the ensemble accuracy
===

In [8]:
import calculateWeightUsingGa2 as aresult
weightvalga=aresult.getbestvalues(acc)

finalval=0
for i in range(len(acc)):
    finalval += weightvalga[i]*ypredproba_all[i]

print('f1_score',m.f1_score(ytest,np.argmax(finalval,axis=1),average='weighted'))
print('accuracy_score',m.accuracy_score(ytest,np.argmax(finalval,axis=1)))
    


0
1
2
3
4
5
6
7
8
9
15.36369614526178 [0.95093452 0.44584747 0.7496752  0.63463406 0.75578476 0.70916722
 0.77685526 0.99900046 0.80701216 0.72318516 0.83091702 0.51634625
 0.83481673 0.69720902 0.88349264 0.56772325 0.89462598 0.70629268
 0.91784325 0.0830561  0.91954042 0.17968649 0.94316531]
f1_score 0.9520778669714839
accuracy_score 0.9523809523809523


<IPython.core.display.Javascript object>

4. voting classifier
===

In [10]:
ensemb_clf = []
for i in range(len(clf)):
    ensemb_clf.append([str(type(clf[i])) + str(i), clf[i]])

eclf3 = VotingClassifier(estimators=ensemb_clf, voting="soft", flatten_transform=True)
eclf3 = eclf3.fit(xtrain, ytrain)
_acc = m.accuracy_score(ytest, eclf3.predict(xtest))
print(_acc)

0.9682539682539683


<IPython.core.display.Javascript object>