In [37]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, log_loss, classification_report
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [38]:
df = pd.read_csv('Kyphosis.csv')
df.sample(5)

Unnamed: 0,Kyphosis,Age,Number,Start
30,absent,31,3,16
49,absent,177,2,14
29,absent,151,2,16
4,absent,1,4,15
17,absent,175,5,13


In [39]:
# le = LabelEncoder()
# df['Kyphosis'] = le.fit_transform(df['Kyphosis'])
X = df.drop('Kyphosis', axis=1)
y = df['Kyphosis']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=24)

In [40]:
lr = LogisticRegression()
nb = GaussianNB()
svm = SVC(random_state=24, probability=True)
voting = VotingClassifier([('LR', lr), ('NB', nb), ('SVM', svm)], voting='soft', weights=[0.76, 0.76, 0.8])

In [41]:
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(accuracy_score(y_test, y_pred))

y_pred_prob = voting.predict_proba(X_test)[:,1]
print(log_loss(y_test, y_pred_prob))

0.76
0.48370844608393226


In [None]:
'''
Without weights
0.76
0.48258033499245384

With weights
0.76
0.48370844608393226
'''

In [42]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

params = {
    'LR__penalty': ['l2', None],
    'LR__C': [0.001, 10, 5],
    'LR__solver': ['lbfgs', 'newton-cg', 'newton-cholesky', 'sag'],

    'SVM__kernel': ['rbf', 'linear'],
    'SVM__C': [0.01, 4, 5],
    'SVM__gamma': np.linspace(0.01, 4, 5),
    'SVM__decision_function_shape': ['ovo','ovr']
}

In [43]:
voting = VotingClassifier([('LR', lr), ('NB', nb), ('SVM', svm)], voting='soft')
gcv = GridSearchCV(voting, param_grid=params, cv=kfold, scoring='neg_log_loss')
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

{'LR__C': 0.001, 'LR__penalty': None, 'LR__solver': 'lbfgs', 'SVM__C': 4, 'SVM__decision_function_shape': 'ovo', 'SVM__gamma': 2.005, 'SVM__kernel': 'rbf'}
-0.3966098931051333


In [None]:

{'LR__C': 0.001, 'LR__penalty': None, 'LR__solver': 'lbfgs', 'SVM__C': 4, 'SVM__gamma': 2.005, 'SVM__kernel': 'rbf'}
-0.3966098931051333
