In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC 
from sklearn.ensemble import VotingClassifier

In [2]:
# Read the pre-split training and test sets. The files are read with
# header=None, so pandas labels the columns with integers instead of names.
train, test = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ("german_train.csv", "german_test.csv")
)

In [3]:
# Sanity check: show the (rows, columns) shape of the training and test frames.
print(*(frame.shape for frame in (train, test)))

(700, 25) (300, 25)


In [4]:
# Split each frame into features (every column except the last) and the
# target (the last column). The target is selected by position rather than by
# the hard-coded label 24, so it always stays in step with the feature slice
# even if the number of columns in the CSV files changes.
X_train = train.iloc[:, :-1]
y_train = train.iloc[:, -1]

X_test = test.iloc[:, :-1]
y_test = test.iloc[:, -1]

In [5]:
# Baseline 1: Gaussian Naive Bayes fitted on the full training set and scored
# on the test set. fit() returns the estimator itself, so it can be chained.
model1 = GaussianNB().fit(X_train, y_train)
y_pred = model1.predict(X_test)
model1_acc = accuracy_score(y_true=y_test, y_pred=y_pred)

In [6]:
# Decision tree trained on a random 56% subsample of the training rows
# (split seed 1) and scored on the shared test set.
# The tree itself is seeded as well: without a fixed random_state,
# scikit-learn may choose different splits from run to run when candidate
# splits tie, so the reported accuracy would not be reproducible.
model2 = DecisionTreeClassifier(random_state=1)
# The 44% hold-out part (X_test_, y_test_) is not used in this cell;
# evaluation is done on X_test / y_test.
X_train_, X_test_, y_train_, y_test_ = train_test_split(X_train, y_train, test_size=0.44, random_state=1)
model2.fit(X_train_, y_train_)
y_pred = model2.predict(X_test)
model2_acc = accuracy_score(y_test, y_pred)

In [7]:
# Second decision tree, trained on a different random 56% subsample of the
# training rows (split seed 2) and scored on the shared test set.
# The tree itself is seeded as well: without a fixed random_state,
# scikit-learn may choose different splits from run to run when candidate
# splits tie, so the reported accuracy would not be reproducible.
model3 = DecisionTreeClassifier(random_state=2)
# The 44% hold-out part (X_test_, y_test_) is not used in this cell;
# evaluation is done on X_test / y_test.
X_train_, X_test_, y_train_, y_test_ = train_test_split(X_train, y_train, test_size=0.44, random_state=2)
model3.fit(X_train_, y_train_)
y_pred = model3.predict(X_test)
model3_acc = accuracy_score(y_test, y_pred)

In [8]:
# Support vector classifier with a linear kernel and C=1, fitted on the full
# training set and scored on the test set.
model4 = SVC(C=1, kernel="linear").fit(X_train, y_train)
y_pred = model4.predict(X_test)
model4_acc = accuracy_score(y_true=y_test, y_pred=y_pred)

In [9]:
# Hard-voting (majority vote) ensemble over the four classifiers built above,
# fitted on the full training set and scored on the test set.
# NOTE(review): VotingClassifier.fit appears to refit clones of the listed
# estimators on the data passed here, so the subsample-trained trees are
# presumably not reused as-is -- confirm against the scikit-learn docs.
# NOTE(review): with four voters a hard vote can tie -- check how ties are
# resolved before relying on this result.
voters = [
    ('gnb', model1),
    ('dt1', model2),
    ('dt2', model3),
    ('svm', model4),
]
final = VotingClassifier(estimators=voters, voting='hard')
final.fit(X_train, y_train)
y_pred = final.predict(X_test)
final_acc = accuracy_score(y_true=y_test, y_pred=y_pred)

In [10]:
# Report the test-set accuracy of every individual model and of the ensemble,
# one line per model, in the order the models were built.
for label, score in (
    ("Accuracy using Naive Bayes", model1_acc),
    ("Accuracy using Decision Tree and random seed = 1", model2_acc),
    ("Accuracy using Decision Tree and random seed = 2", model3_acc),
    ("Accuracy using SVM", model4_acc),
    ("Accuracy using Ensemble Classifier", final_acc),
):
    print(f"{label}: {score}")

Accuracy using Naive Bayes: 0.74
Accuracy using Decision Tree and random seed = 1: 0.6766666666666666
Accuracy using Decision Tree and random seed = 2: 0.7033333333333334
Accuracy using SVM: 0.7966666666666666
Accuracy using Ensemble Classifier: 0.7833333333333333
