In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.metrics import classification_report

In [None]:
# Read the white-wine dataset from the working directory and preview the
# first five rows.
wine = pd.read_csv('white_wine.csv')
wine.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6.0
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6.0
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6.0
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6.0
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6.0


In [None]:
# Count missing values per column (`isna` is the canonical spelling of `isnull`).
wine.isna().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      1
sulphates               1
alcohol                 0
quality                 1
dtype: int64

In [None]:
# Mean-impute every numeric feature column that has gaps, not only `alcohol`:
# filling a single hard-coded column does nothing when the missing values sit
# in other columns. The target (`quality`) and the derived `label` are
# excluded, because imputing a target would fabricate labels; `errors='ignore'`
# keeps the cell re-runnable whether or not `label` exists yet.
# NOTE(review): the means are computed on the full frame, before the
# train/test split - move this into a pipeline if leakage matters here.
feature_means = (
    wine
    .drop(columns=['quality', 'label'], errors='ignore')
    .mean(numeric_only=True)
)
# fillna with a Series fills each column named in its index with that column's
# value; columns not in the Series are left untouched.
wine = wine.fillna(feature_means)

In [None]:
# A row without a recorded quality cannot be labelled: `NaN > 6` evaluates to
# False, so np.where would silently put it in class 0. Drop such rows first.
# `.copy()` makes the result an independent frame before a column is added.
wine = wine.dropna(subset=['quality']).copy()

# Binary target: 1 when quality is strictly above 6, otherwise 0.
wine['label'] = np.where(wine['quality'] > 6, 1, 0)

In [None]:
# Preview the first five rows again to check the `label` column.
wine.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,label
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6.0,0
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6.0,0
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6.0,0
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6.0,0
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6.0,0


In [None]:
# Feature matrix: the two predictor columns used by the wine models.
X = wine.loc[:, ['density', 'alcohol']]
# Target vector: the binary label column.
y = wine.loc[:, 'label']

In [None]:
# Stratified 80/20 hold-out split. `random_state` pins the shuffle so the
# split - and every metric reported below - is identical on each
# Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Base learners, used both inside the voting ensemble and as stand-alone
# baselines.
# NOTE(review): logreg and knn are fit on unscaled features; consider wrapping
# them in a StandardScaler pipeline - confirm whether this is intentional.
logreg = LogisticRegression()
# random_state is fixed because scikit-learn permutes the features at every
# split, so ties between equally good splits can otherwise be broken
# differently from run to run.
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
knn = KNeighborsClassifier(n_neighbors=3)

In [None]:
# Combine the three base learners into a single soft-voting ensemble.
vc = VotingClassifier(
    estimators=[
        ('clf1', logreg),
        ('clf2', dt),
        ('clf3', knn),
    ],
    voting='soft',
)

In [None]:
# Fit the ensemble on the wine training split.
vc.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the held-out wine samples with the fitted ensemble.
y_pred = vc.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 of the ensemble on the test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96        84
           1       0.85      0.85      0.85        20

    accuracy                           0.94       104
   macro avg       0.91      0.91      0.91       104
weighted avg       0.94      0.94      0.94       104



In [None]:
# Score each base learner on its own for comparison with the ensemble.
# This fits the shared `logreg` / `dt` / `knn` instances in place and
# overwrites `y_pred` on every iteration.
for clf_name, clf in [
    ('Logistic Regression', logreg),
    ('Decision Tree', dt),
    ('KNN', knn),
]:
    print('-----')
    print(f'Prediction using {clf_name}')
    # `fit` returns the estimator itself, so fitting and predicting can be chained.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(classification_report(y_test, y_pred))

-----
Prediction using Logistic Regression
              precision    recall  f1-score   support

           0       0.87      0.92      0.89        84
           1       0.53      0.40      0.46        20

    accuracy                           0.82       104
   macro avg       0.70      0.66      0.67       104
weighted avg       0.80      0.82      0.81       104

-----
Prediction using Decision Tree
              precision    recall  f1-score   support

           0       0.99      1.00      0.99        84
           1       1.00      0.95      0.97        20

    accuracy                           0.99       104
   macro avg       0.99      0.97      0.98       104
weighted avg       0.99      0.99      0.99       104

-----
Prediction using KNN
              precision    recall  f1-score   support

           0       0.94      0.92      0.93        84
           1       0.68      0.75      0.71        20

    accuracy                           0.88       104
   macro avg       0.

In [None]:
# Read the bank-loan dataset from the working directory and preview the
# first five rows.
bankloan = pd.read_csv('bankloan.csv')
bankloan.head(n=5)

Unnamed: 0,age,ed,employ,address,income,debtinc,creddebt,othdebt,default
0,41,3,17,12,176,9.3,11.359392,5.008608,1
1,27,1,10,6,31,17.3,1.362202,4.000798,0
2,40,1,15,14,55,5.5,0.856075,2.168925,0
3,41,1,15,14,120,2.9,2.65872,0.82128,0
4,24,2,2,0,28,17.3,1.787436,3.056564,1


In [None]:
# Feature matrix: the five predictor columns used by the bank-loan models.
# This rebinds the notebook-level `X` / `y` that previously held the wine data.
X = bankloan.loc[:, ['age', 'employ', 'debtinc', 'creddebt', 'othdebt']]
# Target vector: the `default` column.
y = bankloan.loc[:, 'default']

In [None]:
# Stratified 80/20 hold-out split of the bank-loan data. `random_state` pins
# the shuffle so the split and the reported metrics are reproducible on
# re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Small, shallow random forest: 20 trees, depth at most 3, with 4 candidate
# features considered per split. `random_state` fixes the bootstrap samples
# and per-split feature draws so the fitted forest is the same on every run.
rf = RandomForestClassifier(
    n_estimators=20,
    max_features=4,
    max_depth=3,
    random_state=42,
)

In [None]:
# Fit the forest on the bank-loan training split.
rf.fit(X=X_train, y=y_train)

In [None]:
# Predict default / no-default for the held-out bank-loan samples.
y_pred = rf.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 of the random forest on the test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.84      0.92      0.88       103
           1       0.70      0.51      0.59        37

    accuracy                           0.81       140
   macro avg       0.77      0.72      0.74       140
weighted avg       0.80      0.81      0.80       140



In [None]:
# Score each single model on the bank-loan split for comparison with the
# random forest. This refits the existing `logreg` / `dt` / `knn` instances
# on the current `X_train` / `y_train` and overwrites `y_pred` on every
# iteration.
for clf_name, clf in [
    ('Logistic Regression', logreg),
    ('Decision Tree', dt),
    ('KNN', knn),
]:
    print('-----')
    print(f'Prediction using {clf_name}')
    # `fit` returns the estimator itself, so fitting and predicting can be chained.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(classification_report(y_test, y_pred))

-----
Prediction using Logistic Regression
              precision    recall  f1-score   support

           0       0.86      0.95      0.90       103
           1       0.81      0.57      0.67        37

    accuracy                           0.85       140
   macro avg       0.83      0.76      0.78       140
weighted avg       0.85      0.85      0.84       140

-----
Prediction using Decision Tree
              precision    recall  f1-score   support

           0       0.86      0.83      0.85       103
           1       0.57      0.62      0.60        37

    accuracy                           0.78       140
   macro avg       0.72      0.73      0.72       140
weighted avg       0.78      0.78      0.78       140

-----
Prediction using KNN
              precision    recall  f1-score   support

           0       0.79      0.86      0.83       103
           1       0.50      0.38      0.43        37

    accuracy                           0.74       140
   macro avg       0.