In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import qgrid

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Names of the four predictor columns, in the order they appear in the data file.
features = [
    'variance',
    'skewness',
    'kurtosis',
    'entropy',
]
# Full column list: the predictors followed by the target label.
cols = [*features, 'class']

In [None]:
# Column names are supplied by `cols`, so the file must be read as header-less.
# With the default `header=0`, pandas would consume the first line of the file
# as column names and that record would be lost once the names are overwritten.
# NOTE(review): assumes the local copy is the raw UCI file, which has no
# header row - confirm before relying on the row count printed below.
df = pd.read_csv(
    './Data/data_banknote_authentication.txt',
    header=None,
    names=cols,
)

# Quick sanity check on (rows, columns), then an interactive grid view.
print(df.shape)
df_w = qgrid.show_grid(df)
df_w

In [None]:
#   Run basic EDA

def plot_uni(df, feature, category):
    """Plot the distribution of a single column, split by a grouping column.

    Parameters
    ----------
    df : DataFrame holding the data to plot.
    feature : name of the column placed on the x axis.
    category : name of the column used to colour (hue) the distribution.

    The figure is drawn with seaborn's ``displot`` with the KDE overlay
    enabled; nothing is returned.
    """
    displot_args = {
        'data': df,
        'x': feature,
        'hue': category,
        'kde': True,
    }
    sns.displot(**displot_args)


# One distribution figure per predictor, coloured by the target label.
for feature_name in features:
    plot_uni(df, feature_name, 'class')
    

In [None]:
# Pairwise relationships between the columns of `df`, coloured by 'class'.
sns.pairplot(data=df, hue='class')

In [None]:
# Requires the third-party package seaborn-qqplot; install it once with: %pip install seaborn-qqplot
from seaborn_qqplot import pplot
from scipy.stats import norm

# One Q-Q plot per predictor against scipy's `norm` distribution, split by class.
# Optional styling: display_kws={'identity':False, 'fit':True, 'reg':True, 'ci':0.025}
for column in features:
    pplot(df, x=column, y=norm, kind='qq', hue='class')

In [None]:
# Separate the predictors from the target label.
X = df.drop(columns=['class'])
y = df['class']

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=775,
)

In [None]:
# Feature scaling (e.g. with StandardScaler) would normally happen here; it is skipped for now to see how the models perform on unscaled features.


In [None]:
# Candidate classifiers, each evaluated with its default hyper-parameters.
models = [LogisticRegression, RandomForestClassifier, GradientBoostingClassifier, SVC, MLPClassifier]

# Several of these estimators are stochastic (bootstrap sampling, random weight
# initialisation), so without a seed the reported metrics change on every run.
# All five classes accept a `random_state` argument.
MODEL_SEED = 775

for model in models:
    cls = model(random_state=MODEL_SEED)
    # `fit()` returns the estimator itself, so `fit` is an alias of `cls`.
    fit = cls.fit(X_train, y_train)
    pred = fit.predict(X_test)
    # Report which estimator the following metrics belong to.
    print(type(cls))
    print(classification_report(y_test, pred))
    print(confusion_matrix(y_test, pred))