In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns
from copy import deepcopy

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from collections import Counter

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score

import torch

In [None]:
# Location of the car acceptability CSV inside the Kaggle input directory.
csv_path = "/kaggle/input/car-acceptability-classification-dataset/car.csv"
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the frame as loaded from disk.
df.head(n=5)

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# One pie chart per column, showing the share of each category value.
fig, axes = plt.subplots(nrows=1, ncols=df.shape[1], figsize=(15, 10))
# plt.subplots returns a bare Axes (not an array) when only one subplot is
# requested; normalise so the loop below works for a single-column frame too.
axes = np.atleast_1d(axes)
for ax, column in zip(axes, df.columns):
    counts = df[column].value_counts()  # computed once, reused for sizes and labels
    ax.pie(counts, labels=counts.index)
    ax.set_title(column)  # without a title the pies cannot be told apart

plt.show()

In [None]:
# Integer-encode every column of `df` in place.
# A separate fitted LabelEncoder is kept per column (in column order) in
# `encoders`, so the integer codes can later be mapped back to the original
# category labels with `encoders[k].inverse_transform(...)`.
le = LabelEncoder()
encoders = []

for i in df.columns:
    # A fresh encoder per column: refitting a single shared instance would
    # discard the mapping of every column except the last one.
    le = LabelEncoder()
    df[i] = le.fit_transform(df[i])
    encoders.append(le)

In [None]:
# Preview the first five rows again to inspect the frame at this point.
df.head(n=5)

In [None]:
# Every column but the last is a feature; the last column is the target.
features, target = df.iloc[:, :-1], df.iloc[:, -1]
x = features.to_numpy()
y = target.to_numpy()

In [None]:
# Hold out 20% of the rows for testing, then oversample the training part only
# so the test set keeps its original class distribution.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.2)

# Seed SMOTE as well: the split is already seeded, but unseeded synthetic
# samples would make every downstream score change from run to run.
sm = SMOTE(random_state=42)

print("Before ", Counter(y_train))

x_train, y_train = sm.fit_resample(x_train, y_train)

print("After ", Counter(y_train))

In [None]:
def training(model, name):
    """Fit `model` on the training split and print its test accuracy.

    Reads the module-level `x_train`, `y_train`, `x_test` and `y_test`.

    Parameters
    ----------
    model : estimator exposing `fit(X, y)` and `predict(X)`.
    name : label used in the printed report line.

    Returns
    -------
    The same `model` object, now fitted.
    """
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # accuracy_score takes (y_true, y_pred); accuracy is symmetric, but the
    # documented order keeps this correct if the metric is ever swapped.
    acc = round(accuracy_score(y_test, y_pred) * 100, 2)
    title = '{}: {}%'.format(name, acc)
    print(title)
    print()
    return model

In [None]:
# Candidate classifiers to compare. Every estimator with a stochastic fit is
# seeded so the printed accuracies are reproducible across kernel restarts.
rfc = RandomForestClassifier(max_depth=50, random_state=42)
dtc = DecisionTreeClassifier(max_depth=55, random_state=42)
abc = AdaBoostClassifier(learning_rate=0.5, random_state=42)
gnb = GaussianNB()
svm = SVC(C=0.9)
xgb = XGBClassifier(random_state=42)

# `models` and `names` are parallel lists: names[k] labels models[k].
models = [rfc, dtc, abc, gnb, svm, xgb]
names = ['Random Forest', 'Decision Tree', 'Ada Boost', 'GaussianNB', 'SVM', 'XGB']

In [None]:
# Fit each candidate in turn (printing its accuracy) and keep the fitted models.
trained = [training(estimator, label) for estimator, label in zip(models, names)]

In [None]:
class Classifier(torch.nn.Module):
    """Three-layer fully connected network for multi-class classification.

    `forward` returns raw, unnormalised class scores (logits). This is the
    input `torch.nn.CrossEntropyLoss` expects, since that loss applies
    log-softmax itself. Use `predict_proba` when probabilities are needed;
    `argmax` over logits and over probabilities selects the same class.
    """

    def __init__(self, ins, n_classes):
        """Build the layers.

        Parameters
        ----------
        ins : number of input features.
        n_classes : number of output classes.
        """
        super().__init__()
        self.fc = torch.nn.Linear(ins, 128)
        self.fc1 = torch.nn.Linear(128, 32)
        self.fc2 = torch.nn.Linear(32, n_classes)

    def forward(self, x):
        """Return class logits with one score per class on the last axis."""
        x = torch.nn.functional.relu(self.fc(x))
        x = torch.nn.functional.relu(self.fc1(x))
        # No ReLU or softmax on the output layer: clamping logits at zero and
        # normalising before CrossEntropyLoss would apply softmax twice and
        # flatten the gradients.
        return self.fc2(x)

    def predict_proba(self, x):
        """Return class probabilities (softmax over the last axis of the logits)."""
        return torch.nn.functional.softmax(self(x), dim=-1)

In [None]:
# Features are converted to float32 tensors, labels to int64 tensors.
xnn_train = torch.tensor(x_train, dtype=torch.float32)
test_x = torch.tensor(x_test, dtype=torch.float32)
ynn_train = torch.tensor(y_train, dtype=torch.int64)
test_y = torch.tensor(y_test, dtype=torch.int64)

In [None]:
# Seed torch before building the network so the initial weights, and therefore
# the whole training run, are reproducible.
torch.manual_seed(42)

# Input width is the number of feature columns; output width is the number of
# distinct values in the 'Car_Acceptability' column.
model = Classifier(x_train.shape[1], df['Car_Acceptability'].nunique())
# NOTE(review): `device` is computed here but not applied to the model in this
# cell — confirm whether training is meant to run on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Full-batch training: each iteration is one gradient step on the whole
# training tensor, followed by an accuracy check on the held-out tensors.
# A copy of the model is kept whenever that accuracy improves.
# NOTE(review): the best checkpoint is selected on the same data used to report
# accuracy, so `best_acc` is an optimistic estimate.
best_model = deepcopy(model)
best_acc = 0

for i in range(1, 10001):
    model.train()
    optimizer.zero_grad()
    output = model(xnn_train)
    loss = criterion(output, ynn_train)
    loss.backward()
    optimizer.step()

    # `criterion` is CrossEntropyLoss with its default reduction, which already
    # averages over the batch; dividing by the sample count again would
    # under-report the loss by a factor of N.
    train_loss = loss.item()

    model.eval()
    with torch.no_grad():
        out = model(test_x)
        val = (test_y == out.argmax(1)).sum().item()  # number of correct predictions
        acc = val/x_test.shape[0]
        if best_acc < acc:
            best_acc = acc
            best_model = deepcopy(model)  # snapshot, so later steps cannot alter it
        if i%1000 == 0:
            print("Epoch {} train loss {} val {}".format(i, train_loss, acc))
            print()