<a href="https://colab.research.google.com/github/Varun880/WineWise/blob/main/WineWiseClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Introduction
This notebook uses the preprocessed data produced by the Data Preprocessing notebook to classify wines as 'good' or 'bad', using 'quality_class' as the target.

In [9]:
# Mount Google Drive at /content/drive; the processed CSV files read later in this
# notebook live under that mount point.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime —
# confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Importing Libraries

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Load Processed Data

In [11]:
# Drive folder that holds the processed train/test CSV files.
# Keep the trailing slash: file names are appended directly to this string.
data_store_location = '/content/drive/MyDrive/ColabNotebooks/Processed/'

In [12]:
# Scaled feature matrices (these are what every model below is trained and evaluated on).
X_clf_train_scaled = pd.read_csv(f'{data_store_location}X_clf_train_scaled.csv')
X_clf_test_scaled = pd.read_csv(f'{data_store_location}X_clf_test_scaled.csv')

# Unscaled feature matrices.
X_clf_train = pd.read_csv(f'{data_store_location}X_clf_train.csv')
X_clf_test = pd.read_csv(f'{data_store_location}X_clf_test.csv')

# Target labels; read_csv returns these as DataFrames, not 1-D arrays.
y_clf_train = pd.read_csv(f'{data_store_location}y_clf_train.csv')
y_clf_test = pd.read_csv(f'{data_store_location}y_clf_test.csv')

Importing evaluation libraries

In [13]:
from sklearn.model_selection import GridSearchCV  # to find best hyperparameters
from sklearn.metrics import classification_report, roc_auc_score # for metrics

Logistic Regression (Classification)

In [14]:
from sklearn.linear_model import LogisticRegression

# The targets were read with pd.read_csv and are therefore column-vector DataFrames.
# scikit-learn expects 1-D label arrays and otherwise emits a DataConversionWarning,
# so flatten them before fitting and scoring.
y_train_1d = np.ravel(y_clf_train)
y_test_1d = np.ravel(y_clf_test)

# class_weight='balanced' re-weights the classes to offset the imbalance in the labels.
log_reg = LogisticRegression(class_weight='balanced', random_state=1)
log_reg.fit(X_clf_train_scaled, y_train_1d)  # training the model
y_pred_log = log_reg.predict(X_clf_test_scaled)  # Making Predictions

# Metrics
print('Logistic Regression Classification Report:')
print(classification_report(y_test_1d, y_pred_log))
# AUC is computed from the predicted probability of the positive class (column 1).
print('AUC-ROC:', roc_auc_score(y_test_1d, log_reg.predict_proba(X_clf_test_scaled)[:, 1]))

Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.69      0.78       753
           1       0.42      0.76      0.54       227

    accuracy                           0.70       980
   macro avg       0.66      0.72      0.66       980
weighted avg       0.79      0.70      0.73       980

AUC-ROC: 0.7943848687482084


  y = column_or_1d(y, warn=True)


Random Forest

In [15]:
from sklearn.ensemble import RandomForestClassifier

# The targets were read with pd.read_csv and are therefore column-vector DataFrames.
# scikit-learn expects 1-D label arrays and otherwise emits a DataConversionWarning
# on every fit inside the grid search, so flatten them first.
y_train_1d = np.ravel(y_clf_train)
y_test_1d = np.ravel(y_clf_test)

rf_class = RandomForestClassifier(class_weight='balanced', random_state=1)
param_grid_rf = {'n_estimators': [100, 200], 'max_depth': [10, 20, None]} # parameter options
# 5-fold cross-validated grid search, selecting the combination with the best F1 score.
grid_rf_class = GridSearchCV(rf_class, param_grid_rf, cv=5, scoring='f1', n_jobs=-1)
grid_rf_class.fit(X_clf_train_scaled, y_train_1d)  # training the model

best_rf_class = grid_rf_class.best_estimator_
y_pred_rf_class = best_rf_class.predict(X_clf_test_scaled)  # Making Predictions

# metrics
print('Random Forest Classifier:')
print(f'Best Parameters: {grid_rf_class.best_params_}')
print(classification_report(y_test_1d, y_pred_rf_class))
# AUC is computed from the predicted probability of the positive class (column 1).
print('AUC-ROC:', roc_auc_score(y_test_1d, best_rf_class.predict_proba(X_clf_test_scaled)[:, 1]))

  return fit_method(estimator, *args, **kwargs)


Random Forest Classifier:
Best Parameters: {'max_depth': 10, 'n_estimators': 200}
              precision    recall  f1-score   support

           0       0.93      0.88      0.90       753
           1       0.66      0.79      0.72       227

    accuracy                           0.86       980
   macro avg       0.80      0.83      0.81       980
weighted avg       0.87      0.86      0.86       980

AUC-ROC: 0.9061024623971077


Support Vector Machine (Classification)

In [16]:
from sklearn.svm import SVC

# The targets were read with pd.read_csv and are therefore column-vector DataFrames.
# scikit-learn expects 1-D label arrays and otherwise emits a DataConversionWarning,
# so flatten them before fitting and scoring.
y_train_1d = np.ravel(y_clf_train)
y_test_1d = np.ravel(y_clf_test)

# probability=True is required so that predict_proba is available for the AUC below.
svc = SVC(class_weight='balanced', probability=True, random_state=1)
param_grid_svc = {'C': [0.1, 1, 10], 'kernel': ['rbf', 'linear']} # parameter options
# 5-fold cross-validated grid search, selecting the combination with the best F1 score.
grid_svc = GridSearchCV(svc, param_grid_svc, cv=5, scoring='f1', n_jobs=-1)
grid_svc.fit(X_clf_train_scaled, y_train_1d)  # training the model

best_svc = grid_svc.best_estimator_
y_pred_svc = best_svc.predict(X_clf_test_scaled)  # making predictions

# metrics
print('Support Vector Classification (SVC):')
print(f'Best Parameters: {grid_svc.best_params_}')
print(classification_report(y_test_1d, y_pred_svc))
# AUC is computed from the predicted probability of the positive class (column 1).
print('AUC-ROC:', roc_auc_score(y_test_1d, best_svc.predict_proba(X_clf_test_scaled)[:, 1]))

  y = column_or_1d(y, warn=True)


Support Vector Classification (SVC):
Best Parameters: {'C': 10, 'kernel': 'rbf'}
              precision    recall  f1-score   support

           0       0.94      0.76      0.84       753
           1       0.52      0.83      0.64       227

    accuracy                           0.78       980
   macro avg       0.73      0.80      0.74       980
weighted avg       0.84      0.78      0.79       980

AUC-ROC: 0.8647465936547495


Naive Bayes

In [17]:
from sklearn.naive_bayes import GaussianNB

# The targets were read with pd.read_csv and are therefore column-vector DataFrames.
# scikit-learn expects 1-D label arrays and otherwise emits a DataConversionWarning,
# so flatten them before fitting and scoring.
y_train_1d = np.ravel(y_clf_train)
y_test_1d = np.ravel(y_clf_test)

gauss_nb = GaussianNB()
gauss_nb.fit(X_clf_train_scaled, y_train_1d) # training the model

y_pred_nb = gauss_nb.predict(X_clf_test_scaled) # Making Predictions

# Metrics
print('Naive Bayes Classification Report:')
print(classification_report(y_test_1d, y_pred_nb))
# AUC is computed from the predicted probability of the positive class (column 1).
print(f"AUC-ROC: {roc_auc_score(y_test_1d, gauss_nb.predict_proba(X_clf_test_scaled)[:, 1]):.3f}")

Naive Bayes Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.72      0.79       753
           1       0.43      0.70      0.53       227

    accuracy                           0.71       980
   macro avg       0.66      0.71      0.66       980
weighted avg       0.78      0.71      0.73       980

AUC-ROC: 0.771


  y = column_or_1d(y, warn=True)


Decision Tree

In [18]:
from sklearn.tree import DecisionTreeClassifier

# Build and train the tree in one step; fit() returns the fitted estimator itself.
dtree_model = DecisionTreeClassifier(
    class_weight='balanced',
    random_state=1,
).fit(X_clf_train_scaled, y_clf_train)

# Hard class predictions on the held-out split.
y_pred_dt = dtree_model.predict(X_clf_test_scaled)

# Score for the positive class (column 1), used for the ROC AUC.
dt_positive_scores = dtree_model.predict_proba(X_clf_test_scaled)[:, 1]
dt_auc = roc_auc_score(y_clf_test, dt_positive_scores)

# Report
print("Decision Tree Classification Report:")
print(classification_report(y_clf_test, y_pred_dt))
print(f"AUC-ROC: {dt_auc:.4f}")

Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       753
           1       0.62      0.66      0.64       227

    accuracy                           0.83       980
   macro avg       0.76      0.77      0.77       980
weighted avg       0.83      0.83      0.83       980

AUC-ROC: 0.7706


Artificial Neural Network

In [19]:
# Importing Libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout and batch shuffling are repeatable between runs
# (the scikit-learn models in this notebook use random_state=1 for the same reason).
tf.keras.utils.set_random_seed(1)

# Building ANN
ann_model = Sequential()
# Declare the input width with an explicit Input object; passing input_shape to the
# first Dense layer makes Keras emit a UserWarning.
ann_model.add(tf.keras.Input(shape=(X_clf_train_scaled.shape[1],)))
ann_model.add(Dense(64, activation='relu')) # First Hidden Layer
ann_model.add(Dropout(0.2))
ann_model.add(Dense(32, activation='relu')) # Second Hidden Layer
ann_model.add(Dense(1, activation='sigmoid')) # Output Layer

# Training ANN
ann_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# NOTE(review): the test split is passed as validation data. It is only used for the
# per-epoch val_* metrics here, but it must not drive model selection or early stopping.
ann_model.fit(X_clf_train_scaled, y_clf_train,
                    epochs=100,
                    batch_size=32,
                    validation_data=(X_clf_test_scaled, y_clf_test),
                    verbose=1)

# The sigmoid output is the predicted probability of class 1. Keep it for the AUC
# and threshold it at 0.5 only for the hard class labels.
y_prob_ann = ann_model.predict(X_clf_test_scaled) # Making Predictions
y_pred_ann = (y_prob_ann > 0.5).astype(int)

# Metrics
print("ANN Classification Report:")
print(classification_report(y_clf_test, y_pred_ann))
# ROC AUC must be computed from continuous scores; using the thresholded 0/1 labels
# collapses the ROC curve to a single operating point and misreports the AUC.
print(f"AUC-ROC: {roc_auc_score(y_clf_test, y_prob_ann.ravel()):.3f}")

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7321 - loss: 0.5579 - val_accuracy: 0.7878 - val_loss: 0.4502
Epoch 2/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8097 - loss: 0.4134 - val_accuracy: 0.7990 - val_loss: 0.4220
Epoch 3/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8164 - loss: 0.3968 - val_accuracy: 0.7949 - val_loss: 0.4149
Epoch 4/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8219 - loss: 0.3969 - val_accuracy: 0.7959 - val_loss: 0.4097
Epoch 5/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8224 - loss: 0.3795 - val_accuracy: 0.8051 - val_loss: 0.3999
Epoch 6/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8291 - loss: 0.3639 - val_accuracy: 0.8122 - val_loss: 0.3996
Epoch 7/100
[1m123/123[0m [32m━