<a href="https://colab.research.google.com/github/Varun880/WineWise/blob/main/WineWiseRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Introduction
This notebook uses preprocessed data from 'Data Preprocessing Notebook' to predict wine quality scores using regression algorithms.

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Importing Libraries

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Loading Processed Data

In [10]:
data_store_location = '/content/drive/MyDrive/ColabNotebooks/Processed/'

In [11]:
X_reg_train_scaled = pd.read_csv(data_store_location+'X_reg_train_scaled.csv')
X_reg_test_scaled = pd.read_csv(data_store_location+'X_reg_test_scaled.csv')
X_reg_train = pd.read_csv(data_store_location+'X_reg_train.csv')
X_reg_test = pd.read_csv(data_store_location+'X_reg_test.csv')
y_reg_train = pd.read_csv(data_store_location+'y_reg_train.csv')
y_reg_test = pd.read_csv(data_store_location+'y_reg_test.csv')

Importing evaluation libraries

In [12]:
from sklearn.model_selection import GridSearchCV # to find the best hyperparameters
from sklearn.metrics import mean_squared_error, r2_score # for metrics

Linear Regression

In [13]:
from sklearn.linear_model import LinearRegression
lr_reg = LinearRegression()
lr_reg.fit(X_reg_train_scaled, y_reg_train) # training the model
y_pred_lin = lr_reg.predict(X_reg_test_scaled)  # predicting test cases

# Evaluation
mse_lin = mean_squared_error(y_reg_test, y_pred_lin)  # mean sq error
rmse_lin = np.sqrt(mse_lin) # root mse
r2_lin = r2_score(y_reg_test, y_pred_lin) #r2 Value

print('Linear Regression:')
print(f'MSE: {mse_lin:.4f}')
print(f'RMSE: {rmse_lin:.4f}')
print(f'R²: {r2_lin:.4f}')

Linear Regression:
MSE: 0.5690
RMSE: 0.7543
R²: 0.2653


Random Forest Regression

In [14]:
from sklearn.ensemble import RandomForestRegressor

rf_reg = RandomForestRegressor(
    n_estimators=500,
    max_depth=15,
    min_samples_split=5,
    min_samples_leaf=2)

rf_reg.fit(X_reg_train_scaled, y_reg_train) # training the model
y_pred_rf = rf_reg.predict(X_reg_test_scaled) # Make predictions

mse_rf = mean_squared_error(y_reg_test, y_pred_rf)
rmse_rf = np.sqrt(mse_rf)
r2_rf = r2_score(y_reg_test, y_pred_rf)

# Metrics
print('Random Forest Regression:')
print(f'MSE: {mse_rf:.4f}')
print(f'RMSE: {rmse_rf:.4f}')
print(f'R²: {r2_rf:.4f}')

  return fit_method(estimator, *args, **kwargs)


Random Forest Regression:
MSE: 0.3679
RMSE: 0.6066
R²: 0.5249


Support Vector Machine (Regression)

In [15]:
from sklearn.svm import SVR
svr = SVR()

param_grid_svr = {  # parameter options
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 0.5],
    'kernel': ['rbf', 'linear']
}
grid_svr = GridSearchCV(svr, param_grid_svr, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_svr.fit(X_reg_train_scaled, y_reg_train) # training model

best_svr = grid_svr.best_estimator_
y_pred_svr = best_svr.predict(X_reg_test_scaled) # Making Predictions

# metrics
mse_svr = mean_squared_error(y_reg_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_reg_test, y_pred_svr)

print('Support Vector Regression (SVR):')
print(f'Best Parameters: {grid_svr.best_params_}')
print(f'MSE: {mse_svr:.4f}')
print(f'RMSE: {rmse_svr:.4f}')
print(f'R²: {r2_svr:.4f}')

  y = column_or_1d(y, warn=True)


Support Vector Regression (SVR):
Best Parameters: {'C': 1, 'epsilon': 0.1, 'kernel': 'rbf'}
MSE: 0.4709
RMSE: 0.6862
R²: 0.3920


XG Boost

In [16]:
import xgboost as xgb

xgb_model = xgb.XGBRegressor(
    n_estimators=500,      # Number of trees
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,         # Fraction of samples used for training each tree
    colsample_bytree=0.8,  # Fraction of features used per tree
    objective='reg:squarederror',  # Loss function for regression
    random_state=1
)

xgb_model.fit(X_reg_train_scaled, y_reg_train) # training model
y_pred = xgb_model.predict(X_reg_test_scaled) # Make predictions

# Compute metrics
mse = mean_squared_error(y_reg_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_reg_test, y_pred)

# Print results
print(f'XGBoost Regression:')
print(f'MSE: {mse:.4f}')
print(f'RMSE: {rmse:.4f}')
print(f'R²: {r2:.4f}')

XGBoost Regression:
MSE: 0.3495
RMSE: 0.5912
R²: 0.5488


In [17]:
from sklearn.tree import DecisionTreeRegressor

dtree_model = DecisionTreeRegressor(random_state=1)
dtree_model.fit(X_reg_train_scaled, y_reg_train)  # training the model

y_pred_dtree = dtree_model.predict(X_reg_test_scaled) # Making Predictions

# Metrics
mse_dt = mean_squared_error(y_reg_test, y_pred_dtree)
rmse_dt = np.sqrt(mse_dt)
r2_dt = r2_score(y_reg_test, y_pred_dtree)

print("Decision Tree Regression Results:")
print(f"MSE: {mse_dt:.4f}")
print(f"RMSE: {rmse_dt:.4f}")
print(f"R²: {r2_dt:.4f}")

Decision Tree Regression Results:
MSE: 0.6867
RMSE: 0.8287
R²: 0.1133


In [18]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model_reg = Sequential()
model_reg.add(Dense(64, activation='relu', input_shape=(X_reg_train_scaled.shape[1],))) # Input layer and first hidden layer
model_reg.add(Dense(32, activation='relu')) # Second hidden layer
model_reg.add(Dense(1)) # Output layer

# Training ANN
model_reg.compile(optimizer='adam', loss='mean_squared_error')
model_reg.fit(X_reg_train_scaled, y_reg_train,
                            epochs=100,
                            batch_size=32,
                            validation_data=(X_reg_test_scaled, y_reg_test),
                            verbose=1)

# Make predictions
y_pred_ann_reg = model_reg.predict(X_reg_test_scaled)

# Evaluation Metrics
mse_ann = mean_squared_error(y_reg_test, y_pred_ann_reg)
rmse_ann = np.sqrt(mse_ann)
r2_ann = r2_score(y_reg_test, y_pred_ann_reg)

# Printing results
print("ANN Regression Results:")
print(f"MSE: {mse_ann:.4f}")
print(f"RMSE: {rmse_ann:.4f}")
print(f"R²: {r2_ann:.4f}")

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 21.8973 - val_loss: 3.2091
Epoch 2/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.4104 - val_loss: 2.2678
Epoch 3/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.7372 - val_loss: 1.7572
Epoch 4/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.2818 - val_loss: 1.3688
Epoch 5/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.0868 - val_loss: 1.0265
Epoch 6/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8059 - val_loss: 0.7995
Epoch 7/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.6439 - val_loss: 0.7042
Epoch 8/100
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.5910 - val_loss: 0.6101
Epoch 9/100
[1m123/123[0m [32m━━━━━━━━━━