# Dataset Selection and Preprocessing

In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset. The file is semicolon-delimited, so the separator is
# passed explicitly.
data = pd.read_csv('winequality-red.csv', delimiter=';')
# Check the column names in the dataset
print(data.columns)

# Separate features and target variable
X = data.drop(columns='quality')
y = data['quality']

# Split the dataset into training (80%) and testing sets (20%) BEFORE scaling.
# Fitting the scaler on the full dataset would let the test rows' min/max
# influence preprocessing (data leakage). The row assignment depends only on
# the number of samples and random_state, so the same rows end up in each split
# as when the split is applied to the already-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize the data using Min-Max Scaler: learn per-feature min/max from the
# training rows only, then apply that same mapping to the test rows. Test
# values may therefore fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling. Kept so that any later
# cell that refers to X_normalized still finds it defined.
X_normalized = scaler.transform(X)

print("Data Preprocessing Complete.")


Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')
Data Preprocessing Complete.


# Model Design and Implementation

In [7]:
# Build the Random Forest Regressor (fixed random_state) and train it on the
# training split in a single step; fit() returns the fitted estimator itself.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

print("Model Training Complete.")


Model Training Complete.


# Model Training and Evaluation

In [8]:
# Predict targets for the testing set with the trained model
y_pred = model.predict(X_test)

# Compare predictions with the true test targets:
# Mean Squared Error (MSE) and R-squared
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Emit the whole report in one call, one entry per line
print(
    "Model Evaluation:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R-squared: {r2:.2f}",
    sep="\n",
)


Model Evaluation:
Mean Squared Error (MSE): 0.30
R-squared: 0.54
