# **Requirement: [Dataset Link](https://www.kaggle.com/datasets/uciml/red-wine-quality-cortez-et-al-2009)**

# **Import necessary libraries**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# **Load and Explore the Dataset**

In [2]:
# Load the red-wine quality dataset; the relative path means the CSV must sit
# in the notebook's working directory
wine_data = pd.read_csv("winequality-red.csv")

# Preview the first rows of the table
print(wine_data.head())

# Summarise columns, dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so emits a stray "None" line.
wine_data.info()

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.8        5 

# **Data Preprocessing**

In [3]:
# Separate the response column (y) from the predictor columns (X)
y = wine_data['quality']
X = wine_data.drop(columns='quality')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **Create and Train the Regression Model**

In [4]:
# Create an ordinary least-squares regressor with default settings
model = LinearRegression()

# Fit it to the training split; as the cell's last expression, the fitted
# estimator is also what the notebook displays
model.fit(X=X_train, y=y_train)

# **Make Predictions and Evaluate the Model**

In [5]:
# Score the held-out samples with the fitted regressor
y_pred = model.predict(X_test)

# Compare the predictions against the true labels with both metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report each metric rounded to two decimal places
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared value: {r2:.2f}")

Mean Squared Error (MSE): 0.39
R-squared value: 0.40


# **Plot Actual vs. Predicted Quality**

In [7]:
import plotly.graph_objects as go

# Assemble the frame the figure reads from. No cell in the notebook defined
# `scatter_data`, so this cell raised NameError on a fresh kernel; build it here
# from the evaluation results so Restart & Run All works.
# y_test supplies the index, so each point keeps its row label from wine_data.
scatter_data = pd.DataFrame({
    'Actual Quality': y_test,
    'Predicted Quality': y_pred,
})

# Create a 3D scatter plot using Plotly: actual vs. predicted quality, with the
# row label on the third axis to spread the points apart
fig = go.Figure(data=[
    go.Scatter3d(
        x=scatter_data['Actual Quality'],
        y=scatter_data['Predicted Quality'],
        z=scatter_data.index,
        mode='markers',
        marker=dict(size=5, opacity=0.7)
    )
])

# Customize the layout and labels
fig.update_layout(
    title='3D Scatter Plot of Wine Quality Prediction',
    scene=dict(
        xaxis_title='Actual Quality',
        yaxis_title='Predicted Quality',
        zaxis_title='Data Point Index'
    )
)

# Show the 3D plot
fig.show()