In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [None]:
# Build a synthetic wine table: every physico-chemical feature is drawn
# uniformly from a fixed (low, high) interval, and the `quality` label is drawn
# separately as an integer. Because the label is sampled independently of the
# features, any classifier trained on this table has nothing real to learn.
N_SAMPLES = 1000

# (low, high) bounds per feature. Dict order matters: the draws below consume
# the seeded random stream in exactly this order.
FEATURE_RANGES = {
    'fixed_acidity': (4, 15),
    'volatile_acidity': (0, 1.58),
    'citric_acid': (0, 1),
    'residual_sugar': (0.9, 15),
    'chlorides': (0.012, 0.611),
    'free_sulfur_dioxide': (1, 72),
    'total_sulfur_dioxide': (6, 289),
    'density': (0.990, 1.004),
    'pH': (2.74, 4.01),
    'sulphates': (0.33, 2),
    'alcohol': (8.4, 14.9),
}

np.random.seed(42)  # fixed seed so the generated table is reproducible
data = {
    feature: np.random.uniform(low, high, N_SAMPLES)
    for feature, (low, high) in FEATURE_RANGES.items()
}
# Integer labels in 3..8 (randint's upper bound is exclusive).
data['quality'] = np.random.randint(3, 9, N_SAMPLES)

df = pd.DataFrame(data)
df.head()


Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,8.119941,0.29251,0.261706,10.385112,0.354626,28.948122,189.456718,0.990543,3.65474,1.855675,10.828665,5
1,14.457857,0.856203,0.246979,12.133208,0.494454,34.613932,54.78534,0.992615,3.612849,1.207352,10.563929,3
2,12.051933,1.379254,0.906255,4.431597,0.467336,61.672865,252.887661,1.001637,2.861608,1.5406,9.545,6
3,10.585243,1.156915,0.249546,9.710725,0.104186,25.140311,179.511896,1.000735,3.911667,1.058201,12.347233,3
4,5.716205,1.274367,0.27195,8.961618,0.1014,62.745128,50.488699,0.994909,3.46196,1.382158,11.498057,4


In [None]:
# Sanity check: number of missing values in each column.
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Separate the target column from the predictor columns.
y = df['quality']
X = df.drop(columns='quality')

# Standardise each predictor (zero mean, unit variance). The scaler is fitted
# here on every row of X.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


fixed_acidity           0
volatile_acidity        0
citric_acid             0
residual_sugar          0
chlorides               0
free_sulfur_dioxide     0
total_sulfur_dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64


In [None]:
# Hold out 20% of the rows for evaluation. The split is done on the *raw*
# features so that the scaling statistics can be learned from the training rows
# only; fitting the scaler on the full table lets test-set means and variances
# leak into preprocessing. The same random_state yields the same row partition
# as splitting the pre-scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Re-fit the scaler on the training rows alone, then apply that single
# transform to both partitions. `scaler` is rebound on purpose so later cells
# that call scaler.transform(...) use the leakage-free statistics.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [None]:
# Random forest of 100 trees; the fixed random_state makes the fitted forest
# reproducible from run to run.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
# Train on the training partition. Left as the cell's last expression so the
# fitted estimator is shown as the cell output.
model.fit(X_train, y_train)


In [None]:
# Predict the held-out rows and summarise how the predictions compare with the
# true labels.
y_pred = model.predict(X_test)

# Compute every metric first, then print them as heading/value pairs.
evaluation = (
    ("Confusion Matrix:", confusion_matrix(y_test, y_pred)),
    ("\nClassification Report:", classification_report(y_test, y_pred)),
    ("\nAccuracy Score:", accuracy_score(y_test, y_pred)),
)
for heading, value in evaluation:
    print(heading)
    print(value)

Confusion Matrix:
[[ 5  9  5  9  8  9]
 [ 2  7  5  5  3  8]
 [ 1  5  7  5  3  2]
 [ 2  4  5  3  6  4]
 [ 5  2  6 13  7  6]
 [ 6  9  4  7  9  4]]

Classification Report:
              precision    recall  f1-score   support

           3       0.24      0.11      0.15        45
           4       0.19      0.23      0.21        30
           5       0.22      0.30      0.25        23
           6       0.07      0.12      0.09        24
           7       0.19      0.18      0.19        39
           8       0.12      0.10      0.11        39

    accuracy                           0.17       200
   macro avg       0.17      0.18      0.17       200
weighted avg       0.18      0.17      0.16       200


Accuracy Score:
0.165


In [None]:
# Score a single hand-entered wine.
#
# The measurements are keyed by feature name rather than passed as a bare
# positional array: the scaler was fitted on a DataFrame, so handing it an
# unnamed ndarray makes scikit-learn warn about missing feature names and
# silently relies on the values being typed in the right order.
sample_values = {
    'fixed_acidity': 7.4,
    'volatile_acidity': 0.7,
    'citric_acid': 0.0,
    'residual_sugar': 1.9,
    'chlorides': 0.076,
    'free_sulfur_dioxide': 11.0,
    'total_sulfur_dioxide': 34.0,
    'density': 0.9978,
    'pH': 3.51,
    'sulphates': 0.56,
    'alcohol': 9.4,
}
# Selecting by X.columns enforces the training column order and raises a
# KeyError if any feature is missing from the sample.
sample = pd.DataFrame([sample_values])[X.columns]

# Apply the same scaling used for the training data, then predict.
sample_scaled = scaler.transform(sample)
predicted_quality = model.predict(sample_scaled)
print(f"Predicted wine quality: {predicted_quality[0]}")

Predicted wine quality: 6


