In [1]:
import pandas as pd 
import numpy as np 

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score,f1_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [3]:
df=pd.read_csv('cleaned_dataset.csv')
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.064711,0.07325,2.484907,3.555348,0.9978,3.51,0.444686,2.341806,5
1,7.8,0.88,0.0,1.280934,0.09349,3.258097,4.219508,0.9968,3.2,0.518794,2.379546,5
2,7.8,0.76,0.04,1.193922,0.088011,2.772589,4.007333,0.997,3.26,0.500775,2.379546,5
3,11.2,0.28,0.56,1.064711,0.072321,2.890372,4.110874,0.998,3.16,0.457425,2.379546,6
4,7.4,0.66,0.0,1.029619,0.072321,2.639057,3.713572,0.9978,3.51,0.444686,2.341806,5


In [4]:
df.shape

(1359, 12)

In [5]:
df['quality'].value_counts()

quality
5    577
6    535
7    167
4     53
8     17
3     10
Name: count, dtype: int64

In [15]:
mean=np.mean(df['quality'])
print(round(mean,0))

6.0


In [7]:
def quality_category(q):
    if q <= 5:
        return "Low"
    elif q == 6:
        return "Medium"
    else:
        return "High"

In [8]:
df["quality_cat"] = df["quality"].apply(quality_category)

X = df.drop(["quality", "quality_cat"], axis=1).values
y = df["quality_cat"].values

In [9]:
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y) 


y_categorical = to_categorical(y_encoded)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
classes = np.unique(y_encoded)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=y_encoded
)
class_weight_dict = dict(zip(classes, class_weights))
print("Class weights:", class_weight_dict)

Class weights: {np.int64(0): np.float64(2.4619565217391304), np.int64(1): np.float64(0.7078125), np.int64(2): np.float64(0.8467289719626169)}


In [11]:
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax')  
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    class_weight=class_weight_dict,
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.4453 - loss: 1.0754 - val_accuracy: 0.5367 - val_loss: 0.9975
Epoch 2/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4833 - loss: 1.0159 - val_accuracy: 0.4908 - val_loss: 0.9648
Epoch 3/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5362 - loss: 0.9417 - val_accuracy: 0.4862 - val_loss: 0.9453
Epoch 4/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5558 - loss: 0.8785 - val_accuracy: 0.4817 - val_loss: 0.9213
Epoch 5/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5593 - loss: 0.8582 - val_accuracy: 0.4954 - val_loss: 0.9182
Epoch 6/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5938 - loss: 0.8408 - val_accuracy: 0.4862 - val_loss: 0.9161
Epoch 7/50
[1m28/28[0m [32m━━━━━━━━━

In [12]:
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
f1_weighted = f1_score(y_true_classes, y_pred_classes, average='weighted')
print("Accuracy:", accuracy_score(y_true_classes, y_pred_classes))
print("F1 Score (Weighted):", f1_weighted)
print("\nClassification Report:\n", classification_report(y_true_classes, y_pred_classes, target_names=encoder.classes_))
print("\nConfusion Matrix:\n", confusion_matrix(y_true_classes, y_pred_classes))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Accuracy: 0.625
F1 Score (Weighted): 0.6080913402244232

Classification Report:
               precision    recall  f1-score   support

        High       0.43      0.81      0.56        37
         Low       0.73      0.81      0.77       128
      Medium       0.61      0.34      0.43       107

    accuracy                           0.62       272
   macro avg       0.59      0.65      0.59       272
weighted avg       0.64      0.62      0.61       272


Confusion Matrix:
 [[ 30   3   4]
 [  5 104  19]
 [ 35  36  36]]


In [13]:
model.save("model.h5")
print("Model saved as 'model.h5'")



Model saved as 'model.h5'
