In [14]:
import pandas as pd 
import numpy as np 

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score,f1_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [16]:
df=pd.read_csv('cleaned_dataset.csv')
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,0.72499,0.070692,1.248441,1.516302,0.9978,3.51,0.367892,2.341806,5
1,7.8,0.88,0.0,0.824585,0.089375,1.448822,1.652403,0.9968,3.2,0.417916,2.379546,5
2,7.8,0.76,0.04,0.785691,0.084351,1.327761,1.610903,0.997,3.26,0.405982,2.379546,5
3,11.2,0.28,0.56,0.72499,0.069825,1.358505,1.63137,0.998,3.16,0.376671,2.379546,6
4,7.4,0.66,0.0,0.707848,0.069825,1.291725,1.550446,0.9978,3.51,0.367892,2.341806,5


In [17]:
df.shape

(1359, 12)

In [18]:
df['quality'].value_counts()

quality
5    577
6    535
7    167
4     53
8     17
3     10
Name: count, dtype: int64

In [19]:
def quality_category(q):
    if q <= 5:
        return "Low"
    elif q == 6:
        return "Medium"
    else:
        return "High"

In [20]:
df["quality_cat"] = df["quality"].apply(quality_category)

X = df.drop(["quality", "quality_cat"], axis=1).values
y = df["quality_cat"].values

In [21]:
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)   
y_categorical = to_categorical(y_encoded)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [22]:
classes = np.unique(y_encoded)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=y_encoded
)
class_weight_dict = dict(zip(classes, class_weights))
print("Class weights:", class_weight_dict)

Class weights: {np.int64(0): np.float64(2.4619565217391304), np.int64(1): np.float64(0.7078125), np.int64(2): np.float64(0.8467289719626169)}


In [23]:
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax')  
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    class_weight=class_weight_dict,
    verbose=1
)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.3843 - loss: 1.1062 - val_accuracy: 0.5046 - val_loss: 1.0121
Epoch 2/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4914 - loss: 0.9803 - val_accuracy: 0.5000 - val_loss: 0.9579
Epoch 3/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5443 - loss: 0.9235 - val_accuracy: 0.5046 - val_loss: 0.9353
Epoch 4/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5547 - loss: 0.8753 - val_accuracy: 0.5413 - val_loss: 0.9106
Epoch 5/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5673 - loss: 0.8541 - val_accuracy: 0.5229 - val_loss: 0.8992
Epoch 6/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5616 - loss: 0.8634 - val_accuracy: 0.5046 - val_loss: 0.9128
Epoch 7/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━

In [24]:
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
f1_weighted = f1_score(y_true_classes, y_pred_classes, average='weighted')
print("Accuracy:", accuracy_score(y_true_classes, y_pred_classes))
print("F1 Score (Weighted):", f1_weighted)
print("\nClassification Report:\n", classification_report(y_true_classes, y_pred_classes, target_names=encoder.classes_))
print("\nConfusion Matrix:\n", confusion_matrix(y_true_classes, y_pred_classes))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
Accuracy: 0.6213235294117647
F1 Score (Weighted): 0.6046153232913801

Classification Report:
               precision    recall  f1-score   support

        High       0.41      0.84      0.55        37
         Low       0.73      0.80      0.77       128
      Medium       0.62      0.33      0.43       107

    accuracy                           0.62       272
   macro avg       0.59      0.66      0.58       272
weighted avg       0.65      0.62      0.60       272


Confusion Matrix:
 [[ 31   3   3]
 [  7 103  18]
 [ 37  35  35]]


In [25]:
model.save("model.h5")
print("Model saved as 'model.h5'")



Model saved as 'model.h5'
