In [None]:
import pandas as pd   #loading and handling dataset
import numpy as np    #for numerical operations
import tensorflow as tf   #building and training neural networks
from sklearn.model_selection import train_test_split  #To split data into training and testing sets
from sklearn.preprocessing import StandardScaler      #To normalize/scale features
from sklearn.metrics import classification_report, confusion_matrix    #For evaluation metrics
from sklearn.preprocessing import LabelEncoder     #To convert labels into numbers

In [None]:
df = pd.read_csv("wine.csv")  # Load the CSV file
print(df['quality'].value_counts())      # Check how many examples are present for each quality score


quality
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64


In [None]:
X = df.drop("quality", axis=1)  # X contains all columns except the target column "quality"
y = df["quality"]               # y contains only the "quality" column, which is our label

# Encode labels from [3–8] to [0–5] for softmax
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# One-hot encode target
y_onehot = tf.keras.utils.to_categorical(y_encoded)

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_onehot, test_size=0.2, random_state=42)


In [None]:
model = tf.keras.models.Sequential([                     # Create a sequential model (one layer after another)
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # Input layer with 128 neurons
    tf.keras.layers.Dropout(0.3),                        # Dropout layer to avoid overfitting (randomly drops 30% neurons)
    tf.keras.layers.Dense(64, activation='relu'),        # Hidden layer with 64 neurons and ReLU activation
    tf.keras.layers.Dropout(0.3),                        # Another dropout layer
    tf.keras.layers.Dense(6, activation='softmax')       # Output layer with 6 neurons (for 6 classes) and softmax
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
model.compile(
    optimizer='adam',                     # Adam optimizer automatically adjusts learning rate
    loss='categorical_crossentropy',      # Loss function for multiclass classification
    metrics=['accuracy']                  # Track accuracy while training
)

model.summary()                           # Print the summary of the model architecture


In [None]:
early_stop = tf.keras.callbacks.EarlyStopping(
    patience=10,                          # If validation accuracy doesn't improve for 10 epochs, stop training
    restore_best_weights=True            # Restore weights from the best-performing epoch
)

history = model.fit(
    X_train, y_train,                    # Training data
    epochs=100,                          # Train for 100 epochs (max)
    batch_size=32,                       # Use 32 samples per batch
    validation_data=(X_test, y_test),    # Evaluate model on test data after each epoch
    callbacks=[early_stop]               # Use early stopping to avoid overfitting
)


Epoch 1/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.2970 - loss: 1.6912 - val_accuracy: 0.5219 - val_loss: 1.1929
Epoch 2/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5485 - loss: 1.1520 - val_accuracy: 0.5688 - val_loss: 1.0525
Epoch 3/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5821 - loss: 1.0579 - val_accuracy: 0.5500 - val_loss: 1.0099
Epoch 4/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5971 - loss: 1.0147 - val_accuracy: 0.5312 - val_loss: 0.9893
Epoch 5/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6062 - loss: 0.9938 - val_accuracy: 0.5437 - val_loss: 0.9813
Epoch 6/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5910 - loss: 0.9823 - val_accuracy: 0.5562 - val_loss: 0.9642
Epoch 7/100
[1m40/40[0m [32m━━━

In [None]:
loss, acc = model.evaluate(X_test, y_test)   # Evaluate the model on test data
print("Test Accuracy:", acc)                 # Print the test accuracy


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6265 - loss: 0.8729 
Test Accuracy: 0.6156250238418579


In [None]:
y_pred = model.predict(X_test)               # Predict probabilities for each class
y_pred_classes = np.argmax(y_pred, axis=1)   # Pick the class with the highest probability
y_true_classes = np.argmax(y_test, axis=1)   # Convert one-hot labels back to class numbers

print(confusion_matrix(y_true_classes, y_pred_classes))  # Show confusion matrix
print(classification_report(y_true_classes, y_pred_classes))  # Print precision, recall, f1-score


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[[ 0  0  1  0  0  0]
 [ 0  0  9  1  0  0]
 [ 0  0 93 36  1  0]
 [ 0  0 34 87 11  0]
 [ 0  0  0 25 17  0]
 [ 0  0  0  1  4  0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00        10
           2       0.68      0.72      0.70       130
           3       0.58      0.66      0.62       132
           4       0.52      0.40      0.45        42
           5       0.00      0.00      0.00         5

    accuracy                           0.62       320
   macro avg       0.30      0.30      0.29       320
weighted avg       0.58      0.62      0.60       320



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
