In [1]:

import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix
)

# Seed passed to train_test_split and to every SVC in this notebook so that
# the split and the fitted models are the same on each run.
RANDOM_STATE = 42
# Fraction of the rows held out as the test set by train_test_split.
TEST_SIZE = 0.2

In [3]:

# Column names applied to the CSV: eleven feature columns followed by the
# "quality" score, which is turned into the class label further down.
cols = [
    "fixed acidity",
    "volatile acidity",
    "citric acid",
    "residual sugar",
    "chlorides",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
    "quality",
]

# Location of the CSV. Defaults to the path this notebook was written
# against; set the WINE_DATA_PATH environment variable to point somewhere
# else without editing the code.
DATA_PATH = os.environ.get("WINE_DATA_PATH", "~/Downloads/winequality-red.csv")

# The file's own header row is skipped (skiprows=1) and replaced by `cols`.
df = pd.read_csv(DATA_PATH, sep=";", skiprows=1, header=None, names=cols)

# Fail fast with a readable message: because the header is discarded, a wrong
# file or delimiter would otherwise only surface later as an opaque cast
# error when the quality column is converted to integers.
if df.empty or df.isna().any().any():
    raise ValueError(
        f"{DATA_PATH!r} did not parse into a complete {len(cols)}-column table; "
        "check the path and that the file is ';'-delimited with one header row."
    )

print("First few rows:")
display(df.head())

print("\nColumns:", df.columns.tolist())
print("\nQuality value counts:")
print(df["quality"].value_counts().sort_index())


def make_labels(y_quality: np.ndarray, mode: str = "3class"):
    """
    Convert raw quality scores into integer class labels.

    Parameters
    ----------
    y_quality : array-like of numeric quality scores; values are cast to int.
    mode : "binary" -> 0 for scores below 7, 1 for scores of 7 and above.
           "3class" -> 0 for scores <= 5, 1 for a score of 6, 2 otherwise.

    Returns
    -------
    (labels, names) : a NumPy integer array with one label per input score,
    and the list of display names indexed by label value.

    Raises
    ------
    ValueError : if `mode` is neither "binary" nor "3class".
    """
    # The integer cast happens before the mode is inspected, so a score that
    # cannot be cast raises regardless of which mode was requested.
    scores = pd.Series(y_quality).astype(int).to_numpy()

    if mode == "binary":
        return (scores >= 7).astype(int), ["bad(<=6)", "good(>=7)"]

    if mode == "3class":
        # 0 = low (<=5), 1 = mid (=6), 2 = high (>=7)
        mid_or_high = np.where(scores == 6, 1, 2)
        return np.where(scores <= 5, 0, mid_or_high), ["low(<=5)", "mid(=6)", "high(>=7)"]

    raise ValueError("mode must be 'binary' or '3class'")


# Target: quality scores collapsed into the three classes from make_labels.
# Features: every column except the raw quality score.
y, class_names = make_labels(df.loc[:, "quality"].to_numpy(), mode="3class")
X = df.drop(columns=["quality"])

class_counts = pd.Series(y).value_counts().sort_index()
print("\nClass distribution (0=low, 1=mid, 2=high):")
print(class_counts)
print("Class names:", class_names)

First few rows:


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5



Columns: ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality']

Quality value counts:
quality
3     10
4     53
5    681
6    638
7    199
8     18
Name: count, dtype: int64

Class distribution (0=low, 1=mid, 2=high):
0    744
1    638
2    217
Name: count, dtype: int64
Class names: ['low(<=5)', 'mid(=6)', 'high(>=7)']


In [4]:
from sklearn.model_selection import train_test_split

# Hold out TEST_SIZE of the rows for testing; stratify=y is passed so the
# split is stratified on the class labels.
split_options = dict(test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f"Train shape: {X_train.shape}  Test shape: {X_test.shape}")


Train shape: (1279, 11)  Test shape: (320, 11)


In [5]:
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix
)

def evaluate_model(name, model, X_test, y_test, class_names=None):
    """
    Score an already-fitted model on held-out data, print a report, and
    return the headline numbers.

    Parameters
    ----------
    name : label printed in the banner and stored under "model".
    model : fitted estimator; only its `predict` method is used.
    X_test, y_test : held-out features and their true labels.
    class_names : optional display names forwarded to classification_report
        as `target_names`.

    Returns
    -------
    dict with keys "model", "accuracy", "precision_macro", "recall_macro"
    and "f1_macro" (precision/recall/F1 are macro-averaged).
    """
    predictions = model.predict(X_test)

    # All metrics are computed before anything is printed.
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
        y_test, predictions, average="macro", zero_division=0
    )
    summary = {
        "model": name,
        "accuracy": accuracy_score(y_test, predictions),
        "precision_macro": macro_precision,
        "recall_macro": macro_recall,
        "f1_macro": macro_f1,
    }
    matrix = confusion_matrix(y_test, predictions)

    print(f"\n===== {name} =====")
    for caption, key in (
        ("Accuracy        :", "accuracy"),
        ("Macro precision :", "precision_macro"),
        ("Macro recall    :", "recall_macro"),
        ("Macro F1-score  :", "f1_macro"),
    ):
        print(caption, summary[key])
    print("Confusion matrix:\n", matrix)

    # target_names=None is what classification_report receives when no
    # class names were supplied, so class_names can be forwarded directly.
    report = classification_report(
        y_test, predictions, target_names=class_names, zero_division=0
    )
    print("\nClassification report:\n", report)

    return summary

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Baseline: standardise the features, then an RBF-kernel SVC with every other
# hyperparameter left at its default.
svm_basic = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svc", SVC(kernel="rbf", random_state=RANDOM_STATE)),
    ]
)
svm_basic.fit(X_train, y_train)

scores_basic = evaluate_model(
    name="SVM_RBF_basic",
    model=svm_basic,
    X_test=X_test,
    y_test=y_test,
    class_names=class_names,
)


===== SVM_RBF_basic =====
Accuracy        : 0.690625
Macro precision : 0.7198900027847396
Macro recall    : 0.6175212007699912
Macro F1-score  : 0.6437499999999999
Confusion matrix:
 [[124  24   1]
 [ 45  80   3]
 [  2  24  17]]

Classification report:
               precision    recall  f1-score   support

    low(<=5)       0.73      0.83      0.78       149
     mid(=6)       0.62      0.62      0.62       128
   high(>=7)       0.81      0.40      0.53        43

    accuracy                           0.69       320
   macro avg       0.72      0.62      0.64       320
weighted avg       0.70      0.69      0.68       320



In [7]:
from sklearn.model_selection import GridSearchCV

# Same scaler + RBF SVC pipeline as the baseline, left unfitted so that
# GridSearchCV can fit it once per candidate and fold.
svm_pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svc", SVC(kernel="rbf", random_state=RANDOM_STATE)),
    ]
)

# 4 values of C x 5 values of gamma = 20 candidates. The "svc__" prefix
# routes each parameter to the pipeline step named "svc".
param_grid = dict(
    svc__C=[0.1, 1, 10, 100],
    svc__gamma=["scale", "auto", 0.01, 0.1, 1.0],
)

grid_search = GridSearchCV(
    svm_pipeline,
    param_grid,
    scoring="f1_macro",   # multi-class scoring
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

print("Best params:", grid_search.best_params_)
print("Best CV macro F1:", grid_search.best_score_)

# Evaluate the winning configuration on the held-out test set.
best_svm = grid_search.best_estimator_
scores_tuned = evaluate_model(
    name="SVM_RBF_tuned",
    model=best_svm,
    X_test=X_test,
    y_test=y_test,
    class_names=class_names,
)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best params: {'svc__C': 100, 'svc__gamma': 0.1}
Best CV macro F1: 0.6417762084956665

===== SVM_RBF_tuned =====
Accuracy        : 0.684375
Macro precision : 0.6925911298886951
Macro recall    : 0.6810690455751521
Macro F1-score  : 0.6838211389812106
Confusion matrix:
 [[119  27   3]
 [ 50  70   8]
 [  3  10  30]]

Classification report:
               precision    recall  f1-score   support

    low(<=5)       0.69      0.80      0.74       149
     mid(=6)       0.65      0.55      0.60       128
   high(>=7)       0.73      0.70      0.71        43

    accuracy                           0.68       320
   macro avg       0.69      0.68      0.68       320
weighted avg       0.68      0.68      0.68       320



In [8]:
# Linear-kernel counterpart of the baseline, for comparison with the RBF runs.
svm_linear = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svc", SVC(kernel="linear", random_state=RANDOM_STATE)),
    ]
)
svm_linear.fit(X_train, y_train)

scores_linear = evaluate_model(
    name="SVM_linear_basic",
    model=svm_linear,
    X_test=X_test,
    y_test=y_test,
    class_names=class_names,
)


===== SVM_linear_basic =====
Accuracy        : 0.61875
Macro precision : 0.4059186580155088
Macro recall    : 0.4701132550335571
Macro F1-score  : 0.43551931136028993
Confusion matrix:
 [[124  25   0]
 [ 54  74   0]
 [  0  43   0]]

Classification report:
               precision    recall  f1-score   support

    low(<=5)       0.70      0.83      0.76       149
     mid(=6)       0.52      0.58      0.55       128
   high(>=7)       0.00      0.00      0.00        43

    accuracy                           0.62       320
   macro avg       0.41      0.47      0.44       320
weighted avg       0.53      0.62      0.57       320



In [10]:
# Baseline RBF pipeline with class_weight="balanced" as the only change.
svm_balanced = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "svc",
            SVC(kernel="rbf", class_weight="balanced", random_state=RANDOM_STATE),
        ),
    ]
)
svm_balanced.fit(X_train, y_train)

scores_balanced = evaluate_model(
    name="SVM_RBF_balanced",
    model=svm_balanced,
    X_test=X_test,
    y_test=y_test,
    class_names=class_names,
)


===== SVM_RBF_balanced =====
Accuracy        : 0.65
Macro precision : 0.6178876574784503
Macro recall    : 0.6899257322719942
Macro F1-score  : 0.6325996523774052
Confusion matrix:
 [[115  30   4]
 [ 38  56  34]
 [  0   6  37]]

Classification report:
               precision    recall  f1-score   support

    low(<=5)       0.75      0.77      0.76       149
     mid(=6)       0.61      0.44      0.51       128
   high(>=7)       0.49      0.86      0.63        43

    accuracy                           0.65       320
   macro avg       0.62      0.69      0.63       320
weighted avg       0.66      0.65      0.64       320



In [11]:
# One summary dict per model, in the order the models were trained above.
results_list = [scores_basic, scores_tuned, scores_linear, scores_balanced]

# One row per model, one column per key of the summary dicts.
results_df = pd.DataFrame(data=results_list)
display(results_df)

Unnamed: 0,model,accuracy,precision_macro,recall_macro,f1_macro
0,SVM_RBF_basic,0.690625,0.71989,0.617521,0.64375
1,SVM_RBF_tuned,0.684375,0.692591,0.681069,0.683821
2,SVM_linear_basic,0.61875,0.405919,0.470113,0.435519
3,SVM_RBF_balanced,0.65,0.617888,0.689926,0.6326
