In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [12]:
# Read the wine dataset from the working directory, then separate the
# "Class" target column from the remaining feature columns.
df = pd.read_csv("wine.data.csv")

y = df["Class"]
X = df.drop(columns=["Class"])

In [13]:
# Per-column count of missing entries (`isna` is the pandas alias of `isnull`).
null_values = df.isna().sum()
print(null_values)

Class                           0
Alcohol                         0
Malic acid                      0
Ash                             0
Alcalinity of ash               0
Magnesium                       0
Total phenols                   0
Flavanoids                      0
Nonflavanoid phenols            0
Proanthocyanins                 0
Color intensity                 0
Hue                             0
OD280/OD315 of diluted wines    0
Proline                         0
dtype: int64


In [14]:
from sklearn.preprocessing import LabelEncoder

In [15]:
# Distinct labels present in the target column, reported with their count.
unique_classes = y.unique()
print(
    f"Unique classes in the target variable: {unique_classes}",
    f"Number of unique classes: {len(unique_classes)}",
    sep="\n",
)

Unique classes in the target variable: [1 2 3]
Number of unique classes: 3


In [16]:
# Fit a label encoder on the raw class labels, then map every label to its
# integer code; the first ten codes are printed as a sanity check.
encoder = LabelEncoder().fit(y)
y_encoded = encoder.transform(y)

print(f"Encoded target values: {y_encoded[:10]}")

Encoded target values: [0 0 0 0 0 0 0 0 0 0]


In [10]:

from sklearn.utils import shuffle

# Shuffle features and encoded labels together (same permutation for both),
# seeded so the ordering is reproducible.
# NOTE(review): this cell's execution count (In [10]) is out of sequence with
# its neighbours, and `X_shuffled` / `y_shuffled` are not referenced by any
# later cell visible here — confirm whether this step is still needed.
X_shuffled, y_shuffled = shuffle(X, y_encoded, random_state=42)

print(X_shuffled.iloc[:5])
print(y_shuffled[:10])


     Alcohol  Malic acid   Ash  Alcalinity of ash  Magnesium  Total phenols  \
19     13.64        3.10  2.56               15.2        116           2.70   
45     14.21        4.04  2.44               18.9        111           2.85   
140    12.93        2.81  2.70               21.0         96           1.54   
30     13.73        1.50  2.70               22.5        101           3.00   
67     12.37        1.17  1.92               19.6         78           2.11   

     Flavanoids  Nonflavanoid phenols  Proanthocyanins  Color intensity   Hue  \
19         3.03                  0.17             1.66             5.10  0.96   
45         2.65                  0.30             1.25             5.24  0.87   
140        0.50                  0.53             0.75             4.60  0.77   
30         3.25                  0.29             2.38             5.70  1.19   
67         2.00                  0.27             1.04             4.68  1.12   

     OD280/OD315 of diluted wines  Pro

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Restrict the model to six of the feature columns.
selected_features = [
    'Alcohol',
    'Malic acid',
    'Ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
]
# This rebinds `X`, replacing the full feature frame built when the data was loaded.
X = df.loc[:, selected_features]

# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
# NOTE(review): the split uses the raw `y` labels rather than `y_encoded` or
# the shuffled copies produced in earlier cells, and it is not stratified —
# confirm that this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training data shape: {X_train_scaled.shape}")
print(f"Test data shape: {X_test_scaled.shape}")


Training data shape: (142, 6)
Test data shape: (36, 6)


In [22]:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Train a 5-nearest-neighbours classifier on the scaled training features.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict the held-out rows and report accuracy, per-class metrics and the
# confusion matrix.
y_pred = knn.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


Accuracy: 0.9444

Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.93      0.96        14
           2       0.93      0.93      0.93        14
           3       0.89      1.00      0.94         8

    accuracy                           0.94        36
   macro avg       0.94      0.95      0.94        36
weighted avg       0.95      0.94      0.94        36


Confusion Matrix:
[[13  1  0]
 [ 0 13  1]
 [ 0  0  8]]


In [23]:
import joblib

In [24]:

# Persist the fitted classifier and the fitted scaler to the working directory.
# NOTE(review): both objects were fitted on the `selected_features` columns
# only, and that column list is not saved with them — anything loading these
# files must supply the same columns in the same order.
joblib.dump(value=knn, filename='knn_wine_quality_model.joblib')
joblib.dump(value=scaler, filename='scaler.joblib')

print("Model and scaler have been saved successfully!")

Model and scaler have been saved successfully!
