In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns

In [2]:
data = pd.read_csv('WineQT.csv')
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,3
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,4


### Data Splitting

In [None]:
# inut features
X = data.drop('quality',axis=1)
# output / label
Y = data['quality']

In [5]:
X.shape

(1143, 12)

In [6]:
Y.shape

(1143,)

### Break data into train and test set

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [9]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(914, 12) (229, 12) (914,) (229,)


### Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler,MaxAbsScaler,MinMaxScaler

scalar = StandardScaler()

X_train = scalar.fit_transform(X_train)

X_test = scalar.transform(X_test) # we have computed U = 0 and sigma=1 

### Data Modeling

##### SVC svm_classifier

In [12]:
from sklearn.svm import SVC
svm_classifier = SVC()

svm_classifier.fit(X_train, y_train)

### Prediction 

In [13]:
svm_classifier.predict(X_test)

array([5, 5, 5, 5, 6, 6, 5, 5, 6, 5, 7, 6, 5, 6, 6, 5, 6, 5, 5, 6, 6, 6,
       5, 7, 5, 5, 6, 6, 5, 6, 6, 6, 7, 6, 5, 5, 5, 6, 7, 5, 7, 5, 6, 5,
       5, 5, 6, 6, 5, 6, 7, 5, 6, 6, 6, 6, 6, 6, 6, 5, 5, 6, 6, 6, 6, 5,
       7, 5, 5, 6, 6, 6, 5, 6, 5, 5, 6, 6, 5, 6, 5, 6, 6, 5, 5, 6, 5, 5,
       6, 5, 5, 6, 5, 5, 5, 5, 5, 6, 6, 5, 6, 6, 5, 6, 5, 5, 6, 6, 6, 7,
       6, 5, 5, 6, 5, 5, 6, 7, 6, 5, 5, 5, 5, 6, 5, 6, 6, 6, 5, 6, 5, 5,
       7, 6, 5, 5, 5, 5, 6, 6, 7, 5, 6, 5, 6, 5, 5, 5, 5, 5, 6, 5, 6, 5,
       6, 5, 5, 6, 6, 6, 6, 5, 6, 6, 6, 5, 6, 6, 6, 5, 6, 6, 6, 5, 5, 5,
       7, 6, 6, 5, 6, 5, 6, 5, 5, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6,
       7, 6, 6, 6, 5, 5, 6, 5, 5, 5, 6, 6, 5, 6, 5, 6, 5, 5, 6, 5, 6, 6,
       6, 6, 5, 6, 5, 6, 5, 6, 6], dtype=int64)

### Score of Classification 

In [None]:
svm_classifier.score(X_test,y_test)

0.6550218340611353

### HyperParameter Tuning 

#### Grid Search CV

In [15]:

param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4]
}

from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
print(best_params)

{'C': 1, 'degree': 2, 'gamma': 'auto', 'kernel': 'rbf'}


#### Randomised Search CV

In [16]:
from sklearn.model_selection import RandomizedSearchCV


random_search = RandomizedSearchCV(svm_classifier, param_grid, cv=5)
random_search.fit(X_train, y_train)
best_params_random = random_search.best_params_
print(best_params_random)

{'kernel': 'rbf', 'gamma': 'scale', 'degree': 4, 'C': 1}


### Train the Model with Optimized Parameters

In [17]:
svm_classifier_optimizedparams = SVC(C=1, gamma='scale', kernel='rbf', degree=2)
svm_classifier_optimizedparams.fit(X_train, y_train)
score = svm_classifier_optimizedparams.score(X_test, y_test)
print(f'Optimized SVM Test Accuracy: {score}')

Optimized SVM Test Accuracy: 0.6550218340611353
