In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics


In [6]:
# Load the red-wine quality table; the file is semicolon-delimited.
wine = pd.read_csv(
    'winequality-red.csv',
    delimiter=';',
)

In [5]:
# Collapse the raw quality score into two classes:
#   (1, 5] -> label 1,  (5, 10] -> label 2
# (pd.cut intervals are right-inclusive by default).
bins = (1, 5, 10)
group_names = [1, 2]

# pd.cut with labels yields a categorical column. Bin only while the column
# is still un-binned, so re-running this cell does not cut the already-binned
# labels a second time.
if not isinstance(wine['quality'].dtype, pd.CategoricalDtype):
    wine['quality'] = pd.cut(wine['quality'], bins=bins, labels=group_names)
wine.shape

(1599, 12)

In [8]:
# Feature matrix: the columns at positions 0..10, copied into a NumPy array.
X = wine.iloc[:, :11].to_numpy(copy=True)

# Target vector: the column at position 11, copied into a NumPy array.
y = wine.iloc[:, 11].to_numpy(copy=True)

In [58]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [74]:
# Train-test split: hold out 25% of the rows; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)


# Build one pipeline per classifier. Each pipeline standardises the features
# and projects them onto 9 principal components before the classifier step.
pipeline_svm = Pipeline([('sc0', StandardScaler()),
                         ('pca0', PCA(n_components=9)),
                         ('svm_clf', SVC())])

pipeline_lr = Pipeline([('sc1', StandardScaler()),
                        ('pca1', PCA(n_components=9)),
                        ('lr_clf', LogisticRegression(random_state=0))])

pipeline_knn = Pipeline([('scalar2', StandardScaler()),
                         ('pca2', PCA(n_components=9)),
                         ('knn_classifier', KNeighborsClassifier())])

# The forest is seeded so its accuracy is reproducible from run to run, and
# its step is named for the estimator it actually holds.
pipeline_random_forest = Pipeline([('scalar3', StandardScaler()),
                                   ('pca3', PCA(n_components=9)),
                                   ('rf_classifier', RandomForestClassifier(random_state=0))])


pipelines = [pipeline_svm, pipeline_lr, pipeline_knn, pipeline_random_forest]

# Display names, keyed by each pipeline's position in `pipelines`.
pip_dic = {0: 'Support vector',
           1: 'Logistic regression',
           2: 'Knn classifier',
           3: 'Random forest'}

# Fit every pipeline on the training split ...
for pipes in pipelines:
    pipes.fit(X_train, y_train)

# ... then report each one's accuracy on the held-out split.
for i, model in enumerate(pipelines):
    print('{} Accuracy: {}'.format(pip_dic[i], model.score(X_test, y_test)))

Support vector Accuracy: 0.6375
Logistic regression Accuracy: 0.6425
Knn classifier Accuracy: 0.5575
Random forest Accuracy: 0.7175


In [86]:
# Hyper-parameter tuning for the SVM pipeline

from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.pipeline import make_pipeline

# Standardise, reduce to 9 principal components, then classify with an SVM.
# make_pipeline names the final step 'svc', which is the prefix used by the
# grid keys below.
model = make_pipeline(
    StandardScaler(),
    PCA(n_components=9),
    SVC(),
)

# Single grid: RBF kernel only, searched over C and gamma.
param = [
    {
        "svc": [SVC()],
        "svc__C": [0.1, 0.5, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4],
        "svc__kernel": ['rbf'],
        "svc__gamma": [0.01, 0.05, 0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4],
    }
]

# 5-fold cross-validated search scored by accuracy.
grid_svc = GridSearchCV(estimator=model, param_grid=param, scoring='accuracy', cv=5)

# fit() returns the fitted search object itself, so `best_model` and
# `grid_svc` refer to the same object.
grid_svc.fit(X_train, y_train)
best_model = grid_svc

# Accuracy on the held-out split.
best_model.score(X_test, y_test)

0.655

In [87]:
# Show the parameter combination that the accuracy-scored grid search selected as best.
grid_svc.best_params_

{'svc': SVC(C=1, gamma=1.1),
 'svc__C': 1,
 'svc__gamma': 1.1,
 'svc__kernel': 'rbf'}