In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Location of the white-wine quality CSV in the UCI machine-learning repository
dataset_url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-white.csv"
)

The first row of the CSV already contains the column names, so `header=0` tells pandas to read the column names from that row.

In [3]:
# The file is semicolon-delimited; header=0 takes the column names from its first row.
dataset = pd.read_csv(
    dataset_url,
    sep=';',
    header=0,
)

In [4]:
# Preview the first five rows (rich display as the cell's last expression)
dataset.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [5]:
# Dimensions of the dataset as (rows, columns)
n_rows, n_cols = dataset.shape
print((n_rows, n_cols))

(4898, 12)


In [6]:
# 10 random rows of the dataset. random_state pins the draw so that a
# Restart & Run All shows the same rows every time (an unseeded sample
# changes on each execution).
print(dataset.sample(10, random_state=42))

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
905             8.4              0.19         0.42             1.6      0.047   
2985            6.7              0.18         0.28            10.2      0.039   
2523            8.2              0.20         0.38             3.5      0.053   
4149            5.8              0.24         0.28             1.4      0.038   
4255            6.0              0.33         0.26             5.1      0.051   
4210            6.0              0.17         0.21             6.0      0.050   
2710            7.8              0.25         0.34            13.7      0.044   
2462            8.3              0.25         0.33             2.5      0.053   
2474            6.1              0.27         0.31             1.5      0.035   
4556            6.1              0.20         0.40             1.9      0.028   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
905                   9.0      

In [7]:
# Per-column summary statistics: count, mean, std, min, quartiles and max
summary_stats = dataset.describe()
print(summary_stats)

       fixed acidity  volatile acidity  citric acid  residual sugar  \
count    4898.000000       4898.000000  4898.000000     4898.000000   
mean        6.854788          0.278241     0.334192        6.391415   
std         0.843868          0.100795     0.121020        5.072058   
min         3.800000          0.080000     0.000000        0.600000   
25%         6.300000          0.210000     0.270000        1.700000   
50%         6.800000          0.260000     0.320000        5.200000   
75%         7.300000          0.320000     0.390000        9.900000   
max        14.200000          1.100000     1.660000       65.800000   

         chlorides  free sulfur dioxide  total sulfur dioxide      density  \
count  4898.000000          4898.000000           4898.000000  4898.000000   
mean      0.045772            35.308085            138.360657     0.994027   
std       0.021848            17.007137             42.498065     0.002991   
min       0.009000             2.000000         

Splitting the dataset into training and test sets for the ML model

In [8]:
# Features: every column except the target. Selecting by name instead of by
# position (previously iloc[:, 0:11]) keeps the feature matrix correct even if
# columns are added to or reordered in the source file.
X = dataset.drop(columns='quality')
# Target: the 'quality' column.
y = dataset['quality']
# Hold out 33% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [9]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

import itertools as it

Trying different hyperparameter combinations for the SVC model

In [10]:
# Hyperparameter grid: each key maps to the list of values to try.
parameters = {
    'C': [0.5, 1.0, 2.0],
    'kernel': ['rbf', 'sigmoid'],
}

# Cartesian product of the value lists, in the dict's key order (C, kernel),
# giving one tuple per combination.
value_lists = parameters.values()
combinations = list(it.product(*value_lists))
print(combinations)

[(0.5, 'rbf'), (0.5, 'sigmoid'), (1.0, 'rbf'), (1.0, 'sigmoid'), (2.0, 'rbf'), (2.0, 'sigmoid')]


In [11]:
# Fit and score one SVC per hyperparameter combination.
# NOTE(review): X_train is passed to SVC without any feature scaling; SVMs are
# usually sensitive to feature scale, so standardizing first may be worth trying.
for params in combinations:
    print('Trying:',params)
    # Pair each value with its key from `parameters` rather than indexing
    # params[0] / params[1]; this relies on `combinations` being built in the
    # key order of `parameters`, and lets the grid gain extra keys without
    # any change here.
    svc_kwargs = dict(zip(parameters, params))
    svc = SVC(**svc_kwargs)
    svc.fit(X_train, y_train)
    y_pred = svc.predict(X_test)
    # Fraction of test rows whose predicted label equals the true label.
    print("Accuracy:",accuracy_score(y_test, y_pred),"\n")

Trying: (0.5, 'rbf')
Accuracy: 0.5102040816326531 

Trying: (0.5, 'sigmoid')
Accuracy: 0.4520717377860235 

Trying: (1.0, 'rbf')
Accuracy: 0.5528756957328386 

Trying: (1.0, 'sigmoid')
Accuracy: 0.4520717377860235 

Trying: (2.0, 'rbf')
Accuracy: 0.562152133580705 

Trying: (2.0, 'sigmoid')
Accuracy: 0.4520717377860235 

