### Imports 

In [53]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

### Reading the dataset

In [54]:
# Source CSV for the wine-quality data, fetched over HTTP at run time.
WINE_QUALITY_CSV_URL = 'https://raw.githubusercontent.com/Explore-AI/Public-Data/master/Data/classification_sprint/winequality.csv'

df = pd.read_csv(WINE_QUALITY_CSV_URL)
# Show the first rows as a quick sanity check of the load.
df.head()

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,0,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,0,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,0,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,0,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


### Data Preprocessing

In [55]:
# Distinct values of the `type` column. Only the last expression of a cell is
# rendered, so this value is computed but not displayed.
df['type'].unique()
# Number of missing entries in each column.
df.isna().sum()

type                     0
fixed acidity           10
volatile acidity         8
citric acid              3
residual sugar           2
chlorides                2
free sulfur dioxide      0
total sulfur dioxide     0
density                  0
pH                       9
sulphates                4
alcohol                  0
quality                  0
dtype: int64

In [56]:
### START FUNCTION
def data_preprocess(df):
    """Turn the raw wine-quality frame into scaled train/test splits.

    Missing values are replaced with 0, a binary label is derived from the
    ``quality`` column (0 when quality <= 4, otherwise 1), all remaining
    columns are standardised with ``StandardScaler``, and the rows are split
    75% / 25% with ``random_state=42``.

    The caller's DataFrame is not modified: the NaN fill is done on a copy
    and no ``y`` column is written back, so re-running the cell or inspecting
    ``df`` afterwards still shows the raw data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``quality`` column. Every other column (except an
        existing ``y`` column, which is ignored) is used as a feature.

    Returns
    -------
    tuple
        ``((X_train, y_train), (X_test, y_test))`` as produced by
        ``train_test_split`` on the scaled features and the label array.
    """
    # fillna without inplace returns a new frame, leaving the input untouched.
    filled = df.fillna(0)

    # 0 for quality <= 4, 1 otherwise; int64 matches what the label column held before.
    labels = filled['quality'].gt(4).astype('int64').to_numpy()

    # A pre-existing 'y' column (e.g. left over from an earlier run) is not a feature.
    features = filled.drop(columns=['quality', 'y'], errors='ignore')

    # NOTE(review): the scaler is fit on all rows before the split, so test rows
    # contribute to the scaling statistics. Kept as-is to preserve the outputs.
    scaler = preprocessing.StandardScaler()
    scaled_features = scaler.fit_transform(features)

    X_train, X_test, y_train, y_test = train_test_split(scaled_features,
                                                        labels,
                                                        test_size=0.25,
                                                        random_state=42)
    return (X_train, y_train), (X_test, y_test)
### END FUNCTION

In [57]:
### START FUNCTION
def train_SVC_model(X_train,y_train):
    """Fit a support-vector classifier on the training data and return it.

    The classifier is created with ``gamma='auto'`` and ``random_state=40``;
    every other setting is left at the ``SVC`` default.

    Parameters
    ----------
    X_train : array-like
        Training features passed straight to ``SVC.fit``.
    y_train : array-like
        Training labels passed straight to ``SVC.fit``.

    Returns
    -------
    SVC
        The fitted estimator.
    """
    classifier = SVC(gamma='auto', random_state=40)
    # fit() returns the estimator itself, so the fitted model can be returned directly.
    return classifier.fit(X_train, y_train)

### END FUNCTION

In [58]:
def custom_scoring_function(y_true, y_pred):
    """Binary log loss (cross-entropy) between labels and predictions.

    Predictions are clipped to ``[1e-15, 1 - 1e-15]`` so that values of
    exactly 0 or 1 do not produce ``log(0)``.

    Parameters
    ----------
    y_true : array-like
        True labels. Lists and other array-likes are accepted; they are
        converted to float arrays before any arithmetic.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    numpy.float64
        Mean log loss rounded to 7 decimal places.
    """
    epsilon = 1e-15

    # Coerce up front: `1 - y_true` is not defined for a plain Python list.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.clip(np.asarray(y_pred, dtype=float), epsilon, 1 - epsilon)

    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return np.round(loss, 7)


In [62]:
# Build a default SVC only to see which hyperparameter names it exposes.
model = SVC()
# Iterating the params dict yields its keys.
list(model.get_params())

['C',
 'break_ties',
 'cache_size',
 'class_weight',
 'coef0',
 'decision_function_shape',
 'degree',
 'gamma',
 'kernel',
 'max_iter',
 'probability',
 'random_state',
 'shrinking',
 'tol',
 'verbose']

In [None]:
# Candidate values for the SVC `C` and `gamma` settings (three each);
# presumably consumed by a grid search further down - confirm against later cells.
parameter_grid = {'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1]}