# SVM

In [474]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, confusion_matrix
from sklearn import metrics, svm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV


### Prepering data

In [401]:
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [402]:
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"],errors="coerce")

In [403]:
df.dropna(inplace=True)

In [404]:
df_d = df.drop("customerID", axis = 1)

In [405]:
df_d['Churn'].replace(to_replace='Yes', value=1, inplace=True)
df_d['Churn'].replace(to_replace='No',  value=0, inplace=True)

#### Skalowanie

In [406]:
df_d.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [407]:
def apply_scalers(df, columns_to_exclude=None):
    if columns_to_exclude:
        exclude_filter = ~df.columns.isin(columns_to_exclude) 
    else:
        exclude_filter = ~df.columns.isin([]) 
    for column in df.iloc[:, exclude_filter].columns:
        df[column] = df[column].astype(float)
 
    df.loc[:, exclude_filter] = StandardScaler().fit_transform(df.loc[:, exclude_filter])
    return df
 
df_d = apply_scalers(df_d, columns_to_exclude=['gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'Churn'])

#### Dumifikacja

In [408]:
dummies_df = pd.get_dummies(df_d)

In [409]:
dummies_df.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,-1.280248,-1.161694,-0.994194,0,1,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0
1,0,0.064303,-0.260878,-0.17374,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,1
2,0,-1.239504,-0.363923,-0.959649,1,0,1,1,0,1,...,0,1,0,0,0,1,0,0,0,1
3,0,0.512486,-0.74785,-0.195248,0,0,1,1,0,1,...,0,0,1,0,1,0,1,0,0,0
4,0,-1.239504,0.196178,-0.940457,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,1,0


### Train test split

In [410]:
y = dummies_df['Churn'].values
X = dummies_df.drop(columns = ['Churn'])

In [411]:
#Splity X,y into train, val, test
X_train, X_val, y_train, y_val = train_test_split(X,y,random_state=44,train_size=0.6,stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, random_state=44,train_size=0.5,stratify=y_val)

### Make scorer "slytherin score"

In [412]:
def function(y_true, y_pred, discount = 0.2):
    
    matrix = confusion_matrix(y_true, y_pred)
    TP = matrix[0][0]
    FP = matrix[0][1]
    FN = matrix[1][0]
    TN = matrix[1][1]
    
    return 1 - (FN + discount*FP)/(TN + discount*TP)

In [413]:
slytherin_score = make_scorer(function)

### Clasification witg Grid Search CV for SVM model

In [414]:
parameters = {"kernel":("linear", "poly", "rbf", "sigmoid"),
                "C":(0.1,0.2,0.3,0.4,0.5,1,2,3,4,5),
                "degree":(1,2,3),
                "gamma": ["scale", "auto"],
                "class_weight": [None, "balanced"]
                }

In [415]:
svm_clsf = svm.SVC(random_state=44)
grid_clsf = GridSearchCV(estimator = svm_clsf, 
                            param_grid=parameters ,
                            cv=3,
                            n_jobs=-1, 
                            scoring=slytherin_score
                        )

In [416]:
grid_clsf.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=SVC(random_state=44), n_jobs=-1,
             param_grid={'C': (0.1, 0.2, 0.3, 0.4, 0.5, 1, 2, 3, 4, 5),
                         'class_weight': [None, 'balanced'],
                         'degree': (1, 2, 3), 'gamma': ['scale', 'auto'],
                         'kernel': ('linear', 'poly', 'rbf', 'sigmoid')},
             scoring=make_scorer(function))

In [417]:
grid_clsf.cv_results_

{'mean_fit_time': array([0.47117496, 0.42436059, 0.4870561 , 0.5153687 , 0.40549064,
        0.39982327, 0.48970127, 0.4773674 , 0.40403048, 0.4036936 ,
        0.48503502, 0.52827263, 0.40002807, 0.41702906, 0.49503462,
        0.50136773, 0.42536394, 0.43369754, 0.50870403, 0.54603895,
        0.42836316, 0.44169935, 0.50303753, 0.49169914, 0.57203992,
        0.58237576, 0.70476373, 0.68517415, 0.58237743, 0.6133763 ,
        0.71905963, 0.73387472, 0.56301395, 0.59037685, 0.69404793,
        0.69004933, 0.57093517, 0.74106805, 0.72238652, 0.71960727,
        0.5733757 , 0.5920407 , 0.70371747, 0.70705724, 0.5681053 ,
        0.91341805, 0.70727666, 0.71105258, 0.44803135, 0.4146955 ,
        0.50570242, 0.50629067, 0.45487873, 0.42002877, 0.49472388,
        0.4905924 , 0.44336669, 0.42669566, 0.50670497, 0.499904  ,
        0.46406396, 0.44157902, 0.49203412, 0.50803598, 0.45387793,
        0.4618818 , 0.52637323, 0.50270263, 0.45530311, 0.44830672,
        0.48736842, 0.48503447,

### Clasification with random search for SVM model

In [418]:
svm_clsf = svm.SVC(random_state=44)
random_clsf = RandomizedSearchCV(estimator = svm_clsf, 
                                    param_distributions=parameters, 
                                    n_iter=20, 
                                    cv=3, 
                                    n_jobs=-1, 
                                    scoring=slytherin_score
                                )

In [419]:
random_clsf.fit(X_train, y_train)

RandomizedSearchCV(cv=3, estimator=SVC(random_state=44), n_iter=20, n_jobs=-1,
                   param_distributions={'C': (0.1, 0.2, 0.3, 0.4, 0.5, 1, 2, 3,
                                              4, 5),
                                        'class_weight': [None, 'balanced'],
                                        'degree': (1, 2, 3),
                                        'gamma': ['scale', 'auto'],
                                        'kernel': ('linear', 'poly', 'rbf',
                                                   'sigmoid')},
                   scoring=make_scorer(function))

In [420]:
random_clsf.cv_results_

{'mean_fit_time': array([1.63023591, 0.94624066, 0.6893146 , 1.02299762, 0.70873737,
        0.91439199, 1.00415619, 1.22745037, 1.03156106, 1.08651153,
        1.05439711, 1.07732383, 0.7694846 , 2.01279585, 1.18684371,
        0.63983576, 0.88073095, 0.70903452, 0.70318532, 1.50569105]),
 'std_fit_time': array([0.10011541, 0.21546868, 0.04591573, 0.04440975, 0.02043009,
        0.02869258, 0.11782312, 0.03478049, 0.03803363, 0.06844082,
        0.0667894 , 0.04888905, 0.02851963, 0.13055443, 0.02381146,
        0.09240527, 0.05447295, 0.05059149, 0.07383301, 0.17581622]),
 'mean_score_time': array([0.28935568, 0.79316147, 0.30318515, 0.27369563, 0.30384167,
        0.27903565, 0.40194599, 0.35443489, 0.94356434, 0.32064597,
        0.86952806, 0.33657495, 0.25337036, 0.404615  , 0.26902095,
        0.20601336, 0.68789196, 0.70195436, 0.23634966, 0.23325014]),
 'std_score_time': array([0.00555821, 0.04328947, 0.03512689, 0.00958409, 0.01779075,
        0.01451003, 0.06349454, 0.050459

#### Wyniki Grid i Random Search

In [421]:
print(f" Best params for SVM model by GridSearchCV :        {grid_clsf.best_params_}")
print(f" Best score with those params :                     {grid_clsf.best_score_}")
print(f" Best params for SVM model by RandomizedSearchCV :  {random_clsf.best_params_}")
print(f" Best score with those params :                     {random_clsf.best_score_}")

 Best params for SVM model by GridSearchCV :        {'C': 3, 'class_weight': 'balanced', 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'}
 Best score with those params :                     0.6985151048699438
 Best params for SVM model by RandomizedSearchCV :  {'kernel': 'linear', 'gamma': 'auto', 'degree': 3, 'class_weight': 'balanced', 'C': 2}
 Best score with those params :                     0.6956235461893313


### SVM model


In [446]:
print(f" Best params for SVM model by GridSearchCV :        {grid_clsf.best_params_}")
print(f" Best params for SVM model by RandomizedSearchCV :  {random_clsf.best_params_}")

 Best params for SVM model by GridSearchCV :        {'C': 3, 'class_weight': 'balanced', 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'}
 Best params for SVM model by RandomizedSearchCV :  {'kernel': 'linear', 'gamma': 'auto', 'degree': 3, 'class_weight': 'balanced', 'C': 2}


In [467]:
scores = []

# basic model
svm_model = svm.SVC(random_state=44)
svm_model.fit(X_train, y_train)
# params grid
svm_model_grid = svm.SVC(C=3, class_weight="balanced", degree=1,  gamma='scale', kernel='poly', random_state=44 )
svm_model_grid.fit(X_train, y_train)
# params random
svm_model_random = svm.SVC(kernel= 'rbf', gamma= 'auto', degree= 4, class_weight= 'balanced', C= 5, random_state=44)
svm_model_random.fit(X_train, y_train)

#prediction from models
pred_basic = svm_model.predict(X_val)
pred_grid = svm_model_grid.predict(X_val)
pred_random = svm_model_random.predict(X_val)


models = (svm_model, svm_model_grid, svm_model_random)
pred = (pred_basic,pred_grid,pred_random)


precision = metrics.precision_score(y_val, pred_basic)
recall = metrics.recall_score(y_val, pred_basic)
roc_auc = metrics.roc_auc_score(y_val, pred_basic)
f1 = metrics.f1_score(y_val, pred_basic)
SS = slytherin_score(svm_model, X_val, y_val)

scores.append({"model": "basic model",
                "precision": precision,
                "recall": recall,
                "ROC AUC": roc_auc,
                "F1": f1,
                "Slytherin score": SS
                    })

precision = metrics.precision_score(y_val, pred_grid)
recall = metrics.recall_score(y_val, pred_grid)
roc_auc = metrics.roc_auc_score(y_val, pred_grid)
f1 = metrics.f1_score(y_val, pred_grid)
SS = slytherin_score(svm_model_grid, X_val, y_val)

scores.append({"model": "grid search params",
                "precision": precision,
                "recall": recall,
                "ROC AUC": roc_auc,
                "F1": f1,
                "Slytherin score": SS
                    })                    


precision = metrics.precision_score(y_val, pred_random)
recall = metrics.recall_score(y_val, pred_random)
roc_auc = metrics.roc_auc_score(y_val, pred_random)
f1 = metrics.f1_score(y_val, pred_random)
SS = slytherin_score(svm_model_random, X_val, y_val)

scores.append({"model": "random search params",
                "precision": precision,
                "recall": recall,
                "ROC AUC": roc_auc,
                "F1": f1,
                "Slytherin score": SS
                    })       


scores_df = pd.DataFrame(scores,columns=["model","precision","recall","ROC AUC","F1","Slytherin score"])

scores_df

Unnamed: 0,model,precision,recall,ROC AUC,F1,Slytherin score
0,basic model,0.695341,0.518717,0.718176,0.594181,0.486176
1,grid search params,0.462774,0.847594,0.745502,0.598678,0.709649
2,random search params,0.51269,0.81016,0.765545,0.627979,0.715361
