# SVM

In [565]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, confusion_matrix
from sklearn import metrics, svm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from collections import Counter


### Prepering data

In [566]:
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [567]:
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"],errors="coerce")

In [568]:
df.dropna(inplace=True)

In [569]:
df_d = df.drop("customerID", axis = 1)

In [570]:
df_d['Churn'].replace(to_replace='Yes', value=1, inplace=True)
df_d['Churn'].replace(to_replace='No',  value=0, inplace=True)

#### Skalowanie

In [571]:
df_d.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [572]:
def apply_scalers(df, columns_to_exclude=None):
    if columns_to_exclude:
        exclude_filter = ~df.columns.isin(columns_to_exclude) 
    else:
        exclude_filter = ~df.columns.isin([]) 
    for column in df.iloc[:, exclude_filter].columns:
        df[column] = df[column].astype(float)
 
    df.loc[:, exclude_filter] = StandardScaler().fit_transform(df.loc[:, exclude_filter])
    return df
 
df_d = apply_scalers(df_d, columns_to_exclude=['gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'Churn'])

#### Dumifikacja

In [573]:
dummies_df = pd.get_dummies(df_d)

In [574]:
dummies_df.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,-1.280248,-1.161694,-0.994194,0,1,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0
1,0,0.064303,-0.260878,-0.17374,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,1
2,0,-1.239504,-0.363923,-0.959649,1,0,1,1,0,1,...,0,1,0,0,0,1,0,0,0,1
3,0,0.512486,-0.74785,-0.195248,0,0,1,1,0,1,...,0,0,1,0,1,0,1,0,0,0
4,0,-1.239504,0.196178,-0.940457,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,1,0


### Train test split

In [575]:
y = dummies_df['Churn'].values
X = dummies_df.drop(columns = ['Churn'])

In [576]:
#Splity X,y into train, val, test
X_train, X_val, y_train, y_val = train_test_split(X,y,random_state=44,train_size=0.6,stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, random_state=44,train_size=0.5,stratify=y_val)

### Make scorer "slytherin score"

In [577]:
basic_medians = [64.45, 79.65]
basic_discount = 1 - basic_medians[0] / basic_medians[1]

In [578]:
def function(y_true, y_pred, medians = basic_medians, discount = basic_discount, basic = True, scoring_only = True):
    
    """
    Jeśli interesuje Cie wartość dodana przyjmij scoring_only = False. Wówczas poza scorem otrzymasz także:
        base_case - najlepszy możliwy scenariusz dla firmy
        standard_case - aktualny scenariusz firmy (bez naszej ingerencji)
        model_vase - scenariusz oparty na naszym modelu

    Jeśli chcesz manipulować zmiennymi medians i discount, pamiętaj aby przyjąć basic = False
    """
    
    
    matrix = confusion_matrix(y_true, y_pred)
    TN = matrix[0][0]
    FP = matrix[0][1]
    FN = matrix[1][0]
    TP = matrix[1][1]
    
    if basic:
        basic_score = 1 - (medians[1]/medians[0]*FN + medians[0]/medians[1]*FP)/(TN + TP)
        
        if scoring_only:
            return basic_score
        else:
            best_case = round(medians[0] * Counter(y_true)[0] + medians[1] * Counter(y_true)[1], 2)
            standard_case = round(medians[0] * Counter(y_true)[0] - medians[1] * Counter(y_true)[1], 2)
            model_case = round(basic_score * medians[0]*(TN+TP), 2)
            return basic_score, best_case, standard_case, model_case
    
    else:
        score = 1 - (medians[1]*FN + (1 - discount)*medians[0]*FP)/(medians[0]*TN + (1- discount)*medians[1]*TP)
        
        if scoring_only:
            return score
        else:
            best_case = round(medians[0] * Counter(y_true)[0] + medians[1] * Counter(y_true)[1], 2)
            standard_case = round(medians[0] * Counter(y_true)[0] - medians[1] * Counter(y_true)[1], 2)
            model_case = round(score * (medians[0]*TN + (1- discount)*medians[1]*TP), 2)
            return score, best_case, standard_case, model_case

In [579]:
slytherin_score = make_scorer(function, greater_is_better=True)

### Clasification witg Grid Search CV for SVM model

In [580]:
parameters = {"kernel":("linear", "poly", "rbf", "sigmoid"),
                "C":(0.1,0.2,0.3,0.4,0.5,1,2,3,4,5),
                "degree":(1,2,3),
                "gamma": ["scale", "auto"],
                "class_weight": [None, "balanced"]
                }

In [581]:
svm_clsf = svm.SVC(random_state=44)
grid_clsf = GridSearchCV(estimator = svm_clsf, 
                            param_grid=parameters ,
                            cv=3,
                            n_jobs=-1, 
                            scoring=slytherin_score
                        )

In [582]:
grid_clsf.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=SVC(random_state=44), n_jobs=-1,
             param_grid={'C': (0.1, 0.2, 0.3, 0.4, 0.5, 1, 2, 3, 4, 5),
                         'class_weight': [None, 'balanced'],
                         'degree': (1, 2, 3), 'gamma': ['scale', 'auto'],
                         'kernel': ('linear', 'poly', 'rbf', 'sigmoid')},
             scoring=make_scorer(function))

In [583]:
grid_clsf.cv_results_

{'mean_fit_time': array([0.48420445, 0.4678212 , 0.56137252, 0.58566809, 0.46605492,
        0.45448057, 0.57170916, 0.55543145, 0.46737798, 0.51036938,
        0.59171073, 0.63829263, 0.49284061, 0.49253615, 0.57504217,
        0.56448491, 0.47192677, 0.50770322, 0.89806724, 0.86159913,
        0.58808772, 0.63304615, 0.64071433, 0.57283616, 0.63177125,
        0.66689173, 0.84540256, 0.9913218 , 0.84954389, 0.85304674,
        1.15248489, 0.89827005, 0.66203125, 0.73337936, 0.77739112,
        0.84457056, 0.70499849, 0.81147408, 0.78003184, 0.77159683,
        0.60452652, 0.64264584, 0.78205665, 0.77138042, 0.64183998,
        1.02543688, 1.59810972, 1.1134723 , 0.60359454, 0.48987548,
        0.57255212, 0.53547033, 0.62526202, 0.72605236, 0.68905775,
        0.94445944, 0.9141314 , 0.80148594, 0.63037936, 0.63137873,
        0.61126423, 0.62355677, 0.70483597, 0.64827633, 0.61689059,
        0.6985627 , 0.68358215, 0.64463909, 0.6188093 , 0.78532887,
        0.60737483, 0.69262902,

### Clasification with random search for SVM model

In [584]:
svm_clsf = svm.SVC(random_state=44)
random_clsf = RandomizedSearchCV(estimator = svm_clsf, 
                                    param_distributions=parameters, 
                                    n_iter=20, 
                                    cv=3, 
                                    n_jobs=-1, 
                                    scoring=slytherin_score
                                )

In [585]:
random_clsf.fit(X_train, y_train)

RandomizedSearchCV(cv=3, estimator=SVC(random_state=44), n_iter=20, n_jobs=-1,
                   param_distributions={'C': (0.1, 0.2, 0.3, 0.4, 0.5, 1, 2, 3,
                                              4, 5),
                                        'class_weight': [None, 'balanced'],
                                        'degree': (1, 2, 3),
                                        'gamma': ['scale', 'auto'],
                                        'kernel': ('linear', 'poly', 'rbf',
                                                   'sigmoid')},
                   scoring=make_scorer(function))

In [586]:
random_clsf.cv_results_

{'mean_fit_time': array([0.65274239, 0.85315196, 0.669566  , 0.93975178, 0.87106435,
        0.66404883, 1.17376836, 0.97741127, 0.89894692, 1.02228983,
        0.67404834, 0.7590576 , 0.64271339, 0.46888081, 0.48121556,
        0.73500856, 0.66504876, 0.49201965, 0.68407106, 0.61021948]),
 'std_fit_time': array([0.03581958, 0.02180313, 0.03010199, 0.04600573, 0.03334909,
        0.09274383, 0.02050181, 0.12016694, 0.05978044, 0.10509519,
        0.02486176, 0.00711847, 0.01033925, 0.03611535, 0.02932134,
        0.03544567, 0.05512178, 0.09843616, 0.00878791, 0.0556464 ]),
 'mean_score_time': array([0.2450161 , 0.30971583, 0.30671724, 0.30235465, 0.27801967,
        0.29202143, 0.30269551, 0.28535374, 0.82482266, 0.74072019,
        0.29335411, 0.27301701, 0.64025251, 0.15457201, 0.19468053,
        0.23301554, 0.60119049, 0.15051341, 0.22819996, 0.44336494]),
 'std_score_time': array([0.00990121, 0.01743571, 0.02660668, 0.01302205, 0.03539433,
        0.06132774, 0.00436096, 0.034781

#### Wyniki Grid i Random Search

In [587]:
print(f" Best params for SVM model by GridSearchCV :        {grid_clsf.best_params_}")
print(f" Best score with those params :                     {grid_clsf.best_score_}")
print(f" Best params for SVM model by RandomizedSearchCV :  {random_clsf.best_params_}")
print(f" Best score with those params :                     {random_clsf.best_score_}")

 Best params for SVM model by GridSearchCV :        {'C': 1, 'class_weight': None, 'degree': 1, 'gamma': 'auto', 'kernel': 'sigmoid'}
 Best score with those params :                     0.7293805234654546
 Best params for SVM model by RandomizedSearchCV :  {'kernel': 'sigmoid', 'gamma': 'auto', 'degree': 3, 'class_weight': None, 'C': 0.3}
 Best score with those params :                     0.7275241873244548


### SVM model


In [588]:
print(f" Best params for SVM model by GridSearchCV :        {grid_clsf.best_params_}")
print(f" Best params for SVM model by RandomizedSearchCV :  {random_clsf.best_params_}")

 Best params for SVM model by GridSearchCV :        {'C': 1, 'class_weight': None, 'degree': 1, 'gamma': 'auto', 'kernel': 'sigmoid'}
 Best params for SVM model by RandomizedSearchCV :  {'kernel': 'sigmoid', 'gamma': 'auto', 'degree': 3, 'class_weight': None, 'C': 0.3}


In [591]:
scores = []

# basic model
svm_model = svm.SVC(random_state=44)
svm_model.fit(X_train, y_train)
# params grid
svm_model_grid = svm.SVC(C=1, degree=1, gamma='auto', kernel='sigmoid', random_state=44 )
svm_model_grid.fit(X_train, y_train)
# params random
svm_model_random = svm.SVC(kernel= 'sigmoid', gamma='auto', degree= 3, C= 0.2, random_state=44)
svm_model_random.fit(X_train, y_train)

#prediction from models
pred_basic = svm_model.predict(X_val)
pred_grid = svm_model_grid.predict(X_val)
pred_random = svm_model_random.predict(X_val)


models = (svm_model, svm_model_grid, svm_model_random)
pred = (pred_basic,pred_grid,pred_random)


precision = metrics.precision_score(y_val, pred_basic)
recall = metrics.recall_score(y_val, pred_basic)
roc_auc = metrics.roc_auc_score(y_val, pred_basic)
f1 = metrics.f1_score(y_val, pred_basic)
SS = slytherin_score(svm_model, X_val, y_val)

scores.append({"model": "basic model",
                "precision": precision,
                "recall": recall,
                "ROC AUC": roc_auc,
                "F1": f1,
                "Slytherin score": SS
                    })

precision = metrics.precision_score(y_val, pred_grid)
recall = metrics.recall_score(y_val, pred_grid)
roc_auc = metrics.roc_auc_score(y_val, pred_grid)
f1 = metrics.f1_score(y_val, pred_grid)
SS = slytherin_score(svm_model_grid, X_val, y_val)

scores.append({"model": "grid search params",
                "precision": precision,
                "recall": recall,
                "ROC AUC": roc_auc,
                "F1": f1,
                "Slytherin score": SS
                    })                    


precision = metrics.precision_score(y_val, pred_random)
recall = metrics.recall_score(y_val, pred_random)
roc_auc = metrics.roc_auc_score(y_val, pred_random)
f1 = metrics.f1_score(y_val, pred_random)
SS = slytherin_score(svm_model_random, X_val, y_val)

scores.append({"model": "random search params",
                "precision": precision,
                "recall": recall,
                "ROC AUC": roc_auc,
                "F1": f1,
                "Slytherin score": SS
                    })       


scores_df = pd.DataFrame(scores,columns=["model","precision","recall","ROC AUC","F1","Slytherin score"])

scores_df

Unnamed: 0,model,precision,recall,ROC AUC,F1,Slytherin score
0,basic model,0.695341,0.518717,0.718176,0.594181,0.744759
1,grid search params,0.65528,0.564171,0.728306,0.606322,0.742704
2,random search params,0.680135,0.540107,0.724026,0.602086,0.745886
