# SVM

In [60]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder


### Prepering data

In [61]:
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [62]:
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"],errors="coerce")

In [63]:
df.dropna(inplace=True)

In [64]:
df_d = df.drop("customerID", axis = 1)

In [65]:
df_d['Churn'].replace(to_replace='Yes', value=1, inplace=True)
df_d['Churn'].replace(to_replace='No',  value=0, inplace=True)

#### Skalowanie

In [66]:
df_d.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [67]:
def apply_scalers(df, columns_to_exclude=None):
    if columns_to_exclude:
        exclude_filter = ~df.columns.isin(columns_to_exclude) 
    else:
        exclude_filter = ~df.columns.isin([]) 
    for column in df.iloc[:, exclude_filter].columns:
        df[column] = df[column].astype(float)
 
    df.loc[:, exclude_filter] = StandardScaler().fit_transform(df.loc[:, exclude_filter])
    return df
 
df_d = apply_scalers(df_d, columns_to_exclude=['gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'Churn'])

#### Dumifikacja

In [68]:
dummies_df = pd.get_dummies(df_d)

In [69]:
dummies_df.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,-1.280248,-1.161694,-0.994194,0,1,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0
1,0,0.064303,-0.260878,-0.17374,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,1
2,0,-1.239504,-0.363923,-0.959649,1,0,1,1,0,1,...,0,1,0,0,0,1,0,0,0,1
3,0,0.512486,-0.74785,-0.195248,0,0,1,1,0,1,...,0,0,1,0,1,0,1,0,0,0
4,0,-1.239504,0.196178,-0.940457,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,1,0


### Train test split

In [70]:
from sklearn.model_selection import train_test_split

In [71]:
y = dummies_df['Churn'].values
X = dummies_df.drop(columns = ['Churn'])

#### Train test split NEW

In [72]:
#Splity X,y into train, val, test
X_train, X_val, y_train, y_val = train_test_split(X,y,random_state=44,train_size=0.6,stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, random_state=44,train_size=0.5,stratify=y_val)

### Clasification witg Grid Search CV for SVM model

In [73]:
from sklearn import svm, metrics
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import numpy as np
import matplotlib.pyplot as plt

In [74]:
parameters = {"kernel":("linear", "rbf"), "C":[1,2,3]}

In [75]:
svm_clsf = svm.SVC()
grid_clsf = GridSearchCV(estimator=svm_clsf, param_grid=parameters ,n_jobs=1, verbose=2)

In [76]:
grid_clsf.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END .................................C=1, kernel=linear; total time=   0.9s
[CV] END .................................C=1, kernel=linear; total time=   0.7s
[CV] END .................................C=1, kernel=linear; total time=   0.8s
[CV] END .................................C=1, kernel=linear; total time=   0.9s
[CV] END .................................C=1, kernel=linear; total time=   0.7s
[CV] END ....................................C=1, kernel=rbf; total time=   0.9s
[CV] END ....................................C=1, kernel=rbf; total time=   1.0s
[CV] END ....................................C=1, kernel=rbf; total time=   0.8s
[CV] END ....................................C=1, kernel=rbf; total time=   0.8s
[CV] END ....................................C=1, kernel=rbf; total time=   0.9s
[CV] END .................................C=2, kernel=linear; total time=   0.8s
[CV] END .................................C=2, ke

GridSearchCV(estimator=SVC(), n_jobs=1,
             param_grid={'C': [1, 2, 3], 'kernel': ('linear', 'rbf')},
             verbose=2)

In [77]:
grid_clsf.cv_results_

{'mean_fit_time': array([0.78654575, 0.69493771, 0.81694179, 0.72254567, 1.00348043,
        0.7244072 ]),
 'std_fit_time': array([0.05193877, 0.02745081, 0.04111278, 0.02039422, 0.09391292,
        0.02534256]),
 'mean_score_time': array([0.10320764, 0.28157063, 0.08093143, 0.2810153 , 0.08790851,
        0.27929668]),
 'std_score_time': array([0.01556321, 0.02213124, 0.0027066 , 0.01516808, 0.00390372,
        0.01150727]),
 'param_C': masked_array(data=[1, 1, 2, 2, 3, 3],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'},
  {'C': 1, 'kernel': 'rbf'},
  {'C': 2, 'kernel': 'linear'},
  {'C': 2, 'kernel': 'rbf'},
  {'C': 3, 'kernel': 'linear'},
  {'C': 3, 'kernel': 'rbf'}],
 'split0

### Clasification with random search for SVM model

In [78]:
svm_clsf = svm.SVC()
random_clsf = RandomizedSearchCV(estimator=svm_clsf, param_distributions=parameters, n_iter=1, cv=3, n_jobs=1, verbose=2)

In [79]:
random_clsf.fit(X_train, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END .................................C=2, kernel=linear; total time=   0.6s
[CV] END .................................C=2, kernel=linear; total time=   0.6s
[CV] END .................................C=2, kernel=linear; total time=   0.7s


RandomizedSearchCV(cv=3, estimator=SVC(), n_iter=1, n_jobs=1,
                   param_distributions={'C': [1, 2, 3],
                                        'kernel': ('linear', 'rbf')},
                   verbose=2)

In [80]:
random_clsf.cv_results_

{'mean_fit_time': array([0.60633572]),
 'std_fit_time': array([0.02560001]),
 'mean_score_time': array([0.11756372]),
 'std_score_time': array([0.01728374]),
 'param_kernel': masked_array(data=['linear'],
              mask=[False],
        fill_value='?',
             dtype=object),
 'param_C': masked_array(data=[2],
              mask=[False],
        fill_value='?',
             dtype=object),
 'params': [{'kernel': 'linear', 'C': 2}],
 'split0_test_score': array([0.78820185]),
 'split1_test_score': array([0.78876245]),
 'split2_test_score': array([0.79516358]),
 'mean_test_score': array([0.79070929]),
 'std_test_score': array([0.00315796]),
 'rank_test_score': array([1])}

#### Wyniki Grid i Random Search

In [81]:
print(f" Best params for SVM model by GridSearchCV :        {grid_clsf.best_params_}")
print(f" Best score with those params :                     {grid_clsf.best_score_}")
print(f" Best params for SVM model by RandomizedSearchCV :  {random_clsf.best_params_}")
print(f" Best score with those params :                     {random_clsf.best_score_}")

 Best params for SVM model by GridSearchCV :        {'C': 1, 'kernel': 'rbf'}
 Best score with those params :                     0.799239344925874
 Best params for SVM model by RandomizedSearchCV :  {'kernel': 'linear', 'C': 2}
 Best score with those params :                     0.7907092930659309


### SVM model

From GridSearchCV method the best params por SVC model will be:\
C:      1\
kernel: linear

In [None]:
#kernel="linear", C=1, gamma="auto", class_weight="balanced"

In [137]:
scores = []

In [138]:
# bez wyników z grid ani random
svm_model = svm.SVC()
svm_model.fit(X_train, y_train)

pred = svm_model.predict(X_val)

precision = metrics.precision_score(y_val, pred)
recall = metrics.recall_score(y_val, pred)
roc_auc = metrics.roc_auc_score(y_val, pred)
f1 = metrics.f1_score(y_val, pred)

scores.append({"model params": "bez eksploracji parametrów", 
                "precision": precision, 
                "recall": recall,
                "ROC AUC": roc_auc,
                "F1": f1
                })


In [139]:
# paramerty grid
svm_model_grid = svm.SVC(kernel="rbf", C=1, gamma="auto", class_weight="balanced")
svm_model_grid.fit(X_train, y_train)

pred = svm_model_grid.predict(X_val)

precision1 = metrics.precision_score(y_val, pred)
recall1 = metrics.recall_score(y_val, pred)
roc_auc1 = metrics.roc_auc_score(y_val, pred)
f11 = metrics.f1_score(y_val, pred)

scores.append({"model params": "GridSearch", 
                "precision": precision1, 
                "recall": recall1,
                "ROC AUC": roc_auc1,
                "F1": f11
                })


In [140]:
# parametry ransom
svm_model_random = svm.SVC(kernel="linear", C=2, gamma="auto", class_weight="balanced")
svm_model_random.fit(X_train, y_train)

pred = svm_model_random.predict(X_val)

precision2 = metrics.precision_score(y_val, pred)
recall2 = metrics.recall_score(y_val, pred)
roc_auc2 = metrics.roc_auc_score(y_val, pred)
f12 = metrics.f1_score(y_val, pred)

scores.append({"model params": "RandomSearch", 
                "precision": precision2, 
                "recall": recall2,
                "ROC AUC": roc_auc2,
                "F1": f12
                })

                

### Wyniki 

In [141]:
best_params = pd.DataFrame(scores,columns=["model params","precision","recall","ROC AUC","F1"])

In [142]:
best_params

Unnamed: 0,model params,precision,recall,ROC AUC,F1
0,bez eksploracji parametrów,0.695341,0.518717,0.718176,0.594181
1,GridSearch,0.491228,0.823529,0.75721,0.615385
2,RandomSearch,0.462774,0.847594,0.745502,0.598678
