# SVM

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


### Prepering data

In [2]:
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [3]:
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"],errors="coerce")

In [4]:
df.dropna(inplace=True)

In [5]:
df_d = df.drop("customerID", axis = 1)

In [6]:
df_d['Churn'].replace(to_replace='Yes', value=1, inplace=True)
df_d['Churn'].replace(to_replace='No',  value=0, inplace=True)

In [7]:
dummies_df = pd.get_dummies(df_d)

In [8]:
dummies_df.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,1,29.85,29.85,0,1,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0
1,0,34,56.95,1889.5,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,1
2,0,2,53.85,108.15,1,0,1,1,0,1,...,0,1,0,0,0,1,0,0,0,1
3,0,45,42.3,1840.75,0,0,1,1,0,1,...,0,0,1,0,1,0,1,0,0,0
4,0,2,70.7,151.65,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,1,0


### Train test split

In [30]:
from sklearn.model_selection import train_test_split

In [31]:
y = dummies_df['Churn'].values
X = dummies_df.drop(columns = ['Churn'])

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44)

### Clasification witg Grid Search CV for SVM model

In [51]:
from sklearn import svm, metrics
from sklearn.model_selection import GridSearchCV
import numpy as np
import matplotlib.pyplot as plt

In [52]:
gamma_range = np.outer(np.logspace(-2,-1,2), np.array([1,5]))
gamma_range = gamma_range.flatten()

C_range = np.outer(np.logspace(-1,0,2), np.array([1,5]))
c_range = C_range.flatten()

In [60]:
parameters = {"kernel":("linear", "rbf"), "C":[1,2,3]}

In [61]:
svm_clsf = svm.SVC()
grid_clsf = GridSearchCV(estimator=svm_clsf, param_grid=parameters ,n_jobs=1, verbose=2)

In [62]:
grid_clsf.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END .................................C=1, kernel=linear; total time= 4.9min
[CV] END .................................C=1, kernel=linear; total time=12.1min
[CV] END .................................C=1, kernel=linear; total time=11.6min
[CV] END .................................C=1, kernel=linear; total time= 6.6min
[CV] END .................................C=1, kernel=linear; total time= 9.6min
[CV] END ....................................C=1, kernel=rbf; total time=   1.4s
[CV] END ....................................C=1, kernel=rbf; total time=   1.4s
[CV] END ....................................C=1, kernel=rbf; total time=   1.4s
[CV] END ....................................C=1, kernel=rbf; total time=   1.4s
[CV] END ....................................C=1, kernel=rbf; total time=   1.4s
[CV] END .................................C=2, kernel=linear; total time= 6.1min
[CV] END .................................C=2, ke

GridSearchCV(estimator=SVC(), n_jobs=1,
             param_grid={'C': [1, 2, 3], 'kernel': ('linear', 'rbf')},
             verbose=2)

In [70]:
print(f" Best params for SVM model by GridSearchCV :    {grid_clsf.best_params_}")
print(f" Best score with those params :                 {grid_clsf.best_score_}")

 Best params for SVM model by GridSearchCV :    {'C': 1, 'kernel': 'linear'}
 Best score with those params :                 0.7889777777777778


In [71]:
grid_clsf.cv_results_

{'mean_fit_time': array([538.46534719,   1.04862666, 410.39586349,   1.42791162,
        721.90364819,   1.56922255]),
 'std_fit_time': array([1.67395253e+02, 2.26811083e-02, 5.63562166e+01, 5.61562105e-02,
        1.68013506e+02, 1.45644987e-01]),
 'mean_score_time': array([0.07979078, 0.45174866, 0.10083098, 0.65096445, 0.10442548,
        0.68430071]),
 'std_score_time': array([0.00887439, 0.01399865, 0.02934728, 0.04619899, 0.00627167,
        0.0176924 ]),
 'param_C': masked_array(data=[1, 1, 2, 2, 3, 3],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'},
  {'C': 1, 'kernel': 'rbf'},
  {'C': 2, 'kernel': 'linear'},
  {'C': 2, 'kernel': 'rbf'},
  {'C': 3, 'kernel': 'linear'},
  

### Clasification with random search for SVM model

### SVC model

From GridSearchCV method the best params por SVC model will be:\
C:      1\
kernel: linear

In [74]:
svm_model = SVC(kernel="linear", C=1, gamma="auto")
svm_model.fit(X_train, y_train)

SVC(C=1, gamma='auto', kernel='linear')

In [76]:
pred = svm_model.predict(X_test)
metrics.accuracy_score(y_test, pred)

0.8159203980099502

In [77]:
from sklearn.metrics import classification_report, confusion_matrix

In [78]:
print(confusion_matrix(y_test, pred))

[[983  59]
 [200 165]]


#### Precision

In [79]:
metrics.precision_score(y_test, pred)

0.7366071428571429

#### ROC AUC

In [80]:
metrics.roc_auc_score(y_test, pred)

0.6977164567612336

#### F1

In [81]:
metrics.f1_score(y_test, pred)

0.5602716468590833