In [1]:

# =============================================================================
# # Import Library
# =============================================================================
import pandas as pd
from sklearn.model_selection import GridSearchCV 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC


In [2]:
# =============================================================================
# # Import Data
# =============================================================================
data = pd.read_csv('lab7_data.csv', sep='\t', decimal=',')

In [3]:
# =============================================================================
# # Information 
# =============================================================================
data.head()


Unnamed: 0.1,Unnamed: 0,contract_id,cnt_blocks_1m,cnt_blocks_2m,cnt_blocks_3m,cnt_blocks_2m_sum,cnt_blocks_3m_sum,ratio_cnt_blocks_2m,ratio_cnt_blocks_3m,avg_block_3m,...,avg_daily_traffic_4w,avg_traffic_per_session_1m,avg_traffic_per_session_2m,avg_traffic_per_session_3m,avg_traffic_per_session_1w,avg_traffic_per_session_2w,avg_traffic_per_session_3w,avg_traffic_per_session_4w,ARPU,churn
0,0,4965,0,3,1,3,4,0.777977,0.0,521,...,0.0,0.000182,0.0018,0.0,4.6e-05,0.001666,0.0,0.0,890.0,1
1,1,4974,0,6,0,6,6,1.0,0.415613,474,...,4.318102,0.000312,0.000319,0.000201,0.000165,0.002317,0.0,0.001118,538.0,1
2,2,2018,0,0,0,0,0,0.744709,0.436665,0,...,0.0,0.000181,1e-05,0.000262,0.0,9.2e-05,0.0,0.0,1340.0,0
3,3,2628,1,0,0,1,1,0.0,0.702495,166,...,0.0,0.0,0.0,8.3e-05,0.0,0.00207,0.000592,0.0,153.688502,1
4,4,3336,0,0,0,0,0,0.0,1.0,780,...,1.601099,0.0,0.0,0.000243,0.000371,0.001504,0.0,0.0,1046.0,1


In [8]:
# =============================================================================
# # Create instance of Classifier and their parameter
# =============================================================================
# SVC 
svc = SVC()
svc_params = {'kernel':('sigmoid', 'rbf'), 'C':[1, 10]}
# Decision Tree
tree = DecisionTreeClassifier()
tree_params = {'max_depth':range(2,6), 'criterion':('gini','entropy')}
# Knn
neighb = KNeighborsClassifier()
neighb_params = {'n_neighbors':range(1,10)}


In [11]:
# =============================================================================
# # Grids set for instances of classes with the scoring - ROC_AUC
# =============================================================================
# For Svc
grid_svc = GridSearchCV(svc, svc_params, scoring='roc_auc', verbose=1)
# For Decision Tree
grid_tree = GridSearchCV(tree, tree_params, scoring='roc_auc', verbose=1)
# For Knn
grid_neighb = GridSearchCV(neighb, neighb_params, scoring='roc_auc', verbose=1)

In [13]:
# =============================================================================
# # Fitting the classifier with their Parameters
# =============================================================================
# For Svc
grid_svc.fit(data.drop('churn', axis=1), data.churn)
# For Decision Tree
grid_tree.fit(data.drop('churn', axis=1), data.churn)
# For Knn
grid_neighb.fit(data.drop('churn', axis=1), data.churn)

Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:   14.0s finished


Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:    1.1s finished


Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    4.6s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_neighbors': range(1, 10)}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring='roc_auc', verbose=1)

In [15]:

# =============================================================================
# # The best results of ROC_AUC for each type of classifier.
# =============================================================================
print('#####################################')
print('# DecisionTree =',grid_tree.best_score_)
print('#####################################')
print('# SVM SVC =',grid_svc.best_score_)
print('#####################################')
print('# KNN =',grid_neighb.best_score_)
print('#####################################')

#####################################
# DecisionTree = 0.608138716404326
#####################################
# SVM SVC = 0.5
#####################################
# KNN = 0.5385341773169221
#####################################
