In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.figure_factory as ff
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, matthews_corrcoef
from sklearn.ensemble import RandomForestClassifier 
from scipy.stats import pearsonr, kendalltau, spearmanr
import xgboost as xgb
from lightgbm import LGBMClassifier

In [2]:
# Read the three input CSVs from the working directory, in the same order
# as before: training rows, test rows, then the sample file.
train, test, output = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv', 'sample.csv')
)

In [3]:
# Columns used as model inputs.
features = [
    'call_days',
    're_call10',
    'short_call10',
    'long_call10',
    'bank_cnt',
    'active_days01',
]

# Label column, kept as a one-element list so that indexing a frame with it
# yields a single-column DataFrame.
target = ['is_5g']

In [4]:
# Empty results table: one row per feature, one column per correlation measure.
correlation = pd.DataFrame(
    index = features,
    columns = ['Pearson Correlation', 'Kendall Correlation', 'Spearman Correlation'],
)

In [5]:
# `target` is a list, so train[target] is a single-column DataFrame (N, 1).
# The SciPy correlation functions are documented for 1-D inputs, so flatten
# the labels once and reuse them for every feature.
target_values = np.ravel(train[target])

for feature in features:
    feature_values = train[feature]

    # Each call returns (statistic, p-value); only the statistic is kept.
    corr_pearson, _ = pearsonr(feature_values, target_values)
    corr_kendall, _ = kendalltau(feature_values, target_values)
    corr_spearman, _ = spearmanr(feature_values, target_values)

    # With 1-D inputs all three statistics are scalars, so no indexing is needed.
    correlation.loc[feature] = [corr_pearson, corr_kendall, corr_spearman]

# Persist the table: features as rows, correlation measures as columns.
correlation.to_csv('temp2_correlations.csv')

In [6]:
# Inputs are the selected feature columns; labels stay single-column
# DataFrames because `target` is a list.
X_train, y_train = train[features], train[target]

# NOTE(review): the evaluation labels are read from sample.csv (`output`).
# If that file is a placeholder submission rather than ground truth, every
# metric computed against y_test is meaningless -- TODO confirm.
X_test, y_test = test[features], output[target]

In [7]:
#Function of Interactive Confusion Matrix
def cm_plotter(y_test, y_pred):
    """Build an annotated Plotly heatmap of the confusion matrix.

    Parameters
    ----------
    y_test : array-like
        True labels. May be a list, 1-D array, Series or single-column
        DataFrame; it is flattened before use.
    y_pred : array-like
        Predicted labels, flattened the same way.

    Returns
    -------
    plotly.graph_objects.Figure
        Heatmap with one annotated cell per (real, predicted) label pair,
        predicted labels on the x axis and real labels on the y axis.
    """
    # Flatten both inputs so a single-column DataFrame works like a Series.
    true_labels = np.ravel(y_test)
    predicted_labels = np.ravel(y_pred)

    # Use the sorted union of labels from both inputs and hand the same list
    # to confusion_matrix, so the tick labels are guaranteed to line up with
    # the matrix rows/columns (including classes that only appear in y_pred).
    labels = sorted(set(true_labels.tolist()) | set(predicted_labels.tolist()))
    z = confusion_matrix(true_labels, predicted_labels, labels = labels)

    # Reverse the rows (and the y tick labels below) so that the first label
    # is drawn at the top of the heatmap.
    z = z[::-1]
    x = labels
    y = labels[::-1]

    # Cell annotations: the raw counts as text.
    z_text = [[str(count) for count in row] for row in z]

    # set up figure
    fig = ff.create_annotated_heatmap(z, x = x, y = y, annotation_text = z_text, colorscale = 'Viridis')

    # add title
    fig.update_layout(title_text='<i><b>Confusion matrix</b></i>')

    # add custom xaxis title
    fig.add_annotation(dict(font = dict(color = "black", size = 14),
                            x = 0.5,
                            y = -0.15,
                            showarrow = False,
                            text = "Predicted value",
                            xref = "paper",
                            yref = "paper"))

    # add custom yaxis title
    fig.add_annotation(dict(font = dict(color = "black", size = 14),
                            x = -0.35,
                            y = 0.5,
                            showarrow = False,
                            text = "Real value",
                            textangle = -90,
                            xref = "paper",
                            yref = "paper"))

    # adjust margins to make room for yaxis title
    fig.update_layout(margin = dict(t = 50, l = 200))

    # add colorbar
    fig['data'][0]['showscale'] = True

    return fig

In [8]:
#Random Forest Classification
# max_features = 'sqrt' is the explicit form of what 'auto' meant for
# classifiers; 'auto' is deprecated/removed in recent scikit-learn releases,
# so the explicit value keeps this cell runnable without changing the model.
classifier_random_forest = RandomForestClassifier(
    n_estimators = 20,
    criterion = 'entropy',
    random_state = 42,
    max_features = 'sqrt',
    max_depth = 10,
    min_samples_split = 6,
    min_samples_leaf = 2,
    bootstrap = False,
)

# y_train is a single-column DataFrame; flatten it to the 1-D shape fit() expects.
classifier_random_forest.fit(X_train, np.ravel(y_train))
y_pred_random_forest = classifier_random_forest.predict(X_test)

In [9]:
# Accuracy of the random forest predictions against y_test.
accuracy_random_forest = accuracy_score(y_true = y_test, y_pred = y_pred_random_forest)
print("Accuracy of Random Forest = ", accuracy_random_forest)

# Matthews correlation coefficient for the same predictions.
mcc_random_forest = matthews_corrcoef(y_true = y_test, y_pred = y_pred_random_forest)
print("Matthews correlation coefficient of Random Forest = ", mcc_random_forest)

# Confusion-matrix plot for the random forest (currently disabled):
#fig_cm_random_forest = cm_plotter(y_test, y_pred_random_forest)
#fig_cm_random_forest.show()

Accuracy of Random Forest =  1.0
Matthews correlation coefficient of Random Forest =  0.0


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


In [10]:
# Per-class precision / recall / F1 table for the random forest predictions.
report_random_forest = classification_report(y_true = y_test, y_pred = y_pred_random_forest)
print(report_random_forest)

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    300000

    accuracy                           1.00    300000
   macro avg       1.00      1.00      1.00    300000
weighted avg       1.00      1.00      1.00    300000



In [11]:
#XGBoost
# Use the scikit-learn wrapper's own parameter names (learning_rate,
# reg_alpha) instead of the native-API aliases eta / alpha, which the wrapper
# only accepts through **kwargs. This matches the existing reg_lambda spelling
# and keeps the estimator's parameters visible to get_params / set_params.
classifier_xgb = xgb.XGBClassifier(
    n_estimators = 100,
    learning_rate = 0.2,
    gamma = 100,
    max_depth = 3,
    subsample = 0.5,
    reg_lambda = 5,
    reg_alpha = 5,
    colsample_bytree = 0.7,
    min_child_weight = 5,
)

# y_train is a single-column DataFrame; flatten it to 1-D before fitting.
classifier_xgb.fit(X_train, np.ravel(y_train))
y_pred_xgb = classifier_xgb.predict(X_test)





In [12]:
# Accuracy of the XGBoost predictions against y_test.
accuracy_xgb = accuracy_score(y_true = y_test, y_pred = y_pred_xgb)
print("Accuracy of XGB = ", accuracy_xgb)

# Matthews correlation coefficient for the same predictions.
mcc_xgb = matthews_corrcoef(y_true = y_test, y_pred = y_pred_xgb)
print("Matthews correlation coefficient of XGB = ", mcc_xgb)

# Confusion-matrix plot for XGBoost (currently disabled):
#fig_cm_xgb = cm_plotter(y_test, y_pred_xgb)
#fig_cm_xgb.show()

Accuracy of XGB =  1.0
Matthews correlation coefficient of XGB =  0.0


In [13]:
# Per-class precision / recall / F1 table for the XGBoost predictions.
report_xgb = classification_report(y_true = y_test, y_pred = y_pred_xgb)
print(report_xgb)

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    300000

    accuracy                           1.00    300000
   macro avg       1.00      1.00      1.00    300000
weighted avg       1.00      1.00      1.00    300000



In [14]:
# LightGBM classifier using the 'dart' boosting type.
classifier_LGBM = LGBMClassifier(
    n_estimators = 80,
    max_depth = 1,
    num_leaves = 10,
    learning_rate = 0.1,
    boosting_type = 'dart',
)

# y_train is a single-column DataFrame; flatten it to 1-D before fitting.
classifier_LGBM.fit(X_train, np.ravel(y_train))
y_pred_LGBM = classifier_LGBM.predict(X_test)

In [15]:
# Accuracy of the LightGBM predictions against y_test.
accuracy_LGBM = accuracy_score(y_true = y_test, y_pred = y_pred_LGBM)
print("Accuracy of LGBM = ", accuracy_LGBM)

# Matthews correlation coefficient for the same predictions.
mcc_LGBM = matthews_corrcoef(y_true = y_test, y_pred = y_pred_LGBM)
print("Matthews correlation coefficient of LGBM = ", mcc_LGBM)

# Confusion-matrix plot for LightGBM (currently disabled):
#fig_cm_LGBM = cm_plotter(y_test, y_pred_LGBM)
#fig_cm_LGBM.show()

Accuracy of LGBM =  1.0
Matthews correlation coefficient of LGBM =  0.0


In [16]:
# Per-class precision / recall / F1 table for the LightGBM predictions.
report_LGBM = classification_report(y_true = y_test, y_pred = y_pred_LGBM)
print(report_LGBM)

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    300000

    accuracy                           1.00    300000
   macro avg       1.00      1.00      1.00    300000
weighted avg       1.00      1.00      1.00    300000

