In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

In [3]:
train=pd.read_csv('dataset/new_train4.csv')
test=pd.read_csv('dataset/new_test4.csv')

In [4]:
x_train=train.drop(['customer_id', 'churn_risk_score'], axis=1)
x_test=test.drop(['customer_id'], axis=1)
y_train=train['churn_risk_score']

In [5]:
x_train.shape, x_test.shape, y_train.shape

((36992, 55), (19919, 55), (36992,))

In [6]:
y1d=[]
for i in y_train:
    y1d.append(i)

In [7]:
y_train.value_counts()

3    10424
4    10185
5     9827
1     3815
2     2741
Name: churn_risk_score, dtype: int64

In [8]:
tempy=y_train[:10000]
tempy.value_counts()

3    2785
5    2752
4    2680
1    1017
2     766
Name: churn_risk_score, dtype: int64

In [9]:
ty=[]
for i in tempy:
    ty.append(i)

In [10]:
tx=x_train[:10000]

# CV

In [11]:
def cv_f1(model):
    
    return cross_val_score(model, x_train, y1d, scoring='f1_macro', cv=5)

In [12]:
rf=RandomForestClassifier()
cv_rf=cv_f1(rf)
cv_rf.mean()*100, cv_rf.std()

(73.36378841871134, 0.0038916303387813616)

In [13]:
gbr=GradientBoostingClassifier()
cv_gbr=cv_f1(gbr)
cv_gbr.mean()*100, cv_gbr.std()

(73.8703687295278, 0.008002539475388632)

In [14]:
xgb=XGBClassifier()
cv_xgb=cv_f1(xgb)
cv_xgb.mean()*100, cv_xgb.std()

(73.3954230362088, 0.008188929962841016)

In [15]:
lgb=LGBMClassifier()
cv_lgb=cv_f1(lgb)
cv_lgb.mean()*100, cv_lgb.std()

(73.87166349423893, 0.004220271754203087)

In [16]:
cat=CatBoostClassifier(logging_level='Silent')
cv_cat=cv_f1(cat)
cv_cat.mean()*100, cv_cat.std()

(73.72664549197809, 0.004809878211774382)

# Hyperparameter tuning

In [17]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [14]:
params={'n_estimators':[100, 300, 500, 1000],
        'max_depth':[None, 20, 50, 100],
        'min_samples_split':[2, 5, 10, 15, 20, 25],
        'max_features':[None, 5, 10, 15, 20, 25, 30],
        'min_samples_leaf':[1, 2, 3],
        'bootstrap':[True, False]}
grid=GridSearchCV(RandomForestClassifier(), params, verbose=10, cv=2,scoring='f1_macro')
grid.fit(x_train, y1d)
grid.best_params_
# {'bootstrap': True,
#  'max_depth': 100,
#  'max_features': 20,
#  'min_samples_leaf': 1,
#  'min_samples_split': 2,
#  'n_estimators': 100}

# {'max_depth': 20,
#  'max_features': 15,
#  'min_samples_split': 5,
#  'n_estimators': 100}

In [15]:
params={'n_estimators':[100],
        'max_depth':[None, 20, 50, 100],
        'min_samples_split':[2, 5, 10, 15, 20, 25],
        'max_features':[None, 5, 10, 15, 20, 25],
        'min_samples_leaf':[1, 2],
        'bootstrap':[True, False]}
grid=GridSearchCV(RandomForestClassifier(), params, verbose=10, cv=2,scoring='f1_macro')
grid.fit(tx, ty)
grid.best_params_
# {'bootstrap': True,
#  'max_depth': 100,
#  'max_features': 20,
#  'min_samples_leaf': 1,
#  'min_samples_split': 2,
#  'n_estimators': 100}

# {'bootstrap': False,
#  'max_depth': 20,
#  'max_features': 10,
#  'min_samples_leaf': 1,
#  'min_samples_split': 25,
#  'n_estimators': 100}

- from the above experiment it is clear that we have to tune the model on the whole dataset

# Whole dataset

In [121]:
ada=AdaBoostClassifier()
ada.fit(x_train, yabs)
y_test=pd.DataFrame(ada.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('ada.csv', index=False)
# 49.20840

# Choosing the best

# RF

In [None]:
#         'max_depth':[None, 20, 50, 100],
#         'min_samples_split':[2, 5, 10],
#         'max_features':[None, 5, 10, 15]
rf1 - 76.56

In [17]:
rf=RandomForestClassifier()
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf.csv', index=False)
# 75.69791

In [18]:
rf=RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=100)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf1.csv', index=False)
# 76.05622

In [19]:
rf=RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=2, n_estimators=5000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf2.csv', index=False)
# 75.24

In [20]:
rf=RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=1000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf3.csv', index=False)
# 76.04795

In [21]:
rf=RandomForestClassifier(max_depth=50, max_features=15, min_samples_split=5, n_estimators=1000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf4.csv', index=False)
# 76.50499--

In [22]:
rf=RandomForestClassifier(max_depth=20, max_features=20, min_samples_split=5, n_estimators=1000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf5.csv', index=False)
# 76.22893

In [23]:
rf=RandomForestClassifier(max_depth=50, max_features=20, min_samples_split=5, n_estimators=1000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf6.csv', index=False)
# 76.00399

In [24]:
rf=RandomForestClassifier(max_depth=20, max_features=30, min_samples_split=5, n_estimators=1000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf8.csv', index=False)
# 76.07151

In [25]:
rf=RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=5000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf9.csv', index=False)
# 76.17997

In [26]:
rf=RandomForestClassifier(max_depth=50, max_features=15, min_samples_split=5, n_estimators=5000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf10.csv', index=False)
# 75.95995

In [27]:
rf=RandomForestClassifier(max_depth=20, max_features=20, min_samples_split=5, n_estimators=5000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf11.csv', index=False)
# 76.14370

In [28]:
rf=RandomForestClassifier(max_depth=20, max_features=20, min_samples_split=5, n_estimators=100)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf12.csv', index=False)
# 76.26224

In [29]:
rf=RandomForestClassifier(max_depth=100, max_features=20, min_samples_split=2, min_samples_leaf=1, bootstrap=True, n_estimators=100)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf13.csv', index=False)
# 76.13453

In [30]:
rf=RandomForestClassifier(max_depth=30, max_features=25, min_samples_split=3, n_estimators=5000)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf14.csv', index=False)
# 76.14370

In [31]:
rf=RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=500)
rf.fit(x_train, y1d)
y_test=pd.DataFrame(rf.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('rf15.csv', index=False)
# 76.56

# XGB

In [None]:
params={'learning_rate':[0.300000012, 0.1, 0.2, 0.01],
        'gamma': [0, 1, 3],
        'max_depth': [6, 10, 15],
        'min_child_weight': [0, 2, 5],
        'red_lambda': [1, 3],
        'sampling_method':['uniform', 'gradient_based']}
grid=GridSearchCV(XGBRegressor(gpu_id=0,tree_method='gpu_hist', n_jobs=-1), params, verbose=10)
grid.fit(x_sc_train, ynp)
grid.best_params_
# {'gamma': 0,
#  'learning_rate': 0.2,
#  'max_depth': 6,
#  'min_child_weight': 5,
#  'red_lambda': 1,
#  'sampling_method': 'uniform'}
5- 76.50

In [32]:
xgb=XGBClassifier(gamma=0, learning_rate=0.3, max_depth=6, min_child_weight=1, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb.csv', index=False)
# 76.46553

In [33]:
xgb=XGBClassifier(gamma=0, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb1.csv', index=False)
# 76.22531

In [34]:
xgb=XGBClassifier(gamma=0, learning_rate=0.1, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb2.csv', index=False)
# 76.27881

In [35]:
xgb=XGBClassifier(gamma=0, learning_rate=0.01, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb3.csv', index=False)
# 75.40368

In [36]:
xgb=XGBClassifier(gamma=0, learning_rate=0.3, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb4.csv', index=False)
# 75.86570

In [37]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb5.csv', index=False)
# 76.42308

In [38]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=1, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb6.csv', index=False)
# 76.12656

In [39]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=10, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb7.csv', index=False)
# 76.14594

In [40]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=10, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb8.csv', index=False)
# 76.24170--

In [41]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=10, min_child_weight=10, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb9.csv', index=False)
# 75.91932

In [42]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, reg_alpha=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb10.csv', index=False)
# 75.92505

In [43]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=10, reg_lambda=1, reg_alpha=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb11.csv', index=False)
# 76.11484

In [44]:
xgb=XGBClassifier(gamma=2, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb12.csv', index=False)
# 76.21041

In [45]:
xgb=XGBClassifier(gamma=0, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, reg_alpha=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb13.csv', index=False)
# 76.14331

In [46]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=5, min_child_weight=5, reg_lambda=1, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb14.csv', index=False)
# 76.24698

In [47]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform', 
                  subsample=0.7, colsample_bytree=0.7)
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb15.csv', index=False)
# 76.15158

In [48]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform', subsample=0.7)
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb16.csv', index=False)
# 75.68550

In [49]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform', subsample=0.8)
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb17.csv', index=False)
# 76.27809

In [50]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform', 
                  subsample=0.8, colsample_bytree=0.8)
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb18.csv', index=False)
# 76.12090

In [51]:
xgb=XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=0.5, sampling_method='uniform')
xgb.fit(x_train, y1d)
y_test=pd.DataFrame(xgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('xgb19.csv', index=False)
# 76.11515

# GB

In [None]:
# learning_rate=0.1
# n_estimators=100
# min_samples_split=2
# min_samples_leaf=1
# max_depth=3

12- 76.61

In [52]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr.csv', index=False)
# 76.48140--

In [53]:
gbr=GradientBoostingClassifier(learning_rate=0.2, n_estimators=100, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr1.csv', index=False)
# 76.48270--

In [54]:
gbr=GradientBoostingClassifier(learning_rate=0.3, n_estimators=100, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr2.csv', index=False)
# 75.82118

In [55]:
gbr=GradientBoostingClassifier(learning_rate=0.05, n_estimators=100, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr3.csv', index=False)
# 76.19001

In [56]:
gbr=GradientBoostingClassifier(learning_rate=0.01, n_estimators=100, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr4.csv', index=False)
# 66.78013

In [57]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, min_samples_split=5, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr5.csv', index=False)
# 76.28894

In [58]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, min_samples_split=10, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr6.csv', index=False)
# 76.32689

In [59]:
gbr=GradientBoostingClassifier(learning_rate=0.2, n_estimators=100, min_samples_split=10, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr7.csv', index=False)
# 76.23836

In [60]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, min_samples_split=10, min_samples_leaf=1, max_depth=5)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr8.csv', index=False)
# 75.62583

In [61]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=200, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr9.csv', index=False)
# 76.58300

In [62]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=500, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr10.csv', index=False)
# 75.97324

In [11]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=1000, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr11.csv', index=False)
# 75.86020

In [12]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr12.csv', index=False)
# 76.44777

In [13]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=5, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr13.csv', index=False)
# 76.32619

In [14]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=2, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr14.csv', index=False)
# 76.19080

In [15]:
gbr=GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, min_samples_split=2, min_samples_leaf=1, max_depth=3)
gbr.fit(x_train, y1d)
y_test=pd.DataFrame(gbr.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('gbr.csv', index=False)
# 76.36151

# CAT

In [19]:
params={'iterations':[500, 1000, 1500, 2000],
        'learning_rate':[0.03, 0.02, 0.01, 0.1, 0.2, 0.3],
        'l2_leaf_reg':[1.0, 3.0, 5.0, 7.0],
        'depth':[6, 10, 16],
        'grow_policy':['Lossguide', 'SymmetricTree']}
grid=RandomizedSearchCV(CatBoostRegressor(logging_level='Silent'),
                  params, verbose=10)
grid.fit(x_train, y1d)
grid.best_params_
# {'depth': 16,
#  'grow_policy': 'Lossguide',
#  'iterations': 1000,
#  'l2_leaf_reg': 3.0,
#  'learning_rate': 0.03}
14- 76.51

In [16]:
cat=CatBoostClassifier(logging_level='Silent', iterations=1000, l2_leaf_reg=3.0, depth=6, grow_policy="SymmetricTree")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat.csv', index=False)
# 76.24766--
cat.get_all_params()['learning_rate']

0.029999999329447743

In [17]:
cat=CatBoostClassifier(logging_level='Silent', iterations=1000, l2_leaf_reg=3.0, depth=6, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat1.csv', index=False)
# 76.31847--
cat.get_all_params()['learning_rate']

0.029999999329447743

In [18]:
cat=CatBoostClassifier(logging_level='Silent', iterations=500, l2_leaf_reg=3.0, depth=6, grow_policy="SymmetricTree")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat2.csv', index=False)
# 76.37977--
cat.get_all_params()['learning_rate']

0.029999999329447743

In [19]:
cat=CatBoostClassifier(logging_level='Silent', iterations=800, l2_leaf_reg=3.0, depth=6, grow_policy="SymmetricTree")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat3.csv', index=False)
# 76.16426
cat.get_all_params()['learning_rate']

0.029999999329447743

In [20]:
cat=CatBoostClassifier(logging_level='Silent', iterations=500, l2_leaf_reg=3.0, depth=6, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat4.csv', index=False)
# 76.59734--
cat.get_all_params()['learning_rate']

0.029999999329447743

In [21]:
cat=CatBoostClassifier(logging_level='Silent', iterations=800, l2_leaf_reg=3.0, depth=6, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat5.csv', index=False)
# 76.27547
cat.get_all_params()['learning_rate']

0.029999999329447743

In [22]:
cat=CatBoostClassifier(logging_level='Silent', iterations=1000, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat6.csv', index=False)
# 76.22987
cat.get_all_params()['learning_rate']

0.029999999329447743

In [23]:
cat=CatBoostClassifier(logging_level='Silent', iterations=800, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat7.csv', index=False)
# 76.67810--
cat.get_all_params()['learning_rate']

0.029999999329447743

In [24]:
cat=CatBoostClassifier(logging_level='Silent', iterations=800, l2_leaf_reg=3.0, depth=10, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat8.csv', index=False)
# 76.13747
cat.get_all_params()['learning_rate']

0.029999999329447743

In [25]:
cat=CatBoostClassifier(logging_level='Silent', iterations=500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat9.csv', index=False)
# 76.47830
cat.get_all_params()['learning_rate']

0.029999999329447743

In [26]:
cat=CatBoostClassifier(logging_level='Silent', iterations=500, l2_leaf_reg=3.0, depth=10, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat10.csv', index=False)
# 76.33930
cat.get_all_params()['learning_rate']

0.029999999329447743

In [27]:
cat=CatBoostClassifier(logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat11.csv', index=False)
# forgot to recode 76.10
cat.get_all_params()['learning_rate']

0.029999999329447743

In [28]:
cat=CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat12.csv', index=False)
# 76.31254
cat.get_all_params()['learning_rate']

0.10000000149011612

In [29]:
cat=CatBoostClassifier(learning_rate=0.05, logging_level='Silent', iterations=500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat13.csv', index=False)
# 76.25858
cat.get_all_params()['learning_rate']

0.05000000074505806

In [30]:
cat=CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat14.csv', index=False)
# 76.11609
cat.get_all_params()['learning_rate']

0.10000000149011612

In [31]:
cat=CatBoostClassifier(learning_rate=0.05, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat15.csv', index=False)
# 75.89816
cat.get_all_params()['learning_rate']

0.05000000074505806

In [32]:
cat=CatBoostClassifier(learning_rate=0.2, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat16.csv', index=False)
# 76.31995
cat.get_all_params()['learning_rate']

0.20000000298023224

In [33]:
cat=CatBoostClassifier(learning_rate=0.2, logging_level='Silent', iterations=500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide")
cat.fit(x_train, y1d)
y_test=pd.DataFrame(cat.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('cat17.csv', index=False)
# 76.19633
cat.get_all_params()['learning_rate']

0.20000000298023224

# LGB

In [None]:
lgb=LGBMClassifier()
lgb.fit(x_train, y1d)
y_test=pd.DataFrame(lgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('lgb.csv', index=False)
# 76.05823

In [None]:
lgb=LGBMClassifier()
lgb.fit(x_train, yabs)
y_test=pd.DataFrame(lgb.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('lgb.csv', index=False)
# 76.05823

# VC

In [None]:
# max_depth=20, max_features=15, min_samples_split=5, n_estimators=100 76.56 rf
# gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform' 76.50 xg
# learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3 76.61 gb
# learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide" 76.51 cat


In [34]:
estimators=[('RF', RandomForestClassifier()), ('XGB', XGBClassifier()), ('LGB', LGBMClassifier()), ('GBR', GradientBoostingClassifier()), ('CAT', CatBoostClassifier(logging_level='Silent'))]
vc=VotingClassifier(estimators=estimators)
vc.fit(x_train, y1d)
y_test=pd.DataFrame(vc.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('vc.csv', index=False)
# 76.33831

In [35]:
estimators=[('XGB', XGBClassifier()), ('LGB', LGBMClassifier()), ('GBR', GradientBoostingClassifier())]
vc=VotingClassifier(estimators=estimators)
vc.fit(x_train, y1d)
y_test=pd.DataFrame(vc.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('vc1.csv', index=False)
# 76.37648

In [36]:
estimators=[('XGB', XGBClassifier()), ('LGB', LGBMClassifier()), ('GBR', GradientBoostingClassifier()), ('CAT', CatBoostClassifier(logging_level='Silent'))]
vc=VotingClassifier(estimators=estimators)
vc.fit(x_train, y1d)
y_test=pd.DataFrame(vc.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('vc2.csv', index=False)
# 76.38160

In [37]:
estimators=[('RF', RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=100)), 
            ('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('LGB', LGBMClassifier()), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
vc=VotingClassifier(estimators=estimators)
vc.fit(x_train, y1d)
y_test=pd.DataFrame(vc.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('vc3.csv', index=False)
# 76.35617

In [38]:
estimators=[('RF', RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=100)), 
            ('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
vc=VotingClassifier(estimators=estimators)
vc.fit(x_train, y1d)
y_test=pd.DataFrame(vc.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('vc4.csv', index=False)
# 76.45142

In [39]:
estimators=[('RF', RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=100)), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
vc=VotingClassifier(estimators=estimators)
vc.fit(x_train, y1d)
y_test=pd.DataFrame(vc.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('vc5.csv', index=False)
# 76.45855

In [40]:
estimators=[('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
vc=VotingClassifier(estimators=estimators)
vc.fit(x_train, y1d)
y_test=pd.DataFrame(vc.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('vc6.csv', index=False)
# 76.41566

# ST

In [None]:
# max_depth=20, max_features=15, min_samples_split=5, n_estimators=100 76.56 rf
# gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform' 76.50 xg
# learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3 76.61 gb
# learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide" 76.51 cat


In [43]:
estimators=[('XGB', XGBClassifier()), ('LGB', LGBMClassifier()), ('GBR', GradientBoostingClassifier()), ('CAT', CatBoostClassifier(logging_level='Silent'))]
st=StackingClassifier(estimators=estimators, final_estimator=CatBoostClassifier(logging_level='Silent'))
st.fit(x_train, y1d)
y_test=pd.DataFrame(st.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('st.csv', index=False)
# 76.22705

  y = column_or_1d(y, warn=True)


In [44]:
estimators=[('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('LGB', LGBMClassifier()), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
st=StackingClassifier(estimators=estimators, final_estimator=CatBoostClassifier(logging_level='Silent'))
st.fit(x_train, y1d)
y_test=pd.DataFrame(st.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('st1.csv', index=False)
# 75.71718

  y = column_or_1d(y, warn=True)


In [45]:
estimators=[('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('LGB', LGBMClassifier()), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
st=StackingClassifier(estimators=estimators, final_estimator=CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))
st.fit(x_train, y1d)
y_test=pd.DataFrame(st.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('st2.csv', index=False)
# 75.66086

  y = column_or_1d(y, warn=True)


In [46]:
estimators=[('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')),  
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
st=StackingClassifier(estimators=estimators, final_estimator=CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))
st.fit(x_train, y1d)
y_test=pd.DataFrame(st.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('st3.csv', index=False)
# 75.79892

  y = column_or_1d(y, warn=True)


In [47]:
estimators=[('RF', RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=100)),
            ('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('LGB', LGBMClassifier()), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
st=StackingClassifier(estimators=estimators, final_estimator=CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))
st.fit(x_train, y1d)
y_test=pd.DataFrame(st.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('st4.csv', index=False)
# 76.69668

  y = column_or_1d(y, warn=True)


In [48]:
estimators=[('RF', RandomForestClassifier(max_depth=20, max_features=15, min_samples_split=5, n_estimators=100)),
            ('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('LGB', LGBMClassifier()), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
st=StackingClassifier(estimators=estimators, final_estimator=CatBoostClassifier(logging_level='Silent'))
st.fit(x_train, y1d)
y_test=pd.DataFrame(st.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('st5.csv', index=False)
# 75.52253

  y = column_or_1d(y, warn=True)


In [49]:
estimators=[('XGB', XGBClassifier(gamma=1, learning_rate=0.2, max_depth=6, min_child_weight=5, reg_lambda=1, sampling_method='uniform')), 
            ('LGB', LGBMClassifier()), 
            ('GBR', GradientBoostingClassifier(learning_rate=0.1, n_estimators=300, min_samples_split=2, min_samples_leaf=1, max_depth=3)), 
            ('CAT', CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))]
st=StackingClassifier(estimators=estimators, final_estimator=CatBoostClassifier(learning_rate=0.1, logging_level='Silent', iterations=1500, l2_leaf_reg=3.0, depth=16, grow_policy="Lossguide"))
st.fit(x_train, y1d)
y_test=pd.DataFrame(st.predict(x_test))
fin=pd.concat([test['customer_id'], y_test], axis=1)
fin.columns=['customer_id', 'churn_risk_score']
fin.to_csv('st2.csv', index=False)
# 75.66086

  y = column_or_1d(y, warn=True)
