In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore')
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score,RandomizedSearchCV
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,accuracy_score
from sklearn.preprocessing import StandardScaler

In [2]:
np.random.seed(1)

In [3]:
def sigmoid(x):
     return 1 / (1 + np.exp(-x))

In [4]:
def ReLU(x):
    return x  *  (x  >  0)

In [5]:
def sigmoid_deriv(x):
    return sigmoid(x)*(1 - sigmoid(x))

In [6]:
def ReLU_deriv(x):
        return (x>0).astype(np.int0)

In [7]:
# преобразование массива в бинарный вид результатов
def to_one_hot(Y):
    n_col = np.amax(Y) 
    n = len(Y)
    binarized = np.zeros((n, n_col))
    for i in range(n):
        binarized[i, Y[i]-1] = 1.
    return binarized

In [8]:
# преобразование массива в необходимый вид
def from_one_hot(layer):
    arr = np.argmax(layer,axis=1)+1
    return arr.reshape(-1,1)

In [9]:
def normalize(X, axis=-1, order=2):
    l2 = np.atleast_1d(np.linalg.norm(X, order, axis))
    l2[l2 == 0] = 1
    return X / np.expand_dims(l2, axis)

In [10]:
def replace_target(df,name):
    df_1 = df.copy()
    d = { j:(i+1) for i,j in enumerate(iris_data['Species'].unique())}
    df_1[name] = df_1[name].map(d)
    return df_1
    
    

In [11]:
def make_cross_validation(X: pd.DataFrame,
                          y: pd.Series,
                          estimator: object,
                          metric: callable,
                          cv_strategy,print_is = True):
   
    
    estimators, fold_train_scores, fold_valid_scores = [], [], []
    oof_predictions = np.zeros(X.shape[0])

    for fold_number, (train_idx, valid_idx) in enumerate(cv_strategy.split(X, y)):
        x_train, x_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

        estimator.fit(x_train, y_train) 

        y_valid_pred = estimator.predict_proba(x_valid)[:,1]
        y_train_pred = estimator.predict_proba(x_train)[:,1]
        
        
        fold_train_scores.append(metric(y_train, y_train_pred))
        fold_valid_scores.append(metric(y_valid, y_valid_pred))
        oof_predictions[valid_idx] = y_valid_pred

        msg = (
            f"Fold: {fold_number+1}, train-observations = {len(train_idx)}, "
            f"valid-observations = {len(valid_idx)}\n"
            f"train-score = {round(fold_train_scores[fold_number], 4)}, "
            f"valid-score = {round(fold_valid_scores[fold_number], 4)}" 
        )
        if print_is:
            print(msg)
            print("="*69)
        estimators.append(estimator)
    oof_score = metric(y, oof_predictions)
    if print_is:
        print(f"CV-results train: {round(np.mean(fold_train_scores), 4)} +/- {round(np.std(fold_train_scores), 3)}")
        print(f"CV-results valid: {round(np.mean(fold_valid_scores), 4)} +/- {round(np.std(fold_valid_scores), 3)}")
        print(f"OOF-score = {round(oof_score, 4)}")

    return estimators, oof_score, fold_train_scores, fold_valid_scores, oof_predictions  

In [12]:
def make_cross_validation_classif_multi_n(X: pd.DataFrame,
                          y: pd.Series,
                          estimator: object,
                          metric: callable,
                          cv_strategy,print_is = True):
   
    
    estimators, fold_train_scores, fold_valid_scores = [], [], []
    oof_predictions = np.zeros((X.shape[0],y.shape[1]))   

    for fold_number, (train_idx, valid_idx) in enumerate(cv_strategy.split(X, y)):
        x_train, x_valid = X[train_idx], X[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]

        estimator.fit(x_train, y_train) 

        y_valid_pred = estimator.predict_proba(x_valid)
        y_train_pred = estimator.predict_proba(x_train)
        
        
        fold_train_scores.append(metric(y_train, y_train_pred,multi_class = 'ovr'))
        fold_valid_scores.append(metric(y_valid, y_valid_pred,multi_class = 'ovr'))
        oof_predictions[valid_idx] = y_valid_pred

        msg = (
            f"Fold: {fold_number+1}, train-observations = {len(train_idx)}, "
            f"valid-observations = {len(valid_idx)}\n"
            f"train-score = {round(fold_train_scores[fold_number], 4)}, "
            f"valid-score = {round(fold_valid_scores[fold_number], 4)}" 
        )
        if print_is:
            print(msg)
            print("="*69)
        estimators.append(estimator)
    oof_score = metric(y, oof_predictions,multi_class = 'ovr')
    if print_is:
        print(f"CV-results train: {round(np.mean(fold_train_scores), 4)} +/- {round(np.std(fold_train_scores), 3)}")
        print(f"CV-results valid: {round(np.mean(fold_valid_scores), 4)} +/- {round(np.std(fold_valid_scores), 3)}")
        print(f"OOF-score = {round(oof_score, 4)}")

    return estimators, oof_score, fold_train_scores, fold_valid_scores, oof_predictions  

In [13]:
def make_cross_validation_classif_multi(X: pd.DataFrame,
                          y: pd.Series,
                          estimator: object,
                          metric: callable,
                          cv_strategy,print_is = True):
   
    
    estimators, fold_train_scores, fold_valid_scores = [], [], []
    oof_predictions = np.zeros((X.shape[0],y.nunique())) 

    for fold_number, (train_idx, valid_idx) in enumerate(cv_strategy.split(X, y)):
        x_train, x_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

        estimator.fit(x_train, y_train) 

        y_valid_pred = estimator.predict_proba(x_valid)
        y_train_pred = estimator.predict_proba(x_train)
        
        
        fold_train_scores.append(metric(y_train, y_train_pred,multi_class = 'ovr'))
        fold_valid_scores.append(metric(y_valid, y_valid_pred,multi_class = 'ovr'))
        oof_predictions[valid_idx] = y_valid_pred

        msg = (
            f"Fold: {fold_number+1}, train-observations = {len(train_idx)}, "
            f"valid-observations = {len(valid_idx)}\n"
            f"train-score = {round(fold_train_scores[fold_number], 4)}, "
            f"valid-score = {round(fold_valid_scores[fold_number], 4)}" 
        )
        if print_is:
            print(msg)
            print("="*69)
        estimators.append(estimator)
    oof_score = metric(y, oof_predictions,multi_class = 'ovr')
    if print_is:
        print(f"CV-results train: {round(np.mean(fold_train_scores), 4)} +/- {round(np.std(fold_train_scores), 3)}")
        print(f"CV-results valid: {round(np.mean(fold_valid_scores), 4)} +/- {round(np.std(fold_valid_scores), 3)}")
        print(f"OOF-score = {round(oof_score, 4)}")

    return estimators, oof_score, fold_train_scores, fold_valid_scores, oof_predictions  

In [14]:
def serch_best_params(param_grid,estimator,metric,cv_strategy,x_train,y_train,res_func=make_cross_validation,k=10):    
    param_name =list(param_grid.keys())
    n = len(param_grid[param_name[0]])
    list_params=[]
    list_res=[]

    for j in range(n):
        qu=0
        for i in range(k*n):
            work_param ={p:np.random.choice(param_grid[p]) for p in param_name}
            if work_param in list_params:
                continue 
            list_res.append(res_func(x_train,y_train,\
                        estimator.set_params(**work_param),metric,cv_strategy,print_is = False)[1])    
            list_params.append(work_param)
            qu+=1
        if qu==0:
            continue
        min_param = list_params[list_res.index(min(list_res[-qu:]))]
        for min_key in min_param:
            param_grid[min_key].remove(min_param[min_key])
            
    max_res= max(list_res)
    best_params = list_params[list_res.index(max_res)]
    return max_res,best_params

In [15]:
class Neural_network_hide_1:
    def __init__(self,eta,num_neurons,num_iter,activ_func,start):
        self.base = start
        self.eta = eta
        self.num_neurons = num_neurons
        self.num_iter = num_iter
        self.activ_func,self.deriv_activ_func = activ_func
     
    def fit(self,X,y):
        self.w0 = np.zeros((X.shape[1], self.num_neurons))
        self.w0 = self.w0 + self.base
        self.w1 = np.zeros((self.num_neurons, y.shape[1]))
        self.w1 = self.w1 +self.base
        
        for i in range(self.num_iter): 
            layer0,layer1,layer2 = self.direct(X)
            self.opposite(y,layer0,layer1,layer2)

    def direct(self,X):
        layer0 = X
        layer1 = self.activ_func(np.dot(layer0, self.w0))
        layer2 = self.activ_func(np.dot(layer1, self.w1))
        return layer0,layer1,layer2
    
    def opposite(self,y,layer0,layer1,layer2):
        layer2_error = y - layer2
        layer2_delta = layer2_error * self.deriv_activ_func(layer2)

        layer1_error = layer2_delta.dot(self.w1.T)
        layer1_delta = layer1_error * self.deriv_activ_func(layer1)

        self.w1 += layer1.T.dot(layer2_delta) * self.eta
        self.w0 += layer0.T.dot(layer1_delta) * self.eta
    
    def  predict_proba(self,X):
        layer0,layer1,layer2 = self.direct(X)
        sum_labals = (np.sum(layer2,axis=1))
        sum_labals[sum_labals==0] = 1 
        sum_labals = sum_labals.reshape(-1,1)              
        return layer2/sum_labals
        
        
    
    
    
        
    

In [16]:
def enc_target(train,feature,target):
    d = train[train[target]==1].groupby(feature).size()/len(train)*100
    return d

In [17]:
cv_strategy = KFold(n_splits=5, shuffle=True, random_state=100)

In [18]:
param_grid_lgbm   = {'n_estimators':[50,100,200,300,400,500,700,800,600,1000],'reg_alpha': [0.5,1,1.5,2,3,4,5,8,10,20],
               'reg_lambda': [0.5,1,20,50,100,150,300,500,550,600],
                'max_depth': [2,3,4,5,6,7,8,9,10,12],'min_child_samples':[5,10,15,20,25,30,35,40,45,50],
               'num_leaves':[5,10,25,30,35,40,45,50,55,60]}

param_grid_cb   = {'n_estimators':[50,100,200,300,400,500,700,800,600,1000],'l2_leaf_reg': [0.5,1,2,5,10,15,20,30,40,50],
                'max_depth': [2,3,4,5,6,7,8,9,10,12],'min_child_samples':[5,10,15,20,25,30,35,40,45,50],
                'max_bin':[5,10,25,30,35,40,45,50,55,60]}
param_grid_rf = {'n_estimators': [50,100,200,300,400,500,700,800,600,1000], 'min_samples_split': [2,4,6,8,10,12,14,16,18,25]
                 , 'min_samples_leaf': [2,4,6,8,10,12,14,16,18,25]}


param_grid_lr ={'C': [0.1,1,1.5,2,2.5,3,4,5,6,7] ,'penalty': ['l1','l2']}

params=[param_grid_lr,
        param_grid_rf,
        param_grid_cb,
        param_grid_lgbm]    

### Посмотрим какие результаты дадут классические модели

In [19]:
estimators= [LogisticRegression(max_iter=1000,solver = 'liblinear'),
             RandomForestClassifier(n_jobs=-1),
             cb.CatBoostClassifier(thread_count=15,early_stopping_rounds=90,verbose=False),
             lgb.LGBMClassifier(n_jobs=-1)]

In [20]:
df = pd.read_csv("train.csv")

In [21]:
df

Unnamed: 0,Home Ownership,Annual Income,Years in current job,Tax Liens,Number of Open Accounts,Years of Credit History,Maximum Open Credit,Number of Credit Problems,Months since last delinquent,Bankruptcies,Purpose,Term,Current Loan Amount,Current Credit Balance,Monthly Debt,Credit Score,Credit Default
0,Own Home,482087.0,,0.0,11.0,26.3,685960.0,1.0,,1.0,debt consolidation,Short Term,99999999.0,47386.0,7914.0,749.0,0
1,Own Home,1025487.0,10+ years,0.0,15.0,15.3,1181730.0,0.0,,0.0,debt consolidation,Long Term,264968.0,394972.0,18373.0,737.0,1
2,Home Mortgage,751412.0,8 years,0.0,11.0,35.0,1182434.0,0.0,,0.0,debt consolidation,Short Term,99999999.0,308389.0,13651.0,742.0,0
3,Own Home,805068.0,6 years,0.0,8.0,22.5,147400.0,1.0,,1.0,debt consolidation,Short Term,121396.0,95855.0,11338.0,694.0,0
4,Rent,776264.0,8 years,0.0,13.0,13.6,385836.0,1.0,,0.0,debt consolidation,Short Term,125840.0,93309.0,7180.0,719.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,Rent,402192.0,< 1 year,0.0,3.0,8.5,107866.0,0.0,,0.0,other,Short Term,129360.0,73492.0,1900.0,697.0,0
7496,Home Mortgage,1533984.0,1 year,0.0,10.0,26.5,686312.0,0.0,43.0,0.0,debt consolidation,Long Term,444048.0,456399.0,12783.0,7410.0,1
7497,Rent,1878910.0,6 years,0.0,12.0,32.1,1778920.0,0.0,,0.0,buy a car,Short Term,99999999.0,477812.0,12479.0,748.0,0
7498,Home Mortgage,,,0.0,21.0,26.5,1141250.0,0.0,,0.0,debt consolidation,Short Term,615274.0,476064.0,37118.0,,0


In [22]:
# Обработаем данные

df_work = df.copy()

d={}
for i in df_work['Years in current job'].value_counts().index:
    if i[:2]=='10':
        d[i]=10
        continue
    if i[0]=='<':
        d[i]=0
        continue    
df_work['Years in current job'] = df_work['Years in current job'].map(d) 


df_work.loc[df_work['Maximum Open Credit']>7000000,'Maximum Open Credit']=7000000

df_work.fillna(999,inplace=True)

target_col = ['Credit Default']
categorical_features=['Home Ownership','Tax Liens','Purpose','Term']
discrete_feature = ['Years in current job','Number of Open Accounts',\
                    'Years of Credit History','Number of Credit Problems',\
                   'Months since last delinquent','Bankruptcies','Credit Score']
continuous_feature = ['Annual Income','Maximum Open Credit','Current Loan Amount',\
                      'Current Credit Balance','Monthly Debt']

target = 'Credit Default'

for i in categorical_features:
    d = enc_target(df_work,i,target)
    df_work[i] = df_work[i].map(d)
    df_work[i] = df_work[[i]].fillna(0)

    
X_train =  df_work.drop('Credit Default',1 )
y_train = df_work[target]
y_train= y_train.map({0:1,1:2})

for i in categorical_features:
    X_train=pd.concat([X_train,pd.get_dummies(X_train[i],prefix=i)],axis=1)
    X_train.drop(i,1,inplace=True)
    
for i in continuous_feature + discrete_feature:
    X_train[i]=StandardScaler().fit_transform(X_train[[i]])    
    


In [23]:
X_train

Unnamed: 0,Annual Income,Years in current job,Number of Open Accounts,Years of Credit History,Maximum Open Credit,Number of Credit Problems,Months since last delinquent,Bankruptcies,Current Loan Amount,Current Credit Balance,...,Purpose_0.13333333333333333,Purpose_0.18666666666666668,Purpose_0.29333333333333333,Purpose_0.32,Purpose_0.7333333333333333,Purpose_1.44,Purpose_2.626666666666667,Purpose_22.053333333333335,Term_10.879999999999999,Term_17.293333333333333
0,-0.643131,0.792873,-0.026674,1.133645,0.039563,1.664779,0.914881,-0.022769,2.760520,-0.762772,...,0,0,0,0,0,0,0,1,0,1
1,-0.061492,-1.257172,0.788223,-0.428528,0.726742,-0.340979,0.914881,-0.045961,-0.363620,0.330781,...,0,0,0,0,0,0,0,1,1,0
2,-0.354854,0.792873,-0.026674,2.369181,0.727718,-0.340979,0.914881,-0.045961,2.760520,0.058379,...,0,0,0,0,0,0,0,1,0,1
3,-0.297422,0.792873,-0.637847,0.593985,-0.706926,1.664779,0.914881,-0.022769,-0.368118,-0.610282,...,0,0,0,0,0,0,0,1,0,1
4,-0.328253,0.792873,0.380774,-0.669954,-0.376434,1.664779,0.914881,-0.045961,-0.367978,-0.618292,...,0,0,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,-0.728649,-1.277900,-1.656468,-1.394234,-0.761724,-0.340979,0.914881,-0.045961,-0.367868,-0.680639,...,0,0,0,0,0,0,1,0,0,1
7496,0.482788,0.792873,-0.230398,1.162048,0.040051,-0.340979,-1.074734,-0.045961,-0.358011,0.524039,...,0,0,0,0,0,0,0,1,1,0
7497,0.851987,0.792873,0.177050,1.957336,1.554498,-0.340979,0.914881,-0.045961,2.760520,0.591407,...,0,0,0,1,0,0,0,0,0,1
7498,-1.158074,0.792873,2.010568,1.162048,0.670634,-0.340979,0.914881,-0.045961,-0.352647,0.585907,...,0,0,0,0,0,0,0,1,0,1


In [24]:
feath =np.array(X_train.columns)

In [25]:
feath

array(['Annual Income', 'Years in current job', 'Number of Open Accounts',
       'Years of Credit History', 'Maximum Open Credit',
       'Number of Credit Problems', 'Months since last delinquent',
       'Bankruptcies', 'Current Loan Amount', 'Current Credit Balance',
       'Monthly Debt', 'Credit Score',
       'Home Ownership_0.02666666666666667', 'Home Ownership_2.56',
       'Home Ownership_12.213333333333333',
       'Home Ownership_13.373333333333335', 'Tax Liens_0.0',
       'Tax Liens_0.013333333333333334', 'Tax Liens_0.04',
       'Tax Liens_0.06666666666666667', 'Tax Liens_0.2', 'Tax Liens_0.32',
       'Tax Liens_27.53333333333333', 'Purpose_0.013333333333333334',
       'Purpose_0.02666666666666667', 'Purpose_0.05333333333333334',
       'Purpose_0.12', 'Purpose_0.13333333333333333',
       'Purpose_0.18666666666666668', 'Purpose_0.29333333333333333',
       'Purpose_0.32', 'Purpose_0.7333333333333333', 'Purpose_1.44',
       'Purpose_2.626666666666667', 'Purpose_22.053

In [26]:
X= normalize(X_train[feath].to_numpy())

In [27]:
X

array([[-0.14766609,  0.18204755, -0.00612455, ...,  0.22960483,
         0.        ,  0.22960483],
       [-0.02149227, -0.43939861,  0.27549452, ...,  0.34951364,
         0.34951364,  0.        ],
       [-0.07995004,  0.17863765, -0.00600983, ...,  0.22530413,
         0.        ,  0.22530413],
       ...,
       [ 0.18651446,  0.17357344,  0.03875923, ...,  0.        ,
         0.        ,  0.21891698],
       [-0.2920079 ,  0.19992269,  0.50696384, ...,  0.25214959,
         0.        ,  0.25214959],
       [-0.37102112,  0.25401896, -0.20435194, ...,  0.32037772,
         0.        ,  0.32037772]])

In [28]:
Y= to_one_hot(y_train.to_numpy())

In [29]:
Y

array([[1., 0.],
       [0., 1.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]])

In [30]:
ml_result=pd.DataFrame(columns=['model','params','result'])

In [31]:
%%time
for i,estimator in enumerate(estimators):
    ml_result.loc[i,'model'] = str(estimator)
    model=RandomizedSearchCV(estimator=estimator,cv=cv_strategy,\
        param_distributions = params[i],n_jobs = -1,n_iter = 300).fit(X_train,y_train)
    best_params = model.best_params_
    max_res = make_cross_validation(X_train,\
                        y_train,model.best_estimator_,roc_auc_score,cv_strategy=cv_strategy)[1]
    ml_result.loc[i,'result'] = max_res
    ml_result.loc[i,'params'] = str(best_params)
    print(ml_result)
    
    

Fold: 1, train-observations = 6000, valid-observations = 1500
train-score = 0.7673, valid-score = 0.7654
Fold: 2, train-observations = 6000, valid-observations = 1500
train-score = 0.7683, valid-score = 0.7641
Fold: 3, train-observations = 6000, valid-observations = 1500
train-score = 0.7695, valid-score = 0.7623
Fold: 4, train-observations = 6000, valid-observations = 1500
train-score = 0.7676, valid-score = 0.7682
Fold: 5, train-observations = 6000, valid-observations = 1500
train-score = 0.7709, valid-score = 0.7576
CV-results train: 0.7687 +/- 0.001
CV-results valid: 0.7635 +/- 0.004
OOF-score = 0.7633
                                               model  \
0  LogisticRegression(max_iter=1000, solver='libl...   

                        params    result  
0  {'penalty': 'l2', 'C': 0.1}  0.763265  
Fold: 1, train-observations = 6000, valid-observations = 1500
train-score = 0.969, valid-score = 0.7689
Fold: 2, train-observations = 6000, valid-observations = 1500
train-score = 0.9675,

In [32]:
ml_result.sort_values('result',ascending=False,inplace=True)
ml_result

Unnamed: 0,model,params,result
2,<catboost.core.CatBoostClassifier object at 0x...,"{'n_estimators': 600, 'min_child_samples': 25,...",0.774363
3,LGBMClassifier(),"{'reg_lambda': 1, 'reg_alpha': 10, 'num_leaves...",0.771849
1,RandomForestClassifier(n_jobs=-1),"{'n_estimators': 800, 'min_samples_split': 2, ...",0.766409
0,"LogisticRegression(max_iter=1000, solver='libl...","{'penalty': 'l2', 'C': 0.1}",0.763265


### Решение задачи с помощью нейронной сети

In [33]:
params_grig1={'eta' : [0.001,0.01,0.1],
             'num_neurons' :[3, 5,10,15,25],
             'num_iter' : [1000,5000,15000],
             'activ_func' : (sigmoid,sigmoid_deriv),
             'start' : 0.3}
params_grig2={'eta' : '',
             'num_neurons' :'',
             'num_iter' : '',
             'activ_func' : (sigmoid,sigmoid_deriv),
             'start' : [0,0.1,-0.1,-0.3,0.5,0.7,1,2,5,-2]}
params_grig3={'eta' : [0.0005,0.001,0.01],
             'num_neurons' :[3, 5,10,15,25],
             'num_iter' : [1000,5000,15000],
             'activ_func' : (ReLU,ReLU_deriv),
             'start' : 0.3}
params_grig4={'eta' : '',
             'num_neurons' :'',
             'num_iter' : '',
             'activ_func' : (ReLU,ReLU_deriv),
             'start' : [0,0.1,-0.1,-0.3,0.5,0.7,1,2,5,-2]}


In [34]:
%%time
res_sig = pd.DataFrame(columns=['eta','num_neurons','num_iter','activ_func','start','roc_auc'])
my_params={}
for eta  in params_grig1['eta']:
    for num_neurons in  params_grig1['num_neurons']:
        for num_iter in  params_grig1['num_iter']:
            my_params = {'eta' : eta,
                         'num_neurons' :num_neurons,
                         'num_iter' : num_iter,
                         'activ_func' : params_grig1['activ_func'],
                         'start' : params_grig1['start']}
            my_net = Neural_network_hide_1(**my_params) 
            my_params['roc_auc']  = make_cross_validation_classif_multi_n(X, Y,my_net,roc_auc_score,cv_strategy,print_is =False)[1]
            my_params['activ_func'] = str(params_grig1['activ_func'])[10:18]            
            res_sig = res_sig.append(my_params,ignore_index=True)               
                    
        

Wall time: 1h 16min 11s


In [35]:
res_sig.sort_values('roc_auc',ascending=False,inplace=True)
res_sig

Unnamed: 0,eta,num_neurons,num_iter,activ_func,start,roc_auc
13,0.001,25,5000,sigmoid,0.3,0.748486
0,0.001,3,1000,sigmoid,0.3,0.747601
10,0.001,15,5000,sigmoid,0.3,0.747328
14,0.001,25,15000,sigmoid,0.3,0.746231
3,0.001,5,1000,sigmoid,0.3,0.746184
7,0.001,10,5000,sigmoid,0.3,0.74533
6,0.001,10,1000,sigmoid,0.3,0.744718
9,0.001,15,1000,sigmoid,0.3,0.740968
11,0.001,15,15000,sigmoid,0.3,0.738624
4,0.001,5,5000,sigmoid,0.3,0.738603


In [36]:
%%time
params_grig2['eta']=res_sig.iloc[0].eta
params_grig2['num_neurons']=res_sig.iloc[0].num_neurons
params_grig2['num_iter']=res_sig.iloc[0].num_iter
my_params={}
for start  in params_grig2['start']:
    my_params = params_grig2.copy()
    my_params['start'] = start
    my_net = Neural_network_hide_1(**my_params) 
    my_params['roc_auc']  = make_cross_validation_classif_multi_n(X, Y,my_net,roc_auc_score,cv_strategy,print_is =False)[1]
    my_params['activ_func'] = str(params_grig1['activ_func'])[10:18]            
    res_sig = res_sig.append(my_params,ignore_index=True)      

Wall time: 24min 39s


In [37]:
res_sig.sort_values('roc_auc',ascending=False,inplace=True)
res_sig.head(20)

Unnamed: 0,eta,num_neurons,num_iter,activ_func,start,roc_auc
49,0.001,25,5000,sigmoid,0.5,0.755193
51,0.001,25,5000,sigmoid,1.0,0.753968
50,0.001,25,5000,sigmoid,0.7,0.751105
0,0.001,25,5000,sigmoid,0.3,0.748486
1,0.001,3,1000,sigmoid,0.3,0.747601
2,0.001,15,5000,sigmoid,0.3,0.747328
46,0.001,25,5000,sigmoid,0.1,0.746843
3,0.001,25,15000,sigmoid,0.3,0.746231
48,0.001,25,5000,sigmoid,-0.3,0.746209
4,0.001,5,1000,sigmoid,0.3,0.746184


In [38]:
%%time
res_relu = pd.DataFrame(columns=['eta','num_neurons','num_iter','activ_func','start','roc_auc'])
my_params={}
for eta  in params_grig3['eta']:
    for num_neurons in  params_grig3['num_neurons']:
        for num_iter in  params_grig3['num_iter']:
            my_params = {'eta' : eta,
                         'num_neurons' :num_neurons,
                         'num_iter' : num_iter,
                         'activ_func' : params_grig3['activ_func'],
                         'start' : params_grig3['start']}
            my_net = Neural_network_hide_1(**my_params) 
            my_params['roc_auc']  = make_cross_validation_classif_multi_n(X, Y,my_net,roc_auc_score,cv_strategy,print_is =False)[1]
            my_params['activ_func'] = str(params_grig3['activ_func'])[10:18]            
            res_relu = res_relu.append(my_params,ignore_index=True)       



Wall time: 21min 38s


In [39]:
res_relu.sort_values('roc_auc',ascending=False,inplace=True)
res_relu.head(20)

Unnamed: 0,eta,num_neurons,num_iter,activ_func,start,roc_auc
0,0.0005,3,1000,ReLU at,0.3,0.760726
2,0.0005,3,15000,ReLU at,0.3,0.757612
1,0.0005,3,5000,ReLU at,0.3,0.751027
43,0.01,25,5000,ReLU at,0.3,0.5
42,0.01,25,1000,ReLU at,0.3,0.5
25,0.001,15,5000,ReLU at,0.3,0.5
26,0.001,15,15000,ReLU at,0.3,0.5
27,0.001,25,1000,ReLU at,0.3,0.5
28,0.001,25,5000,ReLU at,0.3,0.5
29,0.001,25,15000,ReLU at,0.3,0.5


In [40]:
%%time
params_grig4['eta']=res_relu.iloc[0].eta
params_grig4['num_neurons']=res_relu.iloc[0].num_neurons
params_grig4['num_iter']=res_relu.iloc[0].num_iter
my_params={}
for start  in params_grig4['start']:
    my_params = params_grig4.copy()
    my_params['start'] = start
    my_net = Neural_network_hide_1(**my_params) 
    my_params['roc_auc']  = make_cross_validation_classif_multi_n(X, Y,my_net,roc_auc_score,cv_strategy,print_is =False)[1]
    my_params['activ_func'] = str(params_grig4['activ_func'])[10:18]            
    res_relu = res_relu.append(my_params,ignore_index=True) 

Wall time: 16.8 s


In [41]:
res_relu.sort_values('roc_auc',ascending=False,inplace=True)
res_relu.head(20)

Unnamed: 0,eta,num_neurons,num_iter,activ_func,start,roc_auc
0,0.0005,3,1000,ReLU at,0.3,0.760726
1,0.0005,3,15000,ReLU at,0.3,0.757612
46,0.0005,3,1000,ReLU at,0.1,0.7559
2,0.0005,3,5000,ReLU at,0.3,0.751027
40,0.001,3,15000,ReLU at,0.3,0.5
30,0.0005,10,5000,ReLU at,0.3,0.5
31,0.0005,10,15000,ReLU at,0.3,0.5
32,0.0005,15,1000,ReLU at,0.3,0.5
33,0.0005,15,5000,ReLU at,0.3,0.5
34,0.0005,25,1000,ReLU at,0.3,0.5


In [42]:
res_sig.sort_values('roc_auc',ascending=False,inplace=True)
res_sig.head(20)

Unnamed: 0,eta,num_neurons,num_iter,activ_func,start,roc_auc
49,0.001,25,5000,sigmoid,0.5,0.755193
51,0.001,25,5000,sigmoid,1.0,0.753968
50,0.001,25,5000,sigmoid,0.7,0.751105
0,0.001,25,5000,sigmoid,0.3,0.748486
1,0.001,3,1000,sigmoid,0.3,0.747601
2,0.001,15,5000,sigmoid,0.3,0.747328
46,0.001,25,5000,sigmoid,0.1,0.746843
3,0.001,25,15000,sigmoid,0.3,0.746231
48,0.001,25,5000,sigmoid,-0.3,0.746209
4,0.001,5,1000,sigmoid,0.3,0.746184


In [43]:
ml_result

Unnamed: 0,model,params,result
2,<catboost.core.CatBoostClassifier object at 0x...,"{'n_estimators': 600, 'min_child_samples': 25,...",0.774363
3,LGBMClassifier(),"{'reg_lambda': 1, 'reg_alpha': 10, 'num_leaves...",0.771849
1,RandomForestClassifier(n_jobs=-1),"{'n_estimators': 800, 'min_samples_split': 2, ...",0.766409
0,"LogisticRegression(max_iter=1000, solver='libl...","{'penalty': 'l2', 'C': 0.1}",0.763265
