In [45]:
from random import randint
para={'XGBClassifier':{'learning_rate':[0.1,0.01,0.001],
                      'n_estimators':[600,800,1000],
                      'max_depth':[4,5,6],
                      'min_child_weight':[4,5,6],
                      'gamma':[i/10.0 for i in range(0,4)]},
      'Random Forest Classifier':{'n_estimators':[600,800,1000],
                                 'max_depth':[4,5,6],
                                 'min_samples_split':[],
                                 'max_leaf_nodes':[],
                                 'max_features' : ['sqrt', 'log2']},
      'Decision Tree':{'criterion':['gini'],
                       'splitter':[2,4,6],
                       'max_depth':[],
                       'min_samples_leaf':[],
                       'max_features' :['sqrt', 'log2']},
      'CatboostClassifier':{'iterations':[],
                           'learning_rate':[],
                           'rsm':[],
                           'loss_function':[],
                           'max_depth':randint(3, 10)},
      'LogisticRegression':{'max_iter':[],
                           'solver':['lbfgs','sag','saga','liblinear'],
                           'penalty':['None','l2'],
                           'c_values':[100,10,1,0.1,0.01]                           },
      'KNeighborsClassifier':{'n_neighbors' : range(1, 21, 2),
                              'weights' : ['uniform', 'distance'],
                              'metric' : ['euclidean', 'manhattan', 'minkowski']},
      'SVMClassifier':{'kernel' : ['poly', 'rbf', 'sigmoid'],
                      'C' : [50, 10, 1.0, 0.1, 0.01],
                      'gamma' :['scale']},
      'GradientBoostingClassifier':{'n_estimators' : [10, 100, 1000],
                                    'learning_rate' : [0.001, 0.01, 0.1],
                                    'subsample' : [0.5, 0.7, 1.0],
                                    'max_depth' : [3, 7, 9]},
      'AdaBoost Classifier':{'n_estimators': [2, 3, 4],
                             'learning_rate': [(0.97 + x / 100) for x in range(0, 4)],
                             'algorithm': ['SAMME', 'SAMME.R']}
     }

# ------------------------------------------------------------------------------------------

# Exception

In [46]:
import sys

def error_message_detail(error,error_detail:sys):
    _,_,exc_tb = error_detail.exc_info()
    file_name = exc_tb.tb_frame.f_code.co_filename
    error_message = "Error occurred in python script name [{0}] line number [{1}] error message [{2}]".format(
        file_name, exc_tb.tb_lineno,str(error)
    )
    return error_message

class CustomException(Exception):
    def __init__(self,error_message,error_detail:sys):
        super().__init__(error_message)
        self.error_message = error_message_detail(error_message, error_detail=error_detail)

    def __str__(self) -> str:
        return self.error_message   

# utils

In [47]:
import os
import sys
import numpy as np
import pandas as pd
import dill
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import SMOTE

def save_object(file_path,obj):
    try:
        dir_path = os.path.dirname(file_path)

        os.makedirs(dir_path,exist_ok=True)

        with open(file_path, 'wb') as file_obj:
            dill.dump(obj, file_obj)

    except Exception as e:
        raise CustomException(e,sys)
    


def evaluate_models(x_train,y_train,x_test,y_test,models):
    try:
        report = {}
        for i in range(len(list(models))):
            model = list(models.values())[i]

            # para = params[list(models.keys())[i]]

            # gs = GridSearchCV(model, para, cv=3)
            # gs.fit(x_train,y_train)
            

            # model.set_params(**gs.best_params_)
            model.fit(x_train,y_train)

            y_train_pred = model.predict(x_train)

            y_test_pred = model.predict(x_test)

            train_model_score = roc_auc_score(y_train,y_train_pred)

            test_model_score = roc_auc_score(y_test,y_test_pred)

            report[list(models.keys())[i]] = test_model_score

        return report
    except Exception as e:
        raise CustomException(e,sys)

    

def load_object(file_path):
    try:
        with open(file_path,'rb') as file_obj:
            return dill.load(file_obj)
    except Exception as e:
        raise CustomException(e,sys)

# Data Ingestion

In [60]:
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
import os
import sys
import pandas as pd


# diffing variable so use data class

class DataIngestion:
    
    
    def initiate_data_ingestion(self):
        try:
            df = pd.read_csv('data/UCI_Credit_Card.csv')
            
            # Data set column rename
            df.rename(columns={'PAY_0':'PAY_SEPT','PAY_2':'PAY_AUG','PAY_3':'PAY_JUL','PAY_4':'PAY_JUN','PAY_5':'PAY_MAY','PAY_6':'PAY_APR','default.payment.next.month':'DEFAULT'},inplace=True)
            df.rename(columns={'BILL_AMT1':'BILL_AMT_SEPT','BILL_AMT2':'BILL_AMT_AUG','BILL_AMT3':'BILL_AMT_JUL','BILL_AMT4':'BILL_AMT_JUN','BILL_AMT5':'BILL_AMT_MAY','BILL_AMT6':'BILL_AMT_APR'}, inplace = True)
            df.rename(columns={'PAY_AMT1':'PAY_AMT_SEPT','PAY_AMT2':'PAY_AMT_AUG','PAY_AMT3':'PAY_AMT_JUL','PAY_AMT4':'PAY_AMT_JUN','PAY_AMT5':'PAY_AMT_MAY','PAY_AMT6':'PAY_AMT_APR'},inplace=True)
            
            df.replace({'EDUCTION':{1:1,2:1,3:2,4:3,5:3,6:3,0:3},'MARRIAGE':{1:1,2:2,0:3,3:3}},inplace=True)
            
            ind_data = df.iloc[:,:-1]
            dep_data = df.iloc[:,-1]
            
            smote = SMOTE(sampling_strategy='minority')
            x_sm,y_sm = smote.fit_resample(ind_data,dep_data)
            
            data = pd.merge(x_sm, y_sm, left_index=True, right_index= True)
            # splitting the data into train and test
        
            train_set, test_set = train_test_split(data,test_size=0.25, random_state=42)

            return  train_set, test_set
        
        except Exception as e:
            raise CustomException(e,sys)

# data_transformation

In [61]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, roc_auc_score, recall_score, precision_score, classification_report
from dataclasses import dataclass


class DataTransformation:

    def get_data_transformer_object(self):
        try:
            numerical_columns =['LIMIT_BAL', 'AGE', 'BILL_AMT_SEPT', 'BILL_AMT_AUG', 'BILL_AMT_JUL', 'BILL_AMT_JUN', 'BILL_AMT_MAY',
                                 'BILL_AMT_APR', 'PAY_AMT_SEPT', 'PAY_AMT_AUG', 'PAY_AMT_JUL', 'PAY_AMT_JUN', 'PAY_AMT_MAY', 'PAY_AMT_APR']
            categorical_columns =['SEX','EDUCATION', 'MARRIAGE', 'PAY_SEPT', 'PAY_AUG', 'PAY_JUL', 'PAY_JUN', 'PAY_MAY', 'PAY_APR']

            
            numerical_pipeline = Pipeline(
                steps=[
                ("scaler",StandardScaler())
                ]
            )

            categorical_pipeline = Pipeline(
                steps=[
                ("scaler",StandardScaler(with_mean=False))
                ]
            )
            
            preprocessor = ColumnTransformer(
                [
                ("numerical_pipeline",numerical_pipeline,numerical_columns),
                ("categorical_pipeline",categorical_pipeline,categorical_columns)
                ]
                )
            
            return preprocessor
           
        except Exception as e:
            raise CustomException(e,sys)
        
    def initiate_data_transformation(self,train_df,test_df):
        try:
            
            preprocessing_obj = self.get_data_transformer_object()

            target_column_name = "DEFAULT"

            input_feature_train_df = train_df.drop(columns=[target_column_name],axis=1)
            target_feature_train_df = train_df[target_column_name]

            input_feature_test_df = test_df.drop(columns=[target_column_name],axis=1)
            target_feature_test_df = test_df[target_column_name]


            input_feature_train_arr = preprocessing_obj.fit_transform(input_feature_train_df)
            input_feature_test_arr = preprocessing_obj.fit_transform(input_feature_test_df)

            train_arr = np.c_[input_feature_train_arr, np.array(target_feature_train_df)]
            test_arr = np.c_[input_feature_test_arr,np.array(target_feature_test_df)]


            return train_arr, test_arr
        except Exception as e:
            raise CustomException(e,sys)

# model_trainer

In [62]:
import pandas as pd
import numpy as np
import os
import sys

from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score
from dataclasses import dataclass


class ModelTrainer:
    
    def initiate_mode_trainer(self,train_array, test_array):
        try:
            x_train,y_train,x_test,y_test =(
                train_array[:,:-1],train_array[:,-1], test_array[:,:-1],test_array[:,-1])

            models = {
                'Random Forest': RandomForestClassifier(),
                'Decision Tree': DecisionTreeClassifier(),
                'XGBClassifier': XGBClassifier(),
                'CatBoosting': CatBoostClassifier(),
                    }
            
#             params = {
#                 'Random Forest':{
#                     'n_estimators':[800,1000],
#                     # 'max_depth':[4,5,6],
#                     # 'min_samples_split':[2,4,5],
#                     # 'max_features' = ['sqrt', 'log2']
#                     # 'criterion':['gini','entropy']
#                 },
#                 'Decision Tree':{
                    
#                     'max_depth':[4,5,6],
#                     # 'min_samples_split':[2,4,5],
#                     # 'max_features' : ['sqrt', 'log2']
#                     # 'criterion':['gini','entropy']                
#                 },
#                 'XGBClassifier':{
#                     'learning_rate':[0.01,0.001],
#                     # 'n_estimators':[600,800],
#                     # 'max_depth':[4,5,6],
#                     # 'min_child_weight':[4,5,6],
#                     # 'gamma':[i/10.0 for i in range(0,4)]
#                 },
#                 'CatBoosting':{
#                     'n_estimators':[100,200,300],
#                     # 'learning_rate':[0., 0.05, 0.1 , 0.15, 0.2 ],
#                     # 'max_depth': [3,4,5],
#                 #     'rsm':[],
#                 #     'loss_function':[]
#                 },
#             }
                
            model_report = evaluate_models(x_train,y_train,x_test,y_test,models)
            print(model_report)

            # to get best model score from dict
            best_model_score = max(sorted(model_report.values()))

            # to get best model name from dict
            best_model_name = list(model_report.keys())[list(model_report.values()).index(best_model_score)]
            best_model = models[best_model_name]

            if best_model_score<0.6:
                raise CustomException('No best Model found')
            
            predicted = best_model.predict(x_test)
            roc_auc= roc_auc_score(y_test, predicted)

            return roc_auc
                                    
        except Exception as e:
            raise CustomException(e,sys)
        

In [63]:
if __name__ == "__main__":
    obj = DataIngestion()
    train_data, test_data = obj.initiate_data_ingestion()
    print('ingestion done')

    data_transformation = DataTransformation()
    train_arr,test_arr = data_transformation.initiate_data_transformation(train_data,test_data)
    print('data transformation done')

    modeltrainer = ModelTrainer()
    auc_roc_socres = modeltrainer.initiate_mode_trainer(train_arr,test_arr)
    print('model training get done')
    print(auc_roc_socres)

ingestion done
data transformation done
Learning rate set to 0.047043
0:	learn: 0.6758149	total: 44.8ms	remaining: 44.8s
1:	learn: 0.6610206	total: 79.7ms	remaining: 39.8s
2:	learn: 0.6485476	total: 116ms	remaining: 38.5s
3:	learn: 0.6350408	total: 151ms	remaining: 37.6s
4:	learn: 0.6257556	total: 186ms	remaining: 36.9s
5:	learn: 0.6164277	total: 220ms	remaining: 36.5s
6:	learn: 0.6073934	total: 257ms	remaining: 36.5s
7:	learn: 0.5994736	total: 291ms	remaining: 36.1s
8:	learn: 0.5925957	total: 324ms	remaining: 35.7s
9:	learn: 0.5858179	total: 359ms	remaining: 35.5s
10:	learn: 0.5793468	total: 393ms	remaining: 35.4s
11:	learn: 0.5735938	total: 434ms	remaining: 35.8s
12:	learn: 0.5689257	total: 470ms	remaining: 35.7s
13:	learn: 0.5644491	total: 504ms	remaining: 35.5s
14:	learn: 0.5581644	total: 536ms	remaining: 35.2s
15:	learn: 0.5539446	total: 572ms	remaining: 35.2s
16:	learn: 0.5477965	total: 607ms	remaining: 35.1s
17:	learn: 0.5426062	total: 641ms	remaining: 35s
18:	learn: 0.5389138	t

166:	learn: 0.4384953	total: 5.55s	remaining: 27.7s
167:	learn: 0.4382130	total: 5.58s	remaining: 27.6s
168:	learn: 0.4380775	total: 5.61s	remaining: 27.6s
169:	learn: 0.4372579	total: 5.64s	remaining: 27.6s
170:	learn: 0.4371282	total: 5.68s	remaining: 27.5s
171:	learn: 0.4368785	total: 5.72s	remaining: 27.5s
172:	learn: 0.4366604	total: 5.75s	remaining: 27.5s
173:	learn: 0.4364452	total: 5.78s	remaining: 27.4s
174:	learn: 0.4362057	total: 5.81s	remaining: 27.4s
175:	learn: 0.4360783	total: 5.84s	remaining: 27.4s
176:	learn: 0.4359009	total: 5.88s	remaining: 27.3s
177:	learn: 0.4356859	total: 5.91s	remaining: 27.3s
178:	learn: 0.4354784	total: 5.94s	remaining: 27.3s
179:	learn: 0.4352407	total: 5.97s	remaining: 27.2s
180:	learn: 0.4349682	total: 6.01s	remaining: 27.2s
181:	learn: 0.4348097	total: 6.04s	remaining: 27.1s
182:	learn: 0.4345825	total: 6.07s	remaining: 27.1s
183:	learn: 0.4342178	total: 6.11s	remaining: 27.1s
184:	learn: 0.4339547	total: 6.14s	remaining: 27.1s
185:	learn: 

327:	learn: 0.4042771	total: 10.9s	remaining: 22.4s
328:	learn: 0.4041018	total: 11s	remaining: 22.4s
329:	learn: 0.4039446	total: 11s	remaining: 22.3s
330:	learn: 0.4038299	total: 11s	remaining: 22.3s
331:	learn: 0.4036905	total: 11.1s	remaining: 22.2s
332:	learn: 0.4035664	total: 11.1s	remaining: 22.2s
333:	learn: 0.4034219	total: 11.1s	remaining: 22.2s
334:	learn: 0.4032738	total: 11.2s	remaining: 22.1s
335:	learn: 0.4031138	total: 11.2s	remaining: 22.1s
336:	learn: 0.4029023	total: 11.2s	remaining: 22.1s
337:	learn: 0.4027715	total: 11.3s	remaining: 22s
338:	learn: 0.4026230	total: 11.3s	remaining: 22s
339:	learn: 0.4024519	total: 11.3s	remaining: 22s
340:	learn: 0.4023631	total: 11.4s	remaining: 21.9s
341:	learn: 0.4021999	total: 11.4s	remaining: 21.9s
342:	learn: 0.4020677	total: 11.4s	remaining: 21.9s
343:	learn: 0.4019493	total: 11.4s	remaining: 21.8s
344:	learn: 0.4015307	total: 11.5s	remaining: 21.8s
345:	learn: 0.4014028	total: 11.5s	remaining: 21.8s
346:	learn: 0.4012076	to

488:	learn: 0.3798380	total: 16.4s	remaining: 17.1s
489:	learn: 0.3797416	total: 16.4s	remaining: 17.1s
490:	learn: 0.3796122	total: 16.4s	remaining: 17s
491:	learn: 0.3794666	total: 16.5s	remaining: 17s
492:	learn: 0.3792552	total: 16.5s	remaining: 17s
493:	learn: 0.3791803	total: 16.5s	remaining: 16.9s
494:	learn: 0.3790671	total: 16.6s	remaining: 16.9s
495:	learn: 0.3789289	total: 16.6s	remaining: 16.9s
496:	learn: 0.3788198	total: 16.6s	remaining: 16.8s
497:	learn: 0.3787125	total: 16.7s	remaining: 16.8s
498:	learn: 0.3786056	total: 16.7s	remaining: 16.8s
499:	learn: 0.3784442	total: 16.7s	remaining: 16.7s
500:	learn: 0.3783690	total: 16.8s	remaining: 16.7s
501:	learn: 0.3782786	total: 16.8s	remaining: 16.7s
502:	learn: 0.3782585	total: 16.8s	remaining: 16.6s
503:	learn: 0.3781609	total: 16.9s	remaining: 16.6s
504:	learn: 0.3779292	total: 16.9s	remaining: 16.6s
505:	learn: 0.3777958	total: 16.9s	remaining: 16.5s
506:	learn: 0.3775768	total: 17s	remaining: 16.5s
507:	learn: 0.377430

650:	learn: 0.3594129	total: 21.8s	remaining: 11.7s
651:	learn: 0.3593354	total: 21.8s	remaining: 11.6s
652:	learn: 0.3592246	total: 21.8s	remaining: 11.6s
653:	learn: 0.3591308	total: 21.9s	remaining: 11.6s
654:	learn: 0.3590270	total: 21.9s	remaining: 11.5s
655:	learn: 0.3588942	total: 21.9s	remaining: 11.5s
656:	learn: 0.3587310	total: 22s	remaining: 11.5s
657:	learn: 0.3586152	total: 22s	remaining: 11.4s
658:	learn: 0.3584963	total: 22s	remaining: 11.4s
659:	learn: 0.3583891	total: 22.1s	remaining: 11.4s
660:	learn: 0.3582841	total: 22.1s	remaining: 11.3s
661:	learn: 0.3581815	total: 22.1s	remaining: 11.3s
662:	learn: 0.3581118	total: 22.2s	remaining: 11.3s
663:	learn: 0.3579736	total: 22.2s	remaining: 11.2s
664:	learn: 0.3579166	total: 22.2s	remaining: 11.2s
665:	learn: 0.3578366	total: 22.3s	remaining: 11.2s
666:	learn: 0.3577374	total: 22.3s	remaining: 11.1s
667:	learn: 0.3576401	total: 22.3s	remaining: 11.1s
668:	learn: 0.3575128	total: 22.4s	remaining: 11.1s
669:	learn: 0.3574

809:	learn: 0.3434368	total: 27.2s	remaining: 6.37s
810:	learn: 0.3433392	total: 27.2s	remaining: 6.34s
811:	learn: 0.3432311	total: 27.2s	remaining: 6.3s
812:	learn: 0.3431756	total: 27.3s	remaining: 6.27s
813:	learn: 0.3431015	total: 27.3s	remaining: 6.24s
814:	learn: 0.3429752	total: 27.3s	remaining: 6.2s
815:	learn: 0.3427854	total: 27.4s	remaining: 6.17s
816:	learn: 0.3426890	total: 27.4s	remaining: 6.13s
817:	learn: 0.3426811	total: 27.4s	remaining: 6.1s
818:	learn: 0.3425872	total: 27.4s	remaining: 6.07s
819:	learn: 0.3424746	total: 27.5s	remaining: 6.03s
820:	learn: 0.3423796	total: 27.5s	remaining: 6s
821:	learn: 0.3422453	total: 27.6s	remaining: 5.97s
822:	learn: 0.3421984	total: 27.6s	remaining: 5.93s
823:	learn: 0.3420622	total: 27.6s	remaining: 5.9s
824:	learn: 0.3419796	total: 27.7s	remaining: 5.87s
825:	learn: 0.3418731	total: 27.7s	remaining: 5.83s
826:	learn: 0.3417766	total: 27.7s	remaining: 5.8s
827:	learn: 0.3416861	total: 27.8s	remaining: 5.76s
828:	learn: 0.341577

970:	learn: 0.3288416	total: 32.5s	remaining: 971ms
971:	learn: 0.3287253	total: 32.6s	remaining: 938ms
972:	learn: 0.3286405	total: 32.6s	remaining: 904ms
973:	learn: 0.3285489	total: 32.6s	remaining: 871ms
974:	learn: 0.3284080	total: 32.7s	remaining: 838ms
975:	learn: 0.3283168	total: 32.7s	remaining: 804ms
976:	learn: 0.3281888	total: 32.7s	remaining: 771ms
977:	learn: 0.3281334	total: 32.8s	remaining: 738ms
978:	learn: 0.3280681	total: 32.8s	remaining: 704ms
979:	learn: 0.3279726	total: 32.9s	remaining: 671ms
980:	learn: 0.3278765	total: 32.9s	remaining: 637ms
981:	learn: 0.3278016	total: 32.9s	remaining: 604ms
982:	learn: 0.3277218	total: 33s	remaining: 570ms
983:	learn: 0.3276447	total: 33s	remaining: 537ms
984:	learn: 0.3275535	total: 33s	remaining: 503ms
985:	learn: 0.3274614	total: 33.1s	remaining: 469ms
986:	learn: 0.3273981	total: 33.1s	remaining: 436ms
987:	learn: 0.3273355	total: 33.1s	remaining: 402ms
988:	learn: 0.3272622	total: 33.2s	remaining: 369ms
989:	learn: 0.3272