In [18]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import RobustScaler

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as acc

from sklearn.model_selection import GridSearchCV

import catboost
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)

# <b> <span style='color:#F1C40F'>|</span> Table of Contents</b>

* [1 | Missing Values](#1)<br>
* [2 | Model ](#2)<br>
    - [2.1 | CatBoost ](#21)<br>
    - [2.2 | XGBClassifier ](#22)<br>
    - [2.3 | LGBMClassifier ](#23)<br>
* [3 | Result](#3)<b>

In [4]:
train = pd.read_csv('data/3.4/train.csv')
test = pd.read_csv('data/3.4//test.csv')
sub = pd.read_csv('data/3.4//sample_submission.csv')
original = pd.read_csv('data/3.4/creditcard.csv')

<a id="1"></a>
# <b>1 </b><b> <span style='color:#F1C40F'>|</span> Missing Values</b>

In [27]:
def ausentes(data):
    vl = data.isnull().sum()
    per = 100 * data.isnull().sum() / len(data)
    tab = pd.concat([vl, per], axis=1)
    df = tab.rename(columns={0:'Valores Ausentes', 1:'% do Valor'})
    df = df[df.iloc[:,1] != 0].sort_values('% do Valor', ascending=False).round(1)
    return df

In [28]:
ausentes(train)

Unnamed: 0,Valores Ausentes,% do Valor


In [29]:
ausentes(test)

Unnamed: 0,Valores Ausentes,% do Valor


In [5]:
train.drop(columns=['id'], inplace=True)
test.drop(columns=['id'], inplace=True)
df = pd.concat([train,original,test])
df = df.drop_duplicates()

In [6]:
df['hour'] = df['Time'] % (24 * 3600) // 3600
df['day'] = (df['Time'] // (24 * 3600)) % 7

<a id="2"></a>
# <b>2 </b><b> <span style='color:#F1C40F'>|</span> Model</b>

In [7]:
rbs = RobustScaler()

y = df['Class']
x = df.drop(columns='Class',axis=1)

x[x.columns] = rbs.fit_transform(x[x.columns])

In [8]:
df_train = x.iloc[:-len(test),:]
df_train['Class'] = y[:-len(test)]

df_test = df.iloc[-len(test):,:].reset_index(drop=True)

x = df_train.drop(['Class'], axis=1)
y = df_train.Class

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True, stratify=y, random_state=2023)

<a id="21"></a>
# <b>2.1 </b><b> <span style='color:#F1C40F'>|</span> CatBoost</b>

In [9]:
catboost_params = {'n_estimators'     : [200,300,500],
                   'learning_rate'    : [0.001,0.0025], 
                   'one_hot_max_size' : [8,12],
                   'depth'            : [4,6],
                   'l2_leaf_reg'      : [0.014],
                   'colsample_bylevel': [0.06],
                   'min_data_in_leaf' : [12],
                   'boosting_type'    : ['Plain'],
                   'bootstrap_type'   : ['Bernoulli'],
                   #'task_type'        : [ "GPU"],
                   'random_state'     : [2023],
                   'verbose'          : [False]}

ml_cat_rg = catboost.CatBoostClassifier()
clf_grid = GridSearchCV(estimator=ml_cat_rg, param_grid=catboost_params, scoring=acc, cv=10,n_jobs=-1)
clf_grid.fit(X_train, y_train)
best = clf_grid.best_params_
ml_cat_rg = CatBoostClassifier(**best)
ml_cat_rg.fit(X_train, y_train)
pdc_catboost = ml_cat_rg.predict(X_test)

<a id="22"></a>
# <b>2.2 </b><b> <span style='color:#F1C40F'>|</span> XGBClassifier</b>

In [12]:
xgb_params = {'n_estimators'     : [500,750,1000],
              'min_child_weight' : [96],
              'max_depth'        : [4,8,10],
              'learning_rate'    : [0.001,0.0025], 
              'subsample'        : [0.85, 0.95],
              'reg_lambda'       : [1.50],
              'reg_alpha'        : [1.50],
              'gamma'            : [1.50],
              'max_bin'          : [512],
              'random_state'     : [2023],
              'n_jobs'           : [-1],
              'tree_method'      : ['gpu_hist'],
             } 

xgb = XGBClassifier()
xgb_grid = GridSearchCV(estimator=xgb, param_grid=xgb_params, scoring=acc)
xgb_grid.fit(X_train, y_train)
best = xgb_grid.best_params_
xgb = XGBClassifier(**best)
xgb.fit(X_train, y_train)
xgb_pre = xgb.predict(X_test)

0.9981

<a id="23"></a>
# <b>2.3 </b><b> <span style='color:#F1C40F'>|</span> LGBMClassifier</b>

In [23]:
ml_lgbm = LGBMClassifier()
ml_lgbm.fit(X_train, y_train)
ml_lgbm = ml_lgbm.predict(X_test)
round(acc(y_test,xgb_pre),4)

0.9981

<a id="3"></a>
# <b>3 </b><b> <span style='color:#F1C40F'>|</span> Result</b>

In [24]:
print(round(acc(y_test,pdc_catboost),4))
print(round(acc(y_test,xgb_pre),4))
print(round(acc(y_test,ml_lgbm),4))

0.9981
0.9981
0.9957
