  # Imports

In [1]:
# Mount Google Drive at /content/drive; the path added to sys.path below
# lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import os
import sys

# Directory added to the import path so that `homecredit` can be imported.
PROJECT_ROOT = '/content/drive/MyDrive/Kaggle/HomeCredit'
# Guarded so that re-running this cell does not pile up duplicate entries
# in sys.path (the cell is then idempotent).
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Project-specific environment initialisation. Kept after the sys.path update,
# which this import presumably relies on -- confirm if `homecredit` is also
# installed as a package.
from homecredit.setup_env import setup_environment
setup_environment()

import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline

from homecredit.data.data_processor_1 import DataProcessor1
from homecredit.data.data_processor_2 import DataProcessor2
from homecredit.data.data_processor_3 import DataProcessor3
from homecredit.data.data_processor_4 import DataProcessor4
from homecredit.data.data_processor_5 import DataProcessor5
from homecredit.data.data_processor_6 import DataProcessor6
from homecredit.config import COL_TARGET, COL_ID, COL_DATE, COL_WEEK
from homecredit.pipeline import FullPipeline
from homecredit.tracker import evaluate_and_log

# Data

In [2]:
# Build the training frame with the "version_2" processor.
# NOTE(review): fit=True / load=False presumably mean "recompute and fit the
# processor instead of loading a cached result" -- confirm in DataProcessor2.
data_processor = DataProcessor2("version_2")
df = data_processor.get_data(fit=True, load=False)

# Order rows by week, then by id. The sort is done in place, so `df` stays the
# same object that get_data returned.
sort_keys = [COL_WEEK, COL_ID]
df.sort_values(by=sort_keys, inplace=True)

# Model

In [None]:
from homecredit.models.tree import LGBM

# Name passed to the pipeline and to the evaluation/tracking call below.
MODEL_NAME = "blend_lgb_run_test"
# Number of cross-validation splits used by evaluate_and_log.
N_SPLITS = 5

# LightGBM hyper-parameters for a binary classifier scored with AUC.
params_lgb = {
    "boosting_type": "gbdt",
    # Column subsampling is applied both per node and per tree.
    "colsample_bynode": 0.8,
    "colsample_bytree": 0.8,
    "extra_trees": True,
    "learning_rate": 0.03,
    "max_depth": 10,
    "metric": "auc",
    # Upper bound on boosting rounds; early stopping is configured on the
    # LGBM wrapper below.
    "n_estimators": 4000,
    "num_leaves": 64,
    "objective": "binary",
    "random_state": 42,
    # L1 / L2 regularisation.
    "reg_alpha": 10,
    "reg_lambda": 10,
    # "device": "gpu",
    "verbose": -1,
    "max_bin": 150,
}

# Every column except the id, date and target columns is used as a feature.
# NOTE(review): COL_WEEK is not excluded here, so if it is a regular column it
# ends up in the feature list -- confirm this is intentional.
non_feature_cols = {COL_ID, COL_DATE, COL_TARGET}
features = [col for col in df.columns if col not in non_feature_cols]

# NOTE(review): test_size / shuffle presumably control the hold-out split used
# for early stopping inside the LGBM wrapper -- verify in homecredit.models.tree.
model = LGBM(
    params_lgb,
    early_stopping_rounds=200,
    test_size=0.01,
    shuffle=True,
)

# A single-step sklearn Pipeline wrapped in the project's FullPipeline.
classifier_steps = [("classifier", model)]
pipeline = FullPipeline(
    Pipeline(steps=classifier_steps),
    run_name="full",
    name=MODEL_NAME,
    load_model=False,
    features=features,
)

# Fit the model on the whole train set.
pipeline.fit(df, verbose=True)

# Evaluate the model with cross-validation and log the run (track=True).
# Note that `model` is rebound to the first value returned by evaluate_and_log.
model, preds_i_df = evaluate_and_log(
    df,
    params_lgb,
    pipeline,
    n_splits=N_SPLITS,
    features=features,
    model_name=MODEL_NAME,
    comment="",
    track=True,
    verbose=True,
)