In [1]:
#####################
# IMPORT LIBS
#####################

import pandas as pd
import numpy as np
from pathlib import Path
import wandb
import datetime
import os
import random
import joblib

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from lightgbm import LGBMClassifier


#####################
# SET CONSTANTS
#####################

# Data locations, given relative to the notebook's working directory.
INPUT_PATH = Path('../input')
OUTPUT_PATH = Path('../output')
TRAIN_PATH = INPUT_PATH

# Target columns; they are dropped from the feature matrix further down.
TARGET_COLUMNS = ['sale_flg', 'sale_amount', 'contacts']

RANDOM_SEED = 4444
USE_WANDB = True

# Session timestamp in the form YYYY-MM-DD_HH:MM:SS (second resolution);
# used to name the tracking run and the saved model file.
CURRENT_TIME = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')

def seed_everything(seed=1234):
    """Seed the global random number generators used in this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global RNG, and
    stores the seed as a string in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Integer seed applied to every generator (default 1234).
    """
    # NOTE(review): setting PYTHONHASHSEED here presumably only influences
    # processes started afterwards, not the running interpreter - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)
# Seed the global RNGs once, up front, with the notebook-wide seed.
seed_everything(RANDOM_SEED)

In [2]:
###############
# Config
###############

In [3]:
# Optional experiment tracking: log in to Weights & Biases and open a run
# named after this session's timestamp. `run` is only defined when
# USE_WANDB is truthy, so later uses must stay behind the same flag.
if USE_WANDB:
    wandb.login()
    run = wandb.init(  # todo add config here
        project="idao-2021-finals",
        name=f'{CURRENT_TIME}',
    )

In [4]:
%%time

def _read_input_csv(file_name):
    """Read the CSV called ``file_name`` from INPUT_PATH into a DataFrame."""
    return pd.read_csv(INPUT_PATH / file_name)


# Every input table is loaded into its own module-level DataFrame.
train = _read_input_csv('trxn.csv')
assets_under_management = _read_input_csv('aum.csv')
balance = _read_input_csv('balance.csv')
client = _read_input_csv('client.csv')
campaigns = _read_input_csv('com.csv')
deals = _read_input_csv('deals.csv')
dict_merchant_category_code = _read_input_csv('dict_mcc.csv')
payments = _read_input_csv('payments.csv')
funnel = _read_input_csv('funnel.csv')
appl = _read_input_csv('appl.csv')

In [5]:
# Features: every funnel column except the targets and the client identifier.
X = funnel.drop(columns=[*TARGET_COLUMNS, 'client_id'])
# Label: the first entry of TARGET_COLUMNS.
Y = funnel[TARGET_COLUMNS[0]]

In [6]:
# Hold-out validation: fit on the training split and score accuracy on the
# held-out split. The split size is train_test_split's default; only the
# random state is pinned so the partition is repeatable.
model = LGBMClassifier(n_jobs = -1)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state = RANDOM_SEED)
model.fit(X_train, Y_train)

Y_pred = model.predict(X_test)
# Computed once and reused for both the printout and the tracking summary.
validation_accuracy = accuracy_score(Y_test, Y_pred)
print(f'Accuracy score {validation_accuracy}')

# `run` exists only when tracking was enabled, hence the guard.
if USE_WANDB:
    run.summary["validation_accuracy"] = validation_accuracy

In [7]:
# Refit the same estimator on the full feature matrix and label
# (the earlier fit used only the training split).
model.fit(X, Y)

LGBMClassifier()

In [8]:
# Make sure the model directory exists. parents=True also creates
# OUTPUT_PATH itself when it is missing, and exist_ok=True makes re-running
# the cell harmless. Any other failure (e.g. permissions) is raised here
# instead of being silently swallowed and surfacing later in joblib.dump.
model_dir = OUTPUT_PATH / 'models'
model_dir.mkdir(parents=True, exist_ok=True)

# NOTE(review): CURRENT_TIME contains ':' characters, which are not valid in
# file names on some platforms (e.g. Windows) - confirm the target OS.
model_path = model_dir / f'lightgbm_{CURRENT_TIME}.pkl'

# save model
joblib.dump(model, model_path)
# load model (round-trip of the file written just above)
model = joblib.load(model_path)

In [9]:
# Close the tracking run; `run` only exists when USE_WANDB was enabled.
if USE_WANDB:
    run.finish()