In [1]:
import math

import cryptography.exceptions
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from lightgbm import LGBMClassifier, early_stopping
from sklearn.metrics import average_precision_score, roc_auc_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

%matplotlib inline


In [None]:
# Read the three preprocessed splits (train / validation / test) from the working directory.
train_data, val_data, test_data = map(
    pd.read_csv,
    (
        'fraud_train_preprocessed.csv',
        'fraud_val_preprocessed.csv',
        'fraud_test_preprocessed.csv',
    ),
)

In [None]:
# Column bookkeeping: which columns feed the model and which one is the target.
LABEL = 'isFraud'

# Every non-numeric column of the training frame is treated as categorical.
CATEGORICAL_FEATURES = train_data.select_dtypes(exclude=np.number).columns

# The last two numeric columns are dropped; per the original note these are the
# label column and DT. NOTE(review): this relies on them being the final two
# numeric columns of the CSV — confirm against the preprocessing step.
NUMERIC_FEATURES = train_data.select_dtypes(include=np.number).columns[:-2]

# Model input order: numeric columns first, then categorical ones.
FEATURES = [*NUMERIC_FEATURES, *CATEGORICAL_FEATURES]

In [None]:
# Apply the same dtype policy to every split, in place:
# categorical columns -> pandas "category", numeric feature columns -> float.
for split_frame in (train_data, val_data, test_data):
    split_frame[CATEGORICAL_FEATURES] = split_frame[CATEGORICAL_FEATURES].astype("category")
    split_frame[NUMERIC_FEATURES] = split_frame[NUMERIC_FEATURES].astype(float)


In [None]:
# Baseline LightGBM classifier. n_estimators is only an upper bound: training is
# cut short by early stopping on the validation split.
base_lgbm = LGBMClassifier(objective="binary", n_estimators=10000)
eval_set = [(val_data[FEATURES], val_data[LABEL])]

# Early stopping is passed as a callback rather than the `early_stopping_rounds`
# keyword of fit(): that keyword was deprecated in LightGBM 3.3 and removed in
# 4.0 (TypeError), whereas the callback works on both old and new versions.
# Training stops once validation binary_logloss has not improved for 100 rounds.
base_lgbm.fit(
    train_data[FEATURES],
    train_data[LABEL],
    eval_set=eval_set,
    eval_metric="binary_logloss",
    callbacks=[early_stopping(stopping_rounds=100)],
)

In [None]:
# Score the validation split and report ranking metrics.
lgbm_val_preds = base_lgbm.predict_proba(val_data[FEATURES])

# Column index 1 is used as the score — presumably the positive (fraud) class;
# the column order follows base_lgbm.classes_.
val_scores = lgbm_val_preds[:, 1]

pr_auc = average_precision_score(val_data[LABEL], val_scores)
print(f"PR AUC: {pr_auc}")
roc_auc = roc_auc_score(val_data[LABEL], val_scores)
print(f"ROC AUC: {roc_auc}")

In [None]:
# Class-probability predictions for the test split, using the same feature columns as training.
lgbm_test_preds = base_lgbm.predict_proba(test_data.loc[:, FEATURES])

In [None]:
# Assemble the submission file: one row per test transaction with its predicted score.
# NOTE(review): ids and predictions are paired purely by row position — this assumes
# test_data keeps the row order of ../test_transaction.csv; confirm in preprocessing.
transaction_ids = pd.read_csv('../test_transaction.csv', usecols=['TransactionID'])
submission_df = pd.DataFrame(
    data={
        "TransactionID": transaction_ids["TransactionID"].to_numpy(),
        "isFraud": lgbm_test_preds[:, 1],  # column 1 of predict_proba output
    }
)
submission_df.to_csv("lgbm_submission.csv", index=False)