In [None]:
import numpy as np
import pandas as pd

import lightgbm as lgb

from tools import split_data, feature_labels

import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report


In [None]:
# Load the full dataset and carve it into feature / label frames by position.
data = pd.read_csv("all_data.csv")

# Column 0 is skipped and the last column is the target.
# NOTE(review): column 0 is presumably an identifier -- confirm against the CSV.
feature_columns = slice(1, -1)
label_column = slice(-1, None)  # slice (not a scalar) so y stays a one-column DataFrame

X = data.iloc[:, feature_columns]
y = data.iloc[:, label_column]

# split_data comes from the project's `tools` module and yields the
# train / validation / test partitions in this order.
(X_train, X_val, X_test,
 y_train, y_val, y_test) = split_data(X, y)


In [None]:
# Wrap the training split in a LightGBM Dataset and persist its binary form.
# free_raw_data=False keeps the raw frame attached to the Dataset after it is
# constructed (save_binary triggers construction).
train_data = lgb.Dataset(X_train, label=y_train, free_raw_data=False)
train_data.save_binary('lightgbm/train.bin')

# The validation Dataset must share the training set's feature binning, so it
# is built with `reference=train_data`. Without the reference, the binary file
# written below would be binned independently of the training data.
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data,
                       free_raw_data=False)
val_data.save_binary('lightgbm/val.bin')


In [None]:
# Number of boosting rounds. Previously this was specified twice: as
# `num_iterations: 750` inside the params and as `num_round = 150` passed to
# lgb.train. LightGBM gives the params entry precedence (with a warning), so
# 750 was the value actually used; it is now stated once.
num_round = 750

param = {
    'num_leaves': 81,
    'objective': 'cross_entropy',
    # LightGBM has no metric called "accuracy"; `binary_error` (1 - accuracy)
    # is the supported equivalent. 'mean_squared_error' is an alias of `l2`.
    'metric': ['binary_error', 'mean_squared_error', 'auc'],
}

# Train while reporting the metrics above on the validation set, then persist
# the fitted booster as a text model file.
bst = lgb.train(param, train_data, num_boost_round=num_round,
                valid_sets=[val_data])
bst.save_model('lightgbm/model.txt')


In [None]:
# Evaluate the trained booster on the validation split.
# bst.predict yields one continuous score per row; they are kept in their own
# variable instead of being overwritten by the thresholding step.
# NOTE(review): presumably these are probabilities of the "Ponzi" class -- confirm.
y_proba_val = bst.predict(X_val)

# Scores at or above 0.5 become class 1, everything else class 0.
y_pred_val = (y_proba_val >= 0.5).astype(int)

print(classification_report(y_val, y_pred_val,
      target_names=["Non-Scam", "Ponzi"], digits=4))

disp_val = ConfusionMatrixDisplay(confusion_matrix(
    y_val, y_pred_val), display_labels=["Non-Scam", "Ponzi"])
disp_val.plot()
# Name the split in the title so this figure is distinguishable from the
# test-set confusion matrix.
plt.title("LightGBM: Confusion Matrix (Validation)")
plt.show()


In [None]:
# Evaluate the trained booster on the held-out test split.
# bst.predict yields one continuous score per row; they are kept in their own
# variable instead of being overwritten by the thresholding step.
# NOTE(review): presumably these are probabilities of the "Ponzi" class -- confirm.
y_proba_test = bst.predict(X_test)

# Scores at or above 0.5 become class 1, everything else class 0.
ypred = (y_proba_test >= 0.5).astype(int)

print(classification_report(y_test, ypred,
      target_names=["Non-Scam", "Ponzi"], digits=4))

disp_test = ConfusionMatrixDisplay(confusion_matrix(
    y_test, ypred), display_labels=["Non-Scam", "Ponzi"])
disp_test.plot()
# Name the split in the title so this figure is distinguishable from the
# validation-set confusion matrix.
plt.title("LightGBM: Confusion Matrix (Test)")
plt.show()


In [None]:
# Plot the per-feature importance of the trained booster using LightGBM's
# built-in plotting helper (default settings).
lgb.plot_importance(booster=bst)


In [None]:
# Display the label stored for feature index 18 in the project's
# `feature_labels` lookup.
# NOTE(review): 18 is presumably a feature picked from the importance plot -- confirm.
feature_of_interest = 18
feature_labels[feature_of_interest]