In [8]:
# importing external libraries
from pathlib import Path
import os
import pandas as pd
import pickle
import json
import matplotlib.pyplot as plt
import numpy as np
import logging
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn import set_config
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMClassifier
set_config(transform_output = "pandas")

# Importing function to load data

# Making sure any changes are instantly added
%load_ext autoreload
%autoreload 2

from Modules.load_data import load_data
from Modules.preprocessing import missing_summary, dollar_to_int, find_unique_values
from Modules.plotting import Plotter
from Modules.transforming import *
from Modules.modelling import MLearner

# Importing Pipelines
from Modules.Pipelines import Pipeline1, Pipeline2, Pipeline3

/home/as3620/FODS_coursework/FraudDetection


In [9]:
# Obtaining Root dir

root = str(Path.cwd())
DATA_FOLDER = root + "/data"

# Obtaining seed from config.yaml

# Load the config file
with open(root + "/config.yaml", "r") as file:
    config = yaml.safe_load(file)

seed = config["global"]["seed"]

#print(f"seed: {seed}")

# Set global seeds for reproducibility
random.seed(seed)        
np.random.seed(seed)     

# Use the seed in scikit-learn
random_state = check_random_state(seed)

In [10]:
# Loading in balanced df

df = pd.read_pickle(DATA_FOLDER + "/balanced_data.pkl")

In [14]:
# parameters to use with lgbm

lgbm_params = {"learning_rate": 0.1, "n_estimators": 50}

final_ml = MLearner(dataset=df, transformation_pipeline=Pipeline3, params=lgbm_params, estimator=LGBMClassifier(), HPT=False)

final_ml.fit()

[LightGBM] [Info] Number of positive: 10665, number of negative: 10666
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.028857 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1891
[LightGBM] [Info] Number of data points in the train set: 21331, number of used features: 23
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499977 -> initscore=-0.000094
[LightGBM] [Info] Start training from score -0.000094
[CV] END  accuracy: (train=0.996, test=0.991) f1: (train=0.996, test=0.991) precision: (train=0.994, test=0.988) recall: (train=0.999, test=0.994) total time=  44.3s
[LightGBM] [Info] Number of positive: 10665, number of negative: 10666
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.027873 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1890
[LightGBM] [Info] Number of data points in the train set: 21331, number o

: 

In [6]:
lgbm_params = {"learning_rate": 0.1, "n_estimators": 50}

for vals in lgbm_params.values():
    print(type(vals))
    if type(vals) != int or float:
        print("error")

<class 'float'>
error
<class 'int'>
error


In [9]:
fina

Unnamed: 0,date,client_id,card_id,amount,use_chip,merchant_id,merchant_city,merchant_state,zip,mcc,...,expires,cvv,has_chip,num_cards_issued,credit_limit,acct_open_date,year_pin_last_changed,card_on_dark_web,id,target
0,2015-12-26 15:43:00,1391,4107,$24.87,Online Transaction,41184,ONLINE,,,5310,...,08/2019,71,YES,2,$2559,08/2002,2012,No,17147670,Yes
1,2019-08-27 12:08:00,1128,4583,$182.16,Chip Transaction,48919,Rome,Italy,,5311,...,01/2020,785,YES,2,$8600,09/2005,2011,No,23453647,Yes
2,2015-09-26 12:51:00,1928,2442,$-383.00,Online Transaction,51300,ONLINE,,,3359,...,09/2022,740,YES,1,$31,04/2003,2010,No,16720254,Yes
3,2012-07-11 18:54:00,1466,5884,$16.51,Swipe Transaction,28407,Seaford,DE,19973.0,4121,...,09/2020,405,YES,1,$1866,12/2007,2014,No,11381800,No
4,2012-06-16 08:07:00,1963,4565,$22.91,Swipe Transaction,18534,Atlanta,GA,30325.0,4121,...,12/2023,234,YES,1,$20700,09/2004,2012,No,11267846,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26659,2016-03-01 11:18:00,868,322,$7.88,Online Transaction,94989,ONLINE,,,5661,...,07/2022,21,YES,2,$5000,01/2013,2013,No,17450635,Yes
26660,2011-01-10 12:51:00,1613,4538,$-90.00,Swipe Transaction,59935,Farmington,ME,4938.0,5499,...,06/2022,270,YES,2,$109,09/2003,2009,No,9006584,No
26661,2012-12-21 15:00:00,1236,5351,$26.67,Swipe Transaction,12467,Colorado Springs,CO,80913.0,5300,...,01/2024,459,YES,1,$4304,11/1997,2012,No,12102281,No
26662,2010-12-12 03:19:00,737,148,$39.50,Online Transaction,18563,ONLINE,,,4121,...,06/2020,700,NO,1,$6789,01/2007,2009,No,8881613,Yes
