In [1]:
import os
import pickle
import time
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
import warnings

warnings.filterwarnings('ignore')

def prepOneHotEncoder(df, col, pathPackages):
    """Replace column `col` of `df` with its one-hot encoded columns.

    A previously fitted encoder is loaded from ``prep<col>.pkl`` inside
    `pathPackages` and applied to ``df[[col]]``. The encoded columns are
    named ``<col>_1`` ... ``<col>_k`` (k = number of categories known to the
    encoder) and appended after the remaining columns of `df`.

    Args:
        df: DataFrame that contains the column `col`.
        col: Name of the column to encode; also selects the pickle file.
        pathPackages: Directory that holds the pickled encoder.

    Returns:
        A new DataFrame without `col` and with the encoded columns added.
        The row index of `df` is preserved.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join(pathPackages, 'prep' + col + '.pkl'), 'rb') as file:
        oneHotEncoder = pickle.load(file)

    encoded = oneHotEncoder.transform(df[[col]])
    # Encoders return a sparse matrix by default but may be configured to
    # return a dense array, which has no `toarray` method.
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()

    nCategories = len(oneHotEncoder.categories_[0])
    # Reuse df's index: concat(axis=1) aligns on the index, so a fresh
    # RangeIndex would produce NaN-padded rows for any other index.
    dfOneHotEncoder = pd.DataFrame(
        encoded,
        columns=[col + "_" + str(i) for i in range(1, nCategories + 1)],
        index=df.index,
    )
    return pd.concat([df.drop(col, axis=1), dfOneHotEncoder], axis=1)

def prepStandardScaler(df, col, pathPackages):
    """Scale column `col` of `df` with a previously fitted scaler.

    The scaler is loaded from ``prep<col>.pkl`` inside `pathPackages` and
    applied to ``df[[col]]``; the result overwrites ``df[col]``.

    Args:
        df: DataFrame that contains the column `col`. It is modified in place.
        col: Name of the column to scale; also selects the pickle file.
        pathPackages: Directory that holds the pickled scaler.

    Returns:
        The same DataFrame object, with `col` replaced by its scaled values.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join(pathPackages, 'prep' + col + '.pkl'), 'rb') as file:
        scaler = pickle.load(file)
    df[col] = scaler.transform(df[[col]])
    return df

def runModel(data, path):
    """Classify a single transaction as "Fraud" or "White List".

    All artifacts are read from the ``packages`` sub-directory of `path`:
    the ordered list of model columns (``columnModelling.pkl``), the fitted
    preprocessing objects (``prep<col>.pkl``) and the classifier
    (``modelFraud.pkl``).

    Args:
        data: Values for one transaction, keyed by column name. It is turned
            into a one-row DataFrame, so a dict of scalars is expected.
        path: Directory that contains the ``packages`` folder.

    Returns:
        "White List" when the model predicts label 0, otherwise "Fraud".
    """
    pathPackages = os.path.join(path, "packages")

    # NOTE: unpickling can execute arbitrary code; `path` must point at
    # trusted artifacts only.
    with open(os.path.join(pathPackages, 'columnModelling.pkl'), 'rb') as file:
        modelColumns = pickle.load(file)

    # Select and order the columns exactly as they were stored at training time.
    df = pd.DataFrame(data, index=[0])
    df = df[modelColumns]

    df = prepOneHotEncoder(df, 'type', pathPackages)

    colsToScale = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
    for scaleCol in colsToScale:
        df = prepStandardScaler(df, scaleCol, pathPackages)

    X = df.values
    with open(os.path.join(pathPackages, 'modelFraud.pkl'), 'rb') as file:
        model = pickle.load(file)

    y = model.predict(X)[0]
    return "White List" if y == 0 else "Fraud"

if __name__ == "__main__":
    # Training entry point: read the raw CSV from ./packages, store the list
    # of feature columns, apply the stored preprocessing objects, fit a
    # logistic regression and pickle it as modelFraud.pkl.

    # The trailing "" keeps the trailing separator the path string always had,
    # while using the platform's separator instead of a hard-coded backslash.
    pathPackages = os.path.join(os.getcwd(), "packages", "")
    target = 'isFraud'

    data = pd.read_csv(os.path.join(pathPackages, 'Fraud_Detection.csv'))
    data = data.drop(['nameOrig', 'nameDest'], axis=1)

    # Persist the feature column order so runModel can rebuild the same layout.
    df = data.drop(target, axis=1)
    with open(os.path.join(pathPackages, 'columnModelling.pkl'), 'wb') as file:
        pickle.dump(df.columns.tolist(), file)

    # NOTE(review): the prep helpers only load already-fitted objects from
    # 'prep<col>.pkl'; nothing in this block fits or saves them, so those
    # files must exist beforehand -- TODO confirm where they are produced.
    colOneHotEncoder = ['type']
    for col in colOneHotEncoder:
        df = prepOneHotEncoder(df, col, pathPackages)

    colprepStandardScaler = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
    for col in colprepStandardScaler:
        df = prepStandardScaler(df, col, pathPackages)

    # Pass the arrays straight to the estimator; converting the feature
    # matrix to nested Python lists only costs time and memory.
    X = df.values
    y = data[target].values

    start = time.time()
    model = LogisticRegression()
    model.fit(X, y)
    stop = time.time()

    with open(os.path.join(pathPackages, 'modelFraud.pkl'), 'wb') as file:
        pickle.dump(model, file)
    print(f"Training model done in {stop-start} seconds...")

Preprocessing data type has been saved...
Preprocessing data amount has been saved...
Preprocessing data oldbalanceOrg has been saved...
Preprocessing data newbalanceOrig has been saved...
Preprocessing data oldbalanceDest has been saved...
Preprocessing data newbalanceDest has been saved...
Training model done in 0.9316391944885254 seconds...
