# Detecção de Fraudes: Protótipo

In [8]:
import json
import os

import joblib
import numpy as np
import pandas as pd
import requests
from flask import Flask, jsonify, request
from lightgbm import LGBMClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [10]:
# Load the processed credit-card dataset and show its (rows, columns) shape.
data_path = '../data/processed/creditcard.parquet'
df = pd.read_parquet(data_path)
df.shape

(284807, 31)

In [11]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,time,v1,v2,v3,v4,v5,v6,v7,v8,v9,...,v21,v22,v23,v24,v25,v26,v27,v28,amount,class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [12]:
# Class balance: absolute counts per label first, then each label's share
# of all rows.
class_counts = df['class'].value_counts()
display(class_counts)
class_counts.values / len(df['class'])

0    284315
1       492
Name: class, dtype: int64

array([0.99827251, 0.00172749])

In [13]:
# Fraction of missing values over the whole frame.
# The null-cell count is divided by the total number of cells (rows * columns);
# dividing by the row count alone would yield "nulls per row", which can
# exceed 1 and is not a fraction.
n_rows, n_cols = df.shape
df.isnull().sum().sum() / (n_rows * n_cols)

0.0

In [14]:
# Separate the feature matrix (every column except the label) from the label
# vector, both as NumPy arrays.
target_col = 'class'
X = df.drop(columns=target_col).to_numpy()
y = df[target_col].to_numpy()

X.shape, y.shape

((284807, 30), (284807,))

In [15]:
# Hold out 30% of the rows for evaluation. shuffle=False keeps the original
# row order, so the test set is the trailing part of the data rather than a
# random sample.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False
)

X_train.shape, y_train.shape

((199364, 30), (199364,))

In [16]:
# Baseline: a dummy classifier that draws predictions at random according to
# the class distribution seen during training.
# random_state is fixed because the 'stratified' strategy is stochastic;
# without a seed the baseline report changes on every run.
dummy = DummyClassifier(strategy='stratified', random_state=42)
dummy.fit(X_train, y_train)
y_pred = dummy.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85335
           1       0.01      0.01      0.01       108

    accuracy                           1.00     85443
   macro avg       0.50      0.50      0.50     85443
weighted avg       1.00      1.00      1.00     85443



In [17]:
# Fit a LightGBM classifier with default hyper-parameters and evaluate it on
# the hold-out set.
# NOTE(review): the recorded report shows very low recall for class 1 and
# nothing here accounts for the class imbalance measured earlier -- confirm
# whether that is intended for this prototype.
model = LGBMClassifier().fit(X_train, y_train)
y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85335
           1       0.09      0.03      0.04       108

    accuracy                           1.00     85443
   macro avg       0.55      0.51      0.52     85443
weighted avg       1.00      1.00      1.00     85443



In [19]:
# Serialize the trained model to disk with joblib.
model_path = '../model/lgbm_fraud_detection.sav'

# Create the target directory first: joblib.dump does not create missing
# parent directories and would fail on a fresh checkout.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)

['../model/lgbm_fraud_detection.sav']

In [21]:
# Run a prediction for a single sample: the last row of the frame without
# its final column (the `class` label).
last_row = df.iloc[-1]
sample = last_row.iloc[:-1]

# to_json() returns a JSON-encoded *string*; it is reused as the payload of
# the HTTP request made later in the notebook.
sample_json = sample.to_json()

model.predict([sample])

array([0], dtype=int64)

In [None]:
# Send the sample to the locally served model and print the JSON reply.
# NOTE(review): sample_json is already a JSON string, so passing it through
# `json=` encodes it a second time (the body becomes a JSON string literal,
# not an object). Left unchanged because the server code is not visible here
# -- confirm what the /predict endpoint expects.
response = requests.post(
    'http://localhost:5025/predict',
    json=sample_json,
    timeout=10,  # seconds; without it an unresponsive server blocks the kernel
)

print(response.json())