In [1]:
import os
import json
import dill
import numpy as np
import pandas as pd
import requests
from sklearn_pandas import DataFrameMapper
from tensorflow import keras
from typing import Dict

In [2]:
# --- Inference service configuration ---------------------------------------
# Deployed model name and the namespace it is served from.
MODEL_NAME = "fraud-detection-fd6e7"
NAMESPACE = "user-example-com"

# Value of the Host header sent with every request made by predict():
# "<model>-predictor-default.<namespace>".
HOST = f"{MODEL_NAME}-predictor-default.{NAMESPACE}"
HEADERS = dict(Host=HOST)

# MODEL_ENDPOINT is queried with GET for the model metadata; inference
# requests are POSTed to its "/infer" sub-path.
MODEL_ENDPOINT = f"http://{MODEL_NAME}-predictor-default/v2/models/model"
PREDICT_ENDPOINT = f"{MODEL_ENDPOINT}/infer"

In [3]:
# Display metadata for the ten demo merchants, keyed by transaction index.
# Every "hash" starts as the placeholder 0 and is filled in by the loop below.
merchants = {
    0: {"hash": 0, "name": "Sandwiches & Books Outlet"},
    1: {"hash": 0, "name": "Treasures & Gadgets Shop"},
    2: {"hash": 0, "name": "Lucky Treasures Mart"},
    3: {"hash": 0, "name": "Gas Station"},
    4: {"hash": 0, "name": "The Cool Store"},
    5: {"hash": 0, "name": "Sunny Gifts Boutique"},
    6: {"hash": 0, "name": "Gadgets & Books"},
    7: {"hash": 0, "name": "Bobs Diner"},
    8: {"hash": 0, "name": "Corner Store"},
    9: {"hash": 0, "name": "Target"},
}

# Merchant hash strings keyed by the same index as `merchants`.
# NOTE(review): indexes 0 and 2 carry the same hash string - confirm this is
# intentional and not a copy/paste slip.
merchant_strings = {
    0: "-4282466774399734331",
    1: "-5467922351692495955",
    2: "-4282466774399734331",
    3: "-2088492411650162548",
    4: "-2916542501422915698",
    5: "-3626050238374547691",
    6: "-1270758750219685742",
    7: "3672572098448220151",
    8: "-5949357157231676152",
    9: "8637658108713563470",
}

# Echo each merchant (still showing the placeholder hash), then attach its
# real hash string.
for merchant_id, merchant in merchants.items():
    print(merchant_id, merchant)
    merchant['hash'] = merchant_strings[merchant_id]

def load_transaction_data(data):
    """Read a JSON list of transaction records and key them by their index.

    Each record's 'index' field is removed and becomes the record's key in the
    result; its 'merchant name' field is set to the hash string found in the
    module-level ``merchant_strings`` for that index.

    Args:
        data: Path of the JSON file to read.

    Returns:
        dict mapping each record's index to the remaining record fields.

    Raises:
        KeyError: If a record has no 'index' field, or its index is not a key
            of ``merchant_strings``.
    """
    with open(data, 'r') as handle:
        records = json.load(handle)

    by_index = {}
    for entry in records:
        # Take the index out of the record; it becomes the dictionary key.
        idx = entry.pop('index')
        entry['merchant name'] = merchant_strings[idx]
        by_index[idx] = entry

    return by_index

def transform_transaction_data(raw_data: Dict):
    """Build display-ready transaction summaries keyed by transaction id.

    Args:
        raw_data: Mapping of transaction id to a record that provides at least
            the 'amount', 'year', 'month', 'day' and 'time' fields. Every id
            must also be a key of the module-level ``merchants`` mapping,
            which supplies the merchant display name.

    Returns:
        dict mapping each transaction id to a dict with the keys
        'Transaction ID', 'Amount', 'Place', 'Date', 'Time' and 'Fraud Status'
        (always "Unchecked").

    Raises:
        KeyError: If a record lacks one of the required fields or its id is
            not present in ``merchants``.
    """
    transformed = {}

    # Iterate over the argument. The previous version looped over the
    # notebook-level `data` global, so the result ignored what was passed in.
    for key, record in raw_data.items():
        # Debug trace, kept so the cell output stays the same.
        print(key, record)
        print(record['year'])

        transformed[key] = {
            'Transaction ID': key,
            'Amount': record['amount'],
            'Place': merchants[key]['name'],
            # The date parts are cast to int before formatting (the sample
            # records carry them as floats, e.g. 2011.0); no zero padding.
            'Date': f"{int(record['year'])}-{int(record['month'])}-{int(record['day'])}",
            'Time': record['time'],
            'Fraud Status': "Unchecked"
        }

    return transformed


# Load the sample transactions; `data` maps each record's index to its fields.
data = load_transaction_data('transactions.json')

# One row per transaction, with every transaction starting out as 'Unchecked'.
transactions_df = pd.DataFrame.from_dict(data, orient='index').assign(
    **{'Fraud Status': 'Unchecked'}
)

# Add the 'Tested' flag only when the loaded records do not already carry one.
if 'Tested' not in transactions_df:
    transactions_df['Tested'] = False

# Display-oriented summaries built from the same records.
transformed_data = transform_transaction_data(data)

0 {'hash': 0, 'name': 'Sandwiches & Books Outlet'}
1 {'hash': 0, 'name': 'Treasures & Gadgets Shop'}
2 {'hash': 0, 'name': 'Lucky Treasures Mart'}
3 {'hash': 0, 'name': 'Gas Station'}
4 {'hash': 0, 'name': 'The Cool Store'}
5 {'hash': 0, 'name': 'Sunny Gifts Boutique'}
6 {'hash': 0, 'name': 'Gadgets & Books'}
7 {'hash': 0, 'name': 'Bobs Diner'}
8 {'hash': 0, 'name': 'Corner Store'}
9 {'hash': 0, 'name': 'Target'}
0 {'user': 29.0, 'card': 3.0, 'year': 2011.0, 'month': 9.0, 'day': 28.0, 'time': '10:36', 'amount': '$18.61   ', 'use chip': 'Swipe Transaction ', 'merchant name': '-4282466774399734331', 'merchant city': 'Whites Creek              ', 'merchant state': 'TN                              ', 'zip': 37189.0, 'mcc': 4121.0, 'errors?': '                                                    ', 'is fraud?': 'No '}
2011.0
1 {'user': 36.0, 'card': 2.0, 'year': 2017.0, 'month': 9.0, 'day': 14.0, 'time': '12:29', 'amount': '$15.84   ', 'use chip': 'Chip Transaction  ', 'merchant name': '-546

In [15]:
class FraudDatasetTransformer:
    """Prepares raw transaction frames for the fitted feature mapper."""

    def __init__(self) -> None:
        """Nothing to set up; the transformer keeps no state."""

    def transform(self, dataset: pd.DataFrame, mapper: DataFrameMapper):
        """Clean a transaction frame and run it through ``mapper``.

        The input frame is left untouched. On a copy of it:

        - 'merchant name' is cast to ``str``;
        - the 'mcc', 'zip' and 'merchant state' columns are dropped;
        - rows are sorted by 'user' then 'card' and the index is renumbered.

        Args:
            dataset (pd.DataFrame): Raw transactions; must contain the
                columns named above.
            mapper (DataFrameMapper): Object whose ``transform`` method is
                applied to the cleaned frame.

        Returns:
            Whatever ``mapper.transform`` returns for the cleaned frame.
        """
        cleaned = (
            dataset
            .assign(**{"merchant name": dataset["merchant name"].astype(str)})
            .drop(columns=["mcc", "zip", "merchant state"])
            .sort_values(by=["user", "card"])
            .reset_index(drop=True)
        )
        return mapper.transform(cleaned)


def get_df_mapper():
    """Load and return the mapper object stored in encoders/data/mapper.pkl.

    The file is deserialised with ``dill``; the path is relative to the
    current working directory.
    """
    mapper_path = os.path.join("encoders", "data", "mapper.pkl")
    with open(mapper_path, "rb") as handle:
        return dill.load(handle)


def predict(vdf: pd.DataFrame, timeout: float = 30.0) -> Dict:
    """Score one transformed transaction against the remote model.

    The model metadata is fetched from ``MODEL_ENDPOINT`` to learn the expected
    input shape. The feature row is zero-padded or truncated to the expected
    feature count, repeated along the sequence axis, and POSTed to
    ``PREDICT_ENDPOINT``.

    Args:
        vdf: Frame with exactly one row. Its first column is treated as the
            actual label (only printed for comparison); the remaining columns
            are the model features.
        timeout: Seconds to wait on each HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive service.

    Returns:
        The decoded JSON body of the inference response. It is returned as-is
        whether it holds "outputs" or an "error" entry.

    Raises:
        ValueError: If ``vdf`` does not contain exactly one row.
    """
    # Fail before any network traffic: the tiling below only makes sense for a
    # single row, and other row counts used to fail with an opaque error.
    if len(vdf) != 1:
        raise ValueError(f"predict expects exactly one row, got {len(vdf)}")

    res_svc = requests.get(MODEL_ENDPOINT, headers=HEADERS, timeout=timeout)
    response_svc = json.loads(res_svc.text)
    print(response_svc)

    # Data preparation: first column is the label, the rest are features.
    label_column = vdf.columns.values[0]
    x = vdf.drop(label_column, axis=1).to_numpy()
    y = np.array([vdf[label_column].iloc[0]])

    # The metadata reports the input shape as [batch, sequence_length, num_features].
    input_shape = response_svc['inputs'][0]['shape']
    sequence_length = input_shape[1]
    num_features = input_shape[2]

    # Match the feature count the model expects.
    original_features = x.shape[1]
    print(original_features)
    if original_features < num_features:
        print("pad maybe?")
        # Too few features: right-pad with zeros.
        # NOTE(review): zero padding is an assumption carried over from the
        # original code - confirm it is meaningful for this model.
        x_padded = np.pad(
            x,
            ((0, 0), (0, num_features - original_features)),
            mode="constant",
            constant_values=0,
        )
    else:
        print("reshape accordingly")
        # Enough (or too many) features: keep the first num_features columns.
        x_padded = x[:, :num_features]

    # Repeat the single row along the sequence axis -> [1, sequence_length, num_features].
    x_reshaped = np.tile(x_padded, (sequence_length, 1)).reshape(
        1, sequence_length, num_features
    )

    # Preparing the payload
    payload = {
        "inputs": [
            {
                "name": "input_1",
                "shape": [1, sequence_length, num_features],
                "datatype": "FP32",
                "data": x_reshaped.tolist(),
            }
        ]
    }

    print(payload)

    # Sending the request
    res = requests.post(
        PREDICT_ENDPOINT, headers=HEADERS, data=json.dumps(payload), timeout=timeout
    )
    response = json.loads(res.text)

    # Handle response
    if "error" in response:
        print(f"Error: {response['error']}")
    else:
        print(response["outputs"])
        pred = response["outputs"][0]["data"][0]
        print(f"Actual ({y[0]}) vs. Prediction ({round(pred, 3)} => {int(round(pred, 0))})")

    return response

def do_predict(test_data: Dict):
    """Run one raw transaction record through the transformer and the model.

    Args:
        test_data: A single transaction as a flat dict of column name to value.

    Returns:
        Whatever ``predict`` returns for the transformed record.
    """
    # Wrap the record in a one-row frame so the transformer can process it.
    single_row = pd.DataFrame([test_data])
    features = FraudDatasetTransformer().transform(single_row, get_df_mapper())
    return predict(features)

In [16]:
# Hand-written sample transaction (labelled as fraud) used to exercise the
# deployed model end to end.
sample_transaction = {
    'user': 2,
    'card': 4,
    'year': 2022,
    'month': 9,
    'day': 2,
    'time': '14:09',
    'amount': '$149345.84  ',
    'use chip': 'Online Transaction',
    'merchant name': 3452760747765970571,
    'merchant city': ' ONLINE                   ',
    'merchant state': '                                ',
    'zip': 0,
    'mcc': 3174,
    'errors?': '                                                    ',
    'is fraud?': 'Yes',
}

# Show the first value of the first output tensor as a string.
str(do_predict(sample_transaction)['outputs'][0]['data'][0])

{'name': 'model', 'versions': ['2'], 'platform': 'onnxruntime_onnx', 'inputs': [{'name': 'input_1', 'datatype': 'FP32', 'shape': [-1, 4, 103]}], 'outputs': [{'name': 'dense', 'datatype': 'FP32', 'shape': [-1, 1]}]}
103
reshape accordingly
{'inputs': [{'name': 'input_1', 'shape': [1, 4, 103], 'datatype': 'FP32', 'data': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1049505255954766, 1.3496262271969264], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

'0.5039835572242737'