In [30]:
import pandas as pd
import json
import requests
import os
import time
from sklearn.preprocessing import MinMaxScaler

In [44]:
# Base URL of the prediction API; endpoint paths such as "/sanityCheck" and
# "/batch" are appended to it further down.
ep = "http://localhost:8501/api/prediction"

In [45]:
# Test the connection: GET the /sanityCheck endpoint and display the raw
# response body (bytes) as the cell's output.
requests.get(ep + "/sanityCheck").content

b'"prediction fine"'

In [46]:
# Endpoint that predict_batch POSTs a whole batch of transactions to.
API_URL = ep + "/batch"

# Directory holding the input CSVs.
# NOTE(review): not referenced by the visible code, which hard-codes the full
# CSV path instead — confirm whether it is used elsewhere.
BATCH_DIR = "datasets/simulation_perday_topredict" 
# Directory the scored CSV is written to; created up front if it is missing.
OUTPUT_DIR = "predicted_results"
os.makedirs(OUTPUT_DIR, exist_ok=True)

def predict_batch(csv_file_path, timeout=60):
    """
    Score one CSV of transactions through the batch prediction API.

    The CSV is loaded, the "Time" column is dropped when present, and "Amount"
    is replaced by a min-max scaled "scaled_amount" column appended last. Each
    row is then sent to API_URL as
    {"transactions": [{"features": [...]}, ...]}, and the entries under the
    response's "transactions" key are joined column-wise onto the features
    that were sent. Nothing is written to disk here.

    Args:
        csv_file_path: Path of the CSV file to score.
        timeout: Seconds to wait for the API before giving up. Pass None to
            wait indefinitely.

    Returns:
        A DataFrame with the submitted features plus the API's result columns,
        or None when the input is unusable (wrong column count, no rows) or
        the request fails. The reason is printed in every failure case.
    """
    print(f"--- Processing {csv_file_path} ---")
    df = pd.read_csv(csv_file_path)

    # "Time" is not sent to the API; files that already lack it are fine.
    df = df.drop(columns=["Time"], errors="ignore")

    # Fail the same way as an API error (message + None) so callers that test
    # `is not None` never receive a non-DataFrame value.
    if df.shape[1] != 29:
        print(f"ERROR: Expected 29 columns, got {df.shape[1]}. Check your CSV format.")
        return None
    if df.empty:
        # The scaler cannot be fit on zero rows and there is nothing to score.
        print(f"ERROR: {csv_file_path} contains no rows to predict.")
        return None

    # SCALE AMOUNT
    # NOTE(review): the scaler is re-fit on every batch, so "scaled_amount" is
    # relative to this file's own min/max — confirm that this matches how the
    # model's training data was scaled.
    scaler = MinMaxScaler()
    df["scaled_amount"] = scaler.fit_transform(df["Amount"].values.reshape(-1, 1))
    df = df.drop(columns=["Amount"])

    # One {"features": [...]} entry per row, in the DataFrame's column order.
    payload = {
        "transactions": [{"features": row} for row in df.values.tolist()]
    }

    # 3. Send Request to API
    start_time = time.time()
    try:
        response = requests.post(API_URL, json=payload, timeout=timeout)
        response.raise_for_status()  # Raise error for 4xx or 5xx
    except requests.exceptions.RequestException as e:
        print(f"‚ùå API Error: {e}")
        # Use the response carried by the exception: the local `response` is
        # unbound when the POST itself failed, and a Response object is falsy
        # for 4xx/5xx, so testing it directly would hide the error body.
        if e.response is not None:
            print(f"Response: {e.response.text}")
        return None

    duration = time.time() - start_time
    print(f"‚úÖ API Success! Took {duration:.2f} seconds for {len(df)} rows.")

    # 4. Parse Response
    predictions = response.json()['transactions']

    # 5. Merge Results back into DataFrame
    # Both frames carry a default 0..n-1 index, so rows pair up by position.
    pred_df = pd.DataFrame(predictions)
    final_df = pd.concat([df, pred_df], axis=1)

    return final_df

    
# Score one batch file, report how many rows were flagged, and save the result.
result_df = predict_batch("./datasets/simulation_perday_topredict/data_to_predict_1.csv")

# Anything other than a DataFrame means the batch was not scored, so only a
# real DataFrame may reach the column lookup below.
if isinstance(result_df, pd.DataFrame):
    # Look for Fraud
    frauds = result_df[result_df['status'] == 'FRAUD_RISK']
    print(f"\nüö® Found {len(frauds)} potential fraud cases in this batch.")

    # Save to CSV
    save_path = os.path.join(OUTPUT_DIR, "scored_test.csv")
    result_df.to_csv(save_path, index=False)
    print(f"Saved scored data to {save_path}")
elif result_df is not None:
    # A non-None, non-DataFrame result describes why scoring failed; show it
    # rather than crashing on the 'status' lookup.
    print(result_df)

--- Processing ./datasets/simulation_perday_topredict/data_to_predict_1.csv ---
‚úÖ API Success! Took 0.51 seconds for 9784 rows.

üö® Found 136 potential fraud cases in this batch.
Saved scored data to predicted_results/scored_test.csv
