In [1]:
import os
import json
from typing import List
from pydantic import BaseModel, Field

# Verdict for a single transaction: the fraud-category label assigned to it and
# the reasoning given for that label. Both fields are required strings.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a model docstring into the generated JSON schema as "description",
# and this model is part of the schema passed to the LLM as its output format.
class TransactionAnalysisDetail(BaseModel):
    txn_fraud_category: str  # category label for the transaction
    reasoning: str           # free-text justification for the label

# One analysed transaction: its identifier paired with the verdict detail.
# Both fields are required. Comments are used instead of a docstring so the
# JSON schema generated from this model stays unchanged.
class TransactionAnalysis(BaseModel):
    txn_id: str                                     # transaction identifier
    txn_analysis_detail: TransactionAnalysisDetail  # category + reasoning

# Top-level shape of a model's answer: a required list of per-transaction
# analyses. Its JSON schema is what the evaluation loop hands to the LLM as the
# output format, so no docstring is added (it would alter that schema).
class AnalysisResponse(BaseModel):
    transactions: List[TransactionAnalysis]

# Raw attributes of a card transaction as stored in the ground-truth file.
# Every field is required; all are strings except `amount`, which is a float.
class TransactionRequestDetail(BaseModel):
    card_number: str
    timestamp: str  # kept as a plain string; no datetime parsing is applied
    amount: float
    merchant: str
    category: str
    location: str


# A ground-truth record: transaction id, its raw details, and the fraud
# category label the model predictions are compared against. All required.
class TransactionRequest(BaseModel):
    txn_id: str
    txn_details: TransactionRequestDetail
    txn_fraud_category: str  # reference label used when scoring predictions

# Container for the full set of ground-truth records (a required list).
class AnalysisRequest(BaseModel):
    transactions: List[TransactionRequest]

# Load the labelled transactions that model predictions are scored against.
results_file_name = "../transactions_feed_results.json"

# Fail fast with a clear message. Previously a missing file skipped the load
# silently and the print below crashed with an unrelated-looking NameError.
if not os.path.exists(results_file_name):
    raise FileNotFoundError(f"Ground-truth results file not found: {results_file_name}")

# JSON text is read as UTF-8 explicitly instead of the platform default.
with open(results_file_name, 'r', encoding='utf-8') as rf:
    actual_result = json.load(rf)

# The loaded JSON is wrapped under the "transactions" key so it matches the
# AnalysisRequest model, which then validates every record.
ground_truth_list = AnalysisRequest.model_validate({"transactions": actual_result})
print("ground_truth_list: \n", ground_truth_list)


ground_truth_list: 
 transactions=[TransactionRequest(txn_id='TXN_101', txn_details=TransactionRequestDetail(card_number='1234-5678-9012-3456', timestamp='2023-10-27T09:00:00Z', amount=4.5, merchant='Daily Grind Coffee', category='Food & Drink', location='London, UK'), txn_fraud_category='legitimate'), TransactionRequest(txn_id='TXN_102', txn_details=TransactionRequestDetail(card_number='1234-5678-9012-3456', timestamp='2023-10-27T09:00:00Z', amount=4.5, merchant='Daily Grind Coffee', category='Food & Drink', location='India, UK'), txn_fraud_category='legitimate')]


In [2]:
def calculate_accuracy_score(model_result:AnalysisResponse)->float:
    """Score a model's fraud classifications against the ground truth.

    Each predicted transaction is looked up by ``txn_id`` in the module-level
    ``ground_truth_list``. Labels are compared case-insensitively. Predictions
    whose id is not in the ground truth trigger a warning and are excluded
    from the score; ground-truth transactions the model did not return are
    not counted either.

    Args:
        model_result: Parsed model output holding the per-transaction verdicts.

    Returns:
        Accuracy as a percentage rounded to two decimals. Returns 0.0 when no
        prediction could be matched to a ground-truth transaction (instead of
        raising ZeroDivisionError).
    """
    print("model_result: \n",model_result)

    # Index the ground truth by transaction id for constant-time lookups.
    truth_lookup = {txn_request.txn_id: txn_request for txn_request in ground_truth_list.transactions}

    correct_count = 0
    total_count = 0

    for prediction in model_result.transactions:
        txn_id = prediction.txn_id

        # Unknown ids cannot be scored; report them and move on.
        if txn_id not in truth_lookup:
            print(f"⚠️ Warning: Model predicted {txn_id}, but it's not in our ground truth!")
            continue

        predicted_label = prediction.txn_analysis_detail.txn_fraud_category
        actual_label = truth_lookup[txn_id].txn_fraud_category

        # Case-insensitive so e.g. 'Legitimate' matches 'legitimate'.
        if predicted_label.lower() == actual_label.lower():
            correct_count += 1
            print(f"✅ {txn_id}: Match!")
        else:
            print(f"❌ {txn_id}: Mismatch (Model: {predicted_label}, Actual: {actual_label})")

        total_count += 1

    # Nothing scoreable: avoid dividing by zero and report a zero score.
    if total_count == 0:
        print("\nFinal Accuracy: 0.0% (no predictions matched the ground truth)")
        return 0.0

    # Final score as a percentage of the scoreable predictions.
    accuracy = (correct_count / total_count) * 100
    print(f"\nFinal Accuracy: {accuracy}%")
    return round(accuracy,2)

In [3]:
import os
import json

# Load the transactions that will be sent to each model for classification.
txn_file_name = "../transactions_feed.json"

# Fail fast: without this file `serialized_txn` would never be defined and the
# cell that builds the chat payload would crash later with a NameError.
if not os.path.exists(txn_file_name):
    raise FileNotFoundError(f"Transactions feed file not found: {txn_file_name}")

# JSON text is read as UTF-8 explicitly instead of the platform default.
with open(txn_file_name, 'r', encoding='utf-8') as f:
    txns = json.load(f)

# Pretty-printed JSON text; used as the user message content in the payload.
serialized_txn = json.dumps(txns, indent=2)


In [4]:
import ollama
# --- 1. Setup Models ---
# Ollama model tags that the evaluation loop will run, in this order.
models_to_evaluate = ['llama3.2:latest', 'mistral:7b', 'phi3:mini']

print("Checking available models...")
# Tags of every model reported by ollama.list().
available_models = [entry['model'] for entry in ollama.list()['models']]
# Bare last expression so the notebook displays the list.
available_models

Checking available models...


['llama3.2:latest',
 'storyteller:latest',
 'mistral:7b',
 'llama3.1:latest',
 'phi3:mini',
 'llama3.2:3b']

In [5]:
# Instruction given to every model; adjacent literals concatenate into a
# single prompt string.
system_prompt = (
    "You are a Financial Fraud Investigator. "
    "Analyse the transactions submitted and categorize as Legitimate or Suspicious. "
    "As output, for each transaction, give Category (as txn_fraud_category) "
    "and Reasoning (as reasoning) in JSON"
)

# Chat messages: the system instruction followed by the serialized
# transactions as the user turn.
payload = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=serialized_txn),
]

# Sampling options passed to ollama.chat. This is the low-temperature
# ("deterministic") preset; higher-temperature "balanced" (0.5 / 0.9) and
# "creative" (0.9 / 0.85) presets were considered but are not in use.
options = dict(temperature=0.1, top_p=0.95)

In [6]:
# One summary dict per evaluated model: {"model", "duration", "accuracy"}.
evaluation_results = []

for model in models_to_evaluate:
    # Make sure the model exists locally. Previously a missing model was pulled
    # and then skipped, because the evaluation lived in the `else` branch, so it
    # silently dropped out of the results.
    if model not in available_models:
        print(f"Model '{model}' is missing. Pulling now...")
        for progress in ollama.pull(model, stream=True):
            # '\r' keeps the pull status on a single, continuously updated line.
            print(f"  {progress.get('status')}", end='\r')
        print(f"\nFinished pulling {model}")
    else:
        print(f"Model '{model}' is already available, processing the request now.....")

    # Ask the model for a reply constrained to the AnalysisResponse JSON schema,
    # then validate the returned JSON text into that model.
    result = ollama.chat(model=model, messages=payload, stream=False, options=options, format=AnalysisResponse.model_json_schema())
    analysis_response = AnalysisResponse.model_validate_json(result.message.content)
    print("analysis_response==",analysis_response)
    accuracy_score=calculate_accuracy_score(analysis_response)

    # total_duration is treated as nanoseconds; convert to seconds, then split
    # into whole minutes and remaining seconds for display.
    duration_ns = result['total_duration']
    total_seconds = duration_ns / 1_000_000_000
    minutes, seconds = divmod(total_seconds, 60)
    speed_display = f"{int(minutes)}m {seconds:.2f}s"
    print(f"Time taken: {speed_display}")
    print(f"Accuracy: {accuracy_score}")

    model_eval= {"model": model, "duration": speed_display, "accuracy": accuracy_score}
    evaluation_results.append(model_eval)


Model 'llama3.2:latest' is already available, processing the request now.....
analysis_response== transactions=[TransactionAnalysis(txn_id='TXN_101', txn_analysis_detail=TransactionAnalysisDetail(txn_fraud_category='Legitimate', reasoning='Transaction details match and location is consistent with merchant information.')), TransactionAnalysis(txn_id='TXN_102', txn_analysis_detail=TransactionAnalysisDetail(txn_fraud_category='Suspicious', reasoning="Location differs from merchant's stated location, indicating potential false or misinformed merchant information."))]
model_result: 
 transactions=[TransactionAnalysis(txn_id='TXN_101', txn_analysis_detail=TransactionAnalysisDetail(txn_fraud_category='Legitimate', reasoning='Transaction details match and location is consistent with merchant information.')), TransactionAnalysis(txn_id='TXN_102', txn_analysis_detail=TransactionAnalysisDetail(txn_fraud_category='Suspicious', reasoning="Location differs from merchant's stated location, indicating

In [7]:
import pandas as pd

# Tabulate the per-model results: one row per evaluated model.
df = pd.DataFrame(data=evaluation_results)

# Show the table with numeric columns rounded to two decimals; the duration
# column holds preformatted strings and is displayed as-is.
df.round(decimals=2)

Unnamed: 0,model,duration,accuracy
0,llama3.2:latest,0m 18.64s,50.0
1,mistral:7b,0m 45.47s,50.0
2,phi3:mini,0m 22.37s,50.0
