In [2]:
!pip install scikit-learn transformers



In [3]:

import random
from sklearn.ensemble import IsolationForest
from transformers import pipeline

# Sample transaction data
transactions = [
    {"amount": 120.5, "location": "New York", "time": 14},
    {"amount": 20.0, "location": "New York", "time": 9},
    {"amount": 3050.0, "location": "Houston", "time": 2},
    {"amount": 150.0, "location": "Chicago", "time": 12},
    {"amount": 10.0, "location": "New York", "time": 10},
    {"amount": 5000.0, "location": "Los Angeles", "time": 1},
]

trusted_users = ["New York", "Chicago"]  # trusted locations (proxy for user behavior)

# Extract features for anomaly detection
def extract_features(tx):
    return [tx["amount"], tx["time"]]

X = [extract_features(tx) for tx in transactions]

# Train Isolation Forest
model = IsolationForest(contamination=0.3, random_state=42)
model.fit(X)

# NLP Explanation Model from Hugging Face
nlp_model = pipeline('text-generation', model='distilgpt2')

# Score each transaction and generate alerts
def trust_score(transaction):
    # A basic heuristic: more trust if location is known/trusted
    score = 100
    if transaction["location"] not in trusted_users:
        score -= 30
    if transaction["time"] < 6 or transaction["time"] > 22:
        score -= 20
    if transaction["amount"] > 1000:
        score -= 30
    return max(score, 0)

def generate_explanation(transaction, score):
    prompt = (
        f"The transaction of ${transaction['amount']} at {transaction['time']}h "
        f"in {transaction['location']} was flagged. Trust score: {score}/100. "
        f"Explain why it may be suspicious in simple terms:\n"
    )
    output = nlp_model(prompt, max_length=100, num_return_sequences=1)
    return output[0]["generated_text"]

# Evaluate each transaction
for i, tx in enumerate(transactions):
    score = trust_score(tx)
    prediction = model.predict([extract_features(tx)])
    is_anomaly = prediction[0] == -1

    print(f"\n--- Transaction #{i+1} ---")
    print(f"Details: {tx}")
    if is_anomaly:
        print(f"Flagged as suspicious.")
        print(f"Trust Score: {score}/100")
        explanation = generate_explanation(tx, score)
        print("Explanation:")
        print(explanation)
    else:
        print("Transaction appears normal.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--- Transaction #1 ---
Details: {'amount': 120.5, 'location': 'New York', 'time': 14}
Transaction appears normal.

--- Transaction #2 ---
Details: {'amount': 20.0, 'location': 'New York', 'time': 9}
Transaction appears normal.

--- Transaction #3 ---
Details: {'amount': 3050.0, 'location': 'Houston', 'time': 2}
Flagged as suspicious.
Trust Score: 20/100


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Explanation:
The transaction of $3050.0 at 2h in Houston was flagged. Trust score: 20/100. Explain why it may be suspicious in simple terms:

2. There is no real indication that a person is on a transaction that is on a shortlist of 500 to 1st of $10k. The person on this list is the person who might have just turned 10 in the last 4 days. What if a person has a shortlist of 500 to 2nd of $

--- Transaction #4 ---
Details: {'amount': 150.0, 'location': 'Chicago', 'time': 12}
Transaction appears normal.

--- Transaction #5 ---
Details: {'amount': 10.0, 'location': 'New York', 'time': 10}
Transaction appears normal.

--- Transaction #6 ---
Details: {'amount': 5000.0, 'location': 'Los Angeles', 'time': 1}
Flagged as suspicious.
Trust Score: 20/100
Explanation:
The transaction of $5000.0 at 1h in Los Angeles was flagged. Trust score: 20/100. Explain why it may be suspicious in simple terms:


A typical financial news item or piece could not be published with the word "paypal" anywhere in Ne