# BASIC AI AGENTS STRUCTURE

## Imports and configurations

Create a `.env` file containing:
```
OPENAI_API_KEY=YOUR_KEY
```
Make sure you can install all the necessary libraries.

In [None]:
%pip install langchain langgraph langchain-openai openai tiktoken pandas numpy pydantic python-dotenv rich faker

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement random (from versions: none)

[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for random


In [4]:
import logging

# Console logging for the pipeline: INFO and above, each record stamped
# with its time and severity.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Named logger shared by the rest of the notebook.
logger = logging.getLogger("MAS-Pipeline")

In [11]:
import json
import os
import random
import re
from datetime import date, datetime, timedelta
from typing import Any, Dict, List, Literal, Optional

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from faker import Faker
from pydantic import BaseModel, Field, confloat, constr
from rich import print as rprint

# Updated imports for LangChain v0.2+
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, StateGraph

# OpenAI LLM import
from langchain_openai import ChatOpenAI

# Load env vars from a local .env file (if present) into the process
# environment, then read the OpenAI key from there.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

MODEL_NAME = "gpt-4o-mini"

# Fail fast with an explicit exception: an `assert` would be stripped when
# Python runs with -O, letting a missing key surface later as an opaque
# client error.
if not OPENAI_API_KEY:
    raise RuntimeError("Set OPENAI_API_KEY env var in .env")

# Chat model used by the rest of the notebook; temperature 0 and a 700-token
# cap on each completion.
llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model_name=MODEL_NAME,
    temperature=0,
    max_tokens=700
)

rprint(f"[bold green]OpenAI (LangChain) ready with model: {MODEL_NAME}[/bold green]")


## Generate fake datasets to study

In [None]:
# Shared Faker instance used by the generator functions below.
fake = Faker()

# Seed all three random sources (Faker, stdlib random, NumPy) with the same
# value so the notebook is repeatable.
Faker.seed(42)
random.seed(42)
np.random.seed(42)


def generate_customers(n_customers: int = 20) -> pd.DataFrame:
    """Build a synthetic customer table.

    Relies on the module-level ``fake`` Faker instance and on the global
    ``random`` / ``np.random`` state, so results are reproducible only when
    those have been seeded beforehand.

    Args:
        n_customers: Number of customer rows to generate.

    Returns:
        A DataFrame with one row per customer and the columns
        ``customer_id``, ``name``, ``dob``, ``country``, ``kyc_level``,
        ``account_open_date`` and ``balance_usd``.
    """
    customer_ids = [f"CUST-{1000+i}" for i in range(n_customers)]
    countries = ["US", "UK", "DE", "CN", "IN", "SG", "NG", "BR", "AE", "RU"]
    kyc_levels = ["basic", "enhanced", "simplified"]

    # Columns are generated in a fixed order so the sequence of draws from
    # each random source stays stable between runs.
    data = {
        "customer_id": customer_ids,
        "name": [fake.name() for _ in range(n_customers)],
        # Adults only: ages between 18 and 80.
        "dob": [fake.date_of_birth(minimum_age=18, maximum_age=80) for _ in range(n_customers)],
        "country": [random.choice(countries) for _ in range(n_customers)],
        "kyc_level": [random.choice(kyc_levels) for _ in range(n_customers)],
        # Accounts opened at some point in the last five years.
        "account_open_date": [
            fake.date_between(start_date="-5y", end_date="today") for _ in range(n_customers)
        ],
        # Log-normal balances, rounded to cents.
        "balance_usd": np.round(np.random.lognormal(mean=9, sigma=0.8, size=n_customers), 2),
    }
    return pd.DataFrame(data)


def generate_transactions(customers: pd.DataFrame, n_trx: int = 300) -> pd.DataFrame:
    """Build a synthetic transaction ledger for the given customers.

    Each row gets a sequential ``TX-`` identifier, a timestamp within the
    365 days before the call, a customer drawn from ``customers``, a
    currency, a log-normal amount, a transaction type and a narrative chosen
    from the phrases associated with that type.

    Args:
        customers: Frame providing a ``customer_id`` column to sample from.
        n_trx: Number of transactions to generate.

    Returns:
        A DataFrame with the columns ``trx_id``, ``timestamp``,
        ``customer_id``, ``currency``, ``amount``, ``trx_type`` and
        ``description``.
    """
    # Candidate narratives, keyed by transaction type.
    narratives_by_type = {
        "deposit": [
            "Salary payment",
            "Freelance project payout",
            "Refund from online purchase",
            "Bank deposit via ATM",
            "Investment income credit",
            "Cash deposit at branch",
        ],
        "withdrawal": [
            "ATM cash withdrawal",
            "Online purchase at Amazon",
            "Utility bill payment",
            "Debit card transaction",
            "Rent payment",
            "Credit card bill payment",
        ],
        "transfer": [
            "Transfer to family account",
            "Wire transfer to offshore entity",
            "Peer-to-peer transfer",
            "Payment for services",
            "Internal transfer between accounts",
            "Transfer to business partner",
        ],
        "crypto_buy": [
            "Crypto purchase via Coinbase",
            "Buy order on Binance",
            "Converted USD to BTC",
            "Bought ETH for staking",
            "Crypto top-up using card",
        ],
        "crypto_sell": [
            "Sold BTC for USD",
            "Converted ETH to USDT",
            "Crypto liquidation transaction",
            "Payout from exchange wallet",
            "Sale of digital assets",
        ],
    }
    type_options = list(narratives_by_type)
    currency_options = ["USD", "EUR", "GBP", "BTC", "ETH", "USDT"]

    ids = [f"TX-{100000+seq}" for seq in range(n_trx)]
    owners = np.random.choice(customers["customer_id"], size=n_trx)

    # Spread timestamps over the year leading up to now, at minute resolution.
    window_start = datetime.now() - timedelta(days=365)
    stamps = []
    for _ in range(n_trx):
        offset_minutes = random.randint(0, 525600)
        stamps.append(window_start + timedelta(minutes=offset_minutes))

    raw_amounts = np.random.lognormal(mean=7, sigma=1, size=n_trx)

    # Pick the type first, then a narrative consistent with that type.
    chosen_types = np.random.choice(type_options, size=n_trx)
    narratives = [random.choice(narratives_by_type[kind]) for kind in chosen_types]

    chosen_currencies = np.random.choice(currency_options, size=n_trx)

    return pd.DataFrame(
        {
            "trx_id": ids,
            "timestamp": stamps,
            "customer_id": owners,
            "currency": chosen_currencies,
            "amount": np.round(raw_amounts, 2),
            "trx_type": chosen_types,
            "description": narratives,
        }
    )



def generate_crypto_activity(customers: pd.DataFrame, n_wallets: int = 5) -> pd.DataFrame:
    """Build a synthetic table of crypto wallets owned by the given customers.

    Uses the module-level ``fake`` Faker instance for wallet addresses and
    the global NumPy random state for everything else.

    Args:
        customers: Frame providing a ``customer_id`` column to sample from.
        n_wallets: Number of wallet rows to generate.

    Returns:
        A DataFrame with the columns ``wallet_id``, ``customer_id``,
        ``crypto_asset``, ``wallet_address``, ``balance`` and
        ``on_chain_tx_count``.
    """
    asset_options = ["BTC", "ETH", "USDT", "SOL", "BNB", "DOGE"]

    ids = [f"WALLET-{seq:05d}" for seq in range(n_wallets)]
    owners = np.random.choice(customers["customer_id"], size=n_wallets)
    held_assets = np.random.choice(asset_options, size=n_wallets)

    # Addresses are the first 20 hex characters of a SHA-256 digest.
    addresses = []
    for _ in range(n_wallets):
        digest = fake.sha256(raw_output=False)
        addresses.append(digest[:20])

    balances = np.round(np.random.lognormal(mean=2, sigma=1.2, size=n_wallets), 4)
    tx_counts = np.random.poisson(25, n_wallets)

    return pd.DataFrame(
        {
            "wallet_id": ids,
            "customer_id": owners,
            "crypto_asset": held_assets,
            "wallet_address": addresses,
            "balance": balances,
            "on_chain_tx_count": tx_counts,
        }
    )


# Materialize the three synthetic datasets; transactions and wallets both
# reference IDs from the customer table, so it must be built first.
customers = generate_customers(n_customers=20)
trx = generate_transactions(customers, n_trx=300)
crypto = generate_crypto_activity(customers, n_wallets=5)

In [22]:
# Quick look at each dataset: first rows followed by column dtypes.
for section_title, section_df in (
    ("CUSTOMERS DATA", customers),
    ("TRANSACTIONS DATA", trx),
    ("CRYPTO DATA", crypto),
):
    print(section_title)
    print(section_df.head())
    print("\n")
    print(section_df.dtypes)
    print("\n\n")

CUSTOMERS DATA
  customer_id             name         dob country   kyc_level  \
0   CUST-1000     Allison Hill  2007-10-23      UK       basic   
1   CUST-1001      Noah Rhodes  1988-05-22      US  simplified   
2   CUST-1002  Angie Henderson  1991-09-01      IN  simplified   
3   CUST-1003    Daniel Wagner  1982-09-27      CN  simplified   
4   CUST-1004  Cristian Santos  1959-10-13      CN  simplified   

  account_open_date  balance_usd  
0        2025-08-29     12056.65  
1        2023-07-06      7254.58  
2        2022-08-23     13604.43  
3        2025-01-30     27403.44  
4        2024-03-14      6718.88  


customer_id           object
name                  object
dob                   object
country               object
kyc_level             object
account_open_date     object
balance_usd          float64
dtype: object



TRANSACTIONS DATA
       tx_id                  timestamp customer_id currency   amount  \
0  TX-100000 2025-01-09 05:13:55.688952   CUST-1014      BTC   62

## Advanced feature engineering

In [None]:
# Amount z-score: how far each amount sits from the overall mean, in units of
# the population standard deviation (ddof=0).
amt_mean = trx['amount'].mean()
# `or 1.0` avoids dividing by zero when every amount is identical.
amt_std = trx['amount'].std(ddof=0) or 1.0
trx['amount_z'] = (trx['amount'] - amt_mean) / amt_std

# Transaction velocity: for each transaction, the number of transactions by
# the same customer in the closed window [timestamp - 7 days, timestamp]
# (the transaction itself is included).
trx = trx.sort_values(['customer_id', 'timestamp'])
trx['number_txn_7d'] = 0
velocity_window = np.timedelta64(7, 'D')
for _, customer_ts in trx.groupby('customer_id')['timestamp']:
    # Within a customer the timestamps are already ascending (see the sort
    # above), so both window edges can be located by binary search instead
    # of rescanning the whole group for every row.
    stamps = customer_ts.to_numpy()
    window_start_pos = np.searchsorted(stamps, stamps - velocity_window, side='left')
    window_end_pos = np.searchsorted(stamps, stamps, side='right')
    # Assumes the frame's index labels are unique, as with the default
    # RangeIndex produced by the generators.
    trx.loc[customer_ts.index, 'number_txn_7d'] = window_end_pos - window_start_pos

# Keyword flag: 1 when the description contains any watched term as a whole
# word (case-insensitive). The group is non-capturing so that pandas does not
# warn about match groups in `str.contains`.
kw = re.compile(r"\b(?:crypto|cash|gift|urgent|refund|invoice split|transfer split|exchange)\b", re.I)
trx['kw_flag'] = trx['description'].fillna("").str.contains(kw).astype(int)


trx.head(10)

  trx['kw_flag'] = trx['description'].fillna("").str.contains(kw).astype(int)


Unnamed: 0,tx_id,timestamp,customer_id,currency,amount,tx_type,description,amount_z,c_txn_7d,kw_flag
242,TX-100242,2024-11-08 09:05:55.688952,CUST-1000,ETH,1145.62,transfer,Peer-to-peer transfer,-0.302542,1,0
74,TX-100074,2024-11-11 01:00:55.688952,CUST-1000,USDT,1717.54,deposit,Cash deposit at branch,-0.062231,2,1
274,TX-100274,2024-11-28 22:00:55.688952,CUST-1000,BTC,766.47,transfer,Transfer to family account,-0.461854,1,0
212,TX-100212,2024-12-01 14:54:55.688952,CUST-1000,EUR,1343.61,crypto_sell,Sale of digital assets,-0.21935,2,0
64,TX-100064,2024-12-06 23:47:55.688952,CUST-1000,BTC,234.22,crypto_buy,Converted USD to BTC,-0.685496,2,0
153,TX-100153,2024-12-12 03:04:55.688952,CUST-1000,USD,416.92,crypto_buy,Crypto top-up using card,-0.608729,2,1
199,TX-100199,2024-12-14 07:24:55.688952,CUST-1000,BTC,1797.96,withdrawal,Credit card bill payment,-0.02844,2,0
130,TX-100130,2024-12-15 13:15:55.688952,CUST-1000,ETH,224.42,crypto_sell,Payout from exchange wallet,-0.689614,3,1
272,TX-100272,2024-12-27 11:20:55.688952,CUST-1000,USD,2609.33,crypto_buy,Bought ETH for staking,0.312483,1,0
169,TX-100169,2024-12-27 13:48:55.688952,CUST-1000,ETH,246.51,crypto_buy,Buy order on Binance,-0.680332,2,0


## Define data schemas

In [None]:
# Datasets schemas
class Customer(BaseModel):
    # Row schema for the customers dataset. All fields are required.
    # String constraints use `pattern=` (the Pydantic v2 keyword; `regex=` was
    # the v1 spelling and is rejected by v2).
    customer_id: constr(pattern=r"^CUST-\d+$") = Field(..., description="Unique customer ID")
    name: str = Field(..., description="Customer full name")
    dob: date = Field(..., description="Date of birth")
    country: str = Field(..., description="Country of residence (ISO or label)")
    kyc_level: Literal["simplified", "basic", "enhanced"] = Field(..., description="KYC verification level")
    account_open_date: date = Field(..., description="Date when the account was opened")
    # Negative balances are rejected (ge=0).
    balance_usd: confloat(ge=0) = Field(..., description="Current account balance in USD")


class Transaction(BaseModel):
    # Row schema for the transactions dataset. All fields are required.
    # String constraints use `pattern=` (the Pydantic v2 keyword; `regex=` was
    # the v1 spelling and is rejected by v2).
    trx_id: constr(pattern=r"^TX-\d+$") = Field(..., description="Unique transaction ID")
    timestamp: datetime = Field(..., description="Timestamp of transaction")
    customer_id: constr(pattern=r"^CUST-\d+$") = Field(..., description="Reference to customer ID")
    currency: Literal["USD", "EUR", "GBP", "BTC", "ETH", "USDT"] = Field(..., description="Currency used in transaction")
    # Amounts must be strictly positive (gt=0).
    amount: confloat(gt=0) = Field(..., description="Transaction amount in given currency")
    trx_type: Literal["deposit", "withdrawal", "transfer", "crypto_buy", "crypto_sell"] = Field(..., description="Type of transaction")
    description: str = Field(..., description="Narrative text describing transaction purpose")


class Crypto(BaseModel):
    # Row schema for the crypto wallets dataset. All fields are required.
    # String constraints use `pattern=` (the Pydantic v2 keyword; `regex=` was
    # the v1 spelling and is rejected by v2).
    wallet_id: constr(pattern=r"^WALLET-\d+$") = Field(..., description="Unique wallet identifier")
    customer_id: constr(pattern=r"^CUST-\d+$") = Field(..., description="Reference to customer ID")
    crypto_asset: Literal["BTC", "ETH", "USDT", "SOL", "BNB", "DOGE"] = Field(..., description="Crypto asset type")
    wallet_address: str = Field(..., description="Shortened wallet address hash")
    # Negative balances are rejected (ge=0).
    balance: confloat(ge=0) = Field(..., description="Current wallet balance")
    on_chain_tx_count: int = Field(..., description="Number of on-chain transactions")

# RiskLabel schema
class RiskLabel(BaseModel):
    # Schema holding a categorical risk level, a bounded numeric score and a
    # free-text reason.
    # Only these four exact strings are accepted.
    risk_level: Literal["Low", "Medium", "High", "Very High"]
    # Constrained to the closed interval [0.0, 1.0]; defaults to 0.0 when omitted.
    score: confloat(ge=0.0, le=1.0) = 0.0
    # Required (no default).
    reason: str

# EscalationDecision schema
class EscalationDecision(BaseModel):
    # Schema for an escalation outcome: a yes/no flag, an optional severity
    # and a free-text reason.
    # Required (no default).
    create_case: bool
    # May be left out (defaults to None). Accepts the same labels as
    # RiskLabel.risk_level plus "Critical".
    severity: Optional[Literal["Low", "Medium", "High", "Very High", "Critical"]] = None
    # Required (no default).
    reason: str

# ReportDoc schema
class ReportDoc(BaseModel):
    # Schema for a written case report. Every field is required (`Field(...)`).
    # NOTE(review): the bracketed description strings look like fill-in hints
    # for whatever produces the report (presumably the LLM) — confirm against
    # the code that consumes this schema.
    title: str = Field(..., description="[__A concise and descriptive title for the case__]")
    summary: str = Field(..., description="[__A short, neutral summary of the transaction and key findings__]")
    main_risks: List[str] = Field(..., description="[__A list of key red flags or risk factors observed__]")
    counterparty_context: str = Field(..., description="[__Relevant details about the customer or entities involved__]")
    timeline: List[str] = Field(..., description="[__A chronological sequence of important events__]")
    recommendation: str = Field(..., description="[__The suggested next step, e.g., 'Escalate for review'__]")

    # Reject any field that is not declared above.
    # NOTE(review): class-based `Config` is the Pydantic v1 spelling; v2 still
    # accepts it but prefers `model_config` — confirm the installed version.
    class Config:
        extra = "forbid"

# CaseState schema
class CaseState(BaseModel):
    # Container for one case: the transaction, its risk label, the escalation
    # decision, the report and a list of log lines.
    #
    # Every slot defaults to None so a state can be created empty or with only
    # some parts set. Without explicit defaults, Pydantic v2 treats
    # `Optional[...]` fields as required.
    txn: Optional[Transaction] = None
    risk: Optional[RiskLabel] = None
    decision: Optional[EscalationDecision] = None
    report: Optional[ReportDoc] = None
    # Each instance gets its own fresh list.
    logs: List[str] = Field(default_factory=list)

