In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Number of synthetic trade records to generate.
num_records = 10000

# Simulate each column independently of the others.
# NOTE: np.random.randint excludes its upper bound, so randint(1, 500)
# yields 1..499, randint(0, 10) yields 0..9, and so on.

# Sequential, zero-padded identifiers: T00001, T00002, ...
trade_id = [f"T{i:05d}" for i in range(1, num_records + 1)]
trade_type = np.random.choice(['Buy', 'Sell'], size=num_records)
instrument_type = np.random.choice(['Equity', 'Bond', 'Derivative'], size=num_records)
# Log-normal draw: strictly positive and right-skewed.
trade_value = np.random.lognormal(mean=13, sigma=1, size=num_records)
trade_volume = np.random.randint(1, 10000, size=num_records)
# One vectorized draw for all rows instead of one randint call per record.
counterparty_id = [f"CP{n}" for n in np.random.randint(1, 500, size=num_records)]
counterparty_risk_score = np.random.uniform(0, 1, size=num_records)
counterparty_failures = np.random.randint(0, 10, size=num_records)
# Roughly 90% of trades settle successfully, 10% fail.
settlement_status = np.random.choice(['Success', 'Fail'], size=num_records, p=[0.9, 0.1])
# Normally distributed around 48 with standard deviation 10
# (presumably hours -- TODO confirm the intended unit).
settlement_duration = np.random.normal(48, 10, size=num_records)
market_volatility = np.random.uniform(0.1, 0.5, size=num_records)
liquidity = np.random.uniform(0.01, 0.1, size=num_records)
processing_time = np.random.randint(1, 60, size=num_records)
# 1 marks a trade that needed manual intervention (about 30% of rows).
manual_intervention = np.random.choice([0, 1], size=num_records, p=[0.7, 0.3])
# Settlement duration plus integer jitter in -5..5 inclusive. The upper
# bound passed to randint is exclusive, hence 6: this keeps the jitter
# symmetric around zero instead of skewing the column downwards.
time_to_settle = settlement_duration + np.random.randint(-5, 6, size=num_records)
# Failure count scaled by a fixed 1/100, i.e. values from 0.00 to 0.09.
counterparty_failure_rate = counterparty_failures / 100
currencies = np.random.choice(['USD', 'EUR', 'GBP', 'INR', 'JPY'], size=num_records)
settlement_type = np.random.choice(['FoP', 'AoP'], size=num_records)
# One vectorized draw for all rows, as for the counterparty ids above.
client_ids = [f"CL{n}" for n in np.random.randint(1, 300, size=num_records)]

# Build a fixed pool of 50 ISIN-style placeholder codes, then assign one
# of them at random to every record. Each code is "INE" + a six-digit
# sequence number + one randomly drawn trailing letter or digit.
isin_alphabet = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
isin_suffixes = [np.random.choice(isin_alphabet) for _ in range(50)]
known_isins = [
    "INE" + f"{sequence:06d}" + str(suffix)
    for sequence, suffix in enumerate(isin_suffixes)
]
isins = np.random.choice(known_isins, size=num_records)

# Catalogue of reasons that can be attached to a failed settlement
failure_reasons = [
    'Insufficient Funds',
    'Invalid Counterparty',
    'Technical Error',
    'Market Closure',
    'Documentation Issue',
    'Insufficient Funds at the Counterparty',
    'Issues at the CSD',
    'Client Instruction Missing',
    'Client Short to Deliver',
    'Counterparty Short to Deliver',
    'Currency Difference',
    'Account Difference',
    'Quantity Difference',
]


def _failure_reason_for(status):
    """Return a randomly chosen failure reason for a 'Fail' status, else None."""
    if status != 'Fail':
        return None
    return np.random.choice(failure_reasons)


# Settlement Failure Reason column: one entry per trade, aligned with
# settlement_status; only failed trades receive a reason.
settlement_failure_reason = list(map(_failure_reason_for, settlement_status))

# Trade Matched or Not column: label -> probability of drawing that label
match_distribution = {'Matched': 0.95, 'Not Matched': 0.05}
trade_matched = np.random.choice(
    list(match_distribution.keys()),
    size=num_records,
    p=list(match_distribution.values()),
)

# Expected Settlement Dates: a random day offset of 0..364 from 2024-01-01
# for every record (randint's upper bound is exclusive).
settlement_window_start = datetime(2024, 1, 1)
expected_day_offsets = [np.random.randint(0, 365) for _ in range(num_records)]
expected_settlement_dates = pd.to_datetime(
    [settlement_window_start + timedelta(days=offset) for offset in expected_day_offsets]
)


def _actual_settlement_date(expected_date, status):
    """Return the expected date, pushed back 1-5 days when the status is 'Fail'."""
    if status != 'Fail':
        return expected_date
    return expected_date + timedelta(days=np.random.randint(1, 6))


# Actual Settlement Dates: equal to the expected date unless the trade failed
actual_settlement_dates = [
    _actual_settlement_date(expected_date, status)
    for expected_date, status in zip(expected_settlement_dates, settlement_status)
]

# Days Difference: whole days between the actual and the expected date
days_difference = [
    (actual_date - expected_date).days
    for expected_date, actual_date in zip(expected_settlement_dates, actual_settlement_dates)
]

# (column name, values) pairs, listed in the order the columns appear in
# the DataFrame and in the exported CSV.
column_specs = [
    ('Trade ID', trade_id),
    ('Trade Type', trade_type),
    ('Instrument Type', instrument_type),
    ('Trade Value', trade_value),
    ('Trade Volume', trade_volume),
    ('Counterparty ID', counterparty_id),
    ('Counterparty Risk Score', counterparty_risk_score),
    ('Counterparty Failures', counterparty_failures),
    ('Settlement Status', settlement_status),
    ('Settlement Failure Reason', settlement_failure_reason),
    ('Settlement Duration', settlement_duration),
    ('Market Volatility', market_volatility),
    ('Liquidity', liquidity),
    ('Processing Time', processing_time),
    ('Manual Intervention', manual_intervention),
    ('Time to Settle', time_to_settle),
    ('Counterparty Failure Rate', counterparty_failure_rate),
    ('Currency', currencies),
    ('Settlement Type', settlement_type),
    ('Client ID', client_ids),
    ('ISIN', isins),
    ('Trade Matched or Not', trade_matched),
    ('Expected Settlement Date', expected_settlement_dates),
    ('Actual Settlement Date', actual_settlement_dates),
    ('Days Difference', days_difference),
]
data = dict(column_specs)

# Assemble all simulated columns into a single table
df = pd.DataFrame(data)

# Write the table to CSV in the working directory, without the row index
output_path = 'TradeSettlementDetails.csv'
df.to_csv(output_path, index=False)

print("Dataset created and saved as 'TradeSettlementDetails.csv'.")


Dataset created and saved as 'TradeSettlementDetails.csv'.
