# Regulatory Data Processing Notebook

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from scipy.stats import zscore
import seaborn as sns
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve


In [None]:

# Simulate a dataset that could be used for a regulatory project.
#
# Seed NumPy's global generator before any draw so that "Restart Kernel ->
# Run All" regenerates exactly the same synthetic data every time.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Sizes and the client-id range are shared by both tables below; keeping them
# in one place guarantees every simulated transaction references an existing
# client profile.
N_TRANSACTIONS = 5000
N_CLIENTS = 100
CLIENT_ID_START = 1000
CLIENT_ID_STOP = CLIENT_ID_START + N_CLIENTS  # exclusive upper bound

# One row per transaction.
data = {
    'transaction_id': range(1, N_TRANSACTIONS + 1),
    'client_id': np.random.randint(CLIENT_ID_START, CLIENT_ID_STOP, size=N_TRANSACTIONS),
    'transaction_amount': np.random.uniform(10.0, 15000.0, size=N_TRANSACTIONS),
    'transaction_type': np.random.choice(['deposit', 'withdrawal', 'transfer', 'payment'], size=N_TRANSACTIONS),
    # One transaction per hour. A Timedelta is used instead of the 'H' alias,
    # which is deprecated in recent pandas releases.
    'transaction_date': pd.date_range(start='2023-01-01', periods=N_TRANSACTIONS, freq=pd.Timedelta(hours=1)),
    # Roughly 90% of the transactions are flagged True.
    'compliance_flag': np.random.choice([True, False], size=N_TRANSACTIONS, p=[0.9, 0.1]),
    'account_balance': np.random.uniform(100.0, 100000.0, size=N_TRANSACTIONS),
    'location': np.random.choice(['NY', 'CA', 'TX', 'FL', 'IL'], size=N_TRANSACTIONS)
}

# Create a DataFrame
df = pd.DataFrame(data)

# Add synthetic client profile data: exactly one row per client id.
profile_data = {
    'client_id': range(CLIENT_ID_START, CLIENT_ID_STOP),
    'client_age': np.random.randint(18, 85, size=N_CLIENTS),
    'client_income': np.random.uniform(30000, 200000, size=N_CLIENTS),
    'client_segment': np.random.choice(['retail', 'commercial', 'private'], size=N_CLIENTS)
}

profile_df = pd.DataFrame(profile_data)

# Join profile data to the main dataset. `validate` makes pandas raise if a
# client id ever appears more than once in the profile table, which would
# silently duplicate transactions.
df = df.merge(profile_df, on='client_id', how='left', validate='many_to_one')

# A left join must keep the row count, and every transaction must have found
# its client profile.
assert len(df) == N_TRANSACTIONS, "merge changed the number of transactions"
assert df['client_segment'].notna().all(), "some transactions have no client profile"

# Display the first few rows of the dataset
print("Initial Dataset:")
print(df.head())


In [None]:

# Validate transaction dates: select every row whose timestamp is later than
# the current local time (dt.datetime.now()), i.e. transactions dated in the
# future.
invalid_dates = df[df['transaction_date'] > dt.datetime.now()]

# The "\n" escape prints a blank line before the header. It must stay an
# escape sequence: a literal line break inside the quotes is a syntax error.
print("\nInvalid Transaction Dates:")
print(invalid_dates)


In [None]:

# TODO: Add the remaining processing steps and visualizations from the canvas document here.
# (They are omitted from this notebook for brevity; include each section of that document as its own cell.)
