In [58]:
pip install streamlit gradio numpy pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [60]:
import numpy as np
import pandas as pd
import streamlit as st
import gradio as gr
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Step 1: Simulate Historical Data (Reconciliation Data)
np.random.seed(42)

# Simulating historical transaction amounts (mean=500, std=50)
n_samples = 1000
historical_data = np.random.normal(loc=500, scale=50, size=n_samples)

# Introduce some anomalies (outliers)
historical_data[100] = 1200  # anomaly
historical_data[200] = 300   # anomaly
historical_data[500] = 800   # anomaly
historical_data[800] = 1000  # anomaly

# Create a DataFrame for historical data
df_historical = pd.DataFrame(historical_data, columns=["Transaction Amount"])
df_historical["Index"] = df_historical.index

# Step 2: Simulate Real-time Reconciliation Data
real_time_data = np.random.normal(loc=500, scale=50, size=n_samples)
real_time_data[500] = 700  # anomaly (introduce anomaly in real-time data)

df_real_time = pd.DataFrame(real_time_data, columns=["Transaction Amount"])
df_real_time["Index"] = df_real_time.index

# Step 3: Anomaly Detection using Isolation Forest (Real-time Data)
scaler = StandardScaler()

# Standardize the data for anomaly detection
scaled_historical_data = scaler.fit_transform(df_historical[["Transaction Amount"]])
scaled_real_time_data = scaler.transform(df_real_time[["Transaction Amount"]])

# Initialize and train the Isolation Forest model
model = IsolationForest(contamination=0.05, random_state=42)  # assuming 5% anomalies
df_historical['Anomaly'] = model.fit_predict(scaled_historical_data)
df_real_time['Anomaly'] = model.predict(scaled_real_time_data)

# Mark anomalies as 1 for normal, -1 for anomalies
df_real_time['Anomaly Label'] = df_real_time['Anomaly'].map({1: 'Normal', -1: 'Anomaly'})

# Step 4: Root Cause Classification using Predefined Categories (without LLMs)
def classify_anomaly(row):
    """ Classify anomaly based on predefined rules """
    if row['Transaction Amount'] > 0:
        return 'Match'
    elif row['Transaction Amount'] < 0:
        return 'Break'
    
    else:
        return 'Unknown Cause'

# Apply anomaly classification to the detected anomalies
df_real_time['Root Cause'] = df_real_time.apply(lambda row: classify_anomaly(row) if row['Anomaly Label'] == 'Anomaly' else 'Normal', axis=1)

# Step 5: Visualization and Reconciliation Break Summarization

# Function to generate a plot for the transaction amounts
def plot_transactions():
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(df_real_time["Index"], df_real_time["Transaction Amount"], label='Transaction Amount', color='blue', alpha=0.7)
    ax.scatter(df_real_time[df_real_time['Anomaly Label'] == 'Anomaly']["Index"],
               df_real_time[df_real_time['Anomaly Label'] == 'Anomaly']["Transaction Amount"],
               color='red', label='Anomalies', alpha=0.6)
    ax.set_title('Transaction Amount Over Time with Anomalies')
    ax.set_xlabel('Index')
    ax.set_ylabel('Transaction Amount')
    ax.legend()
    st.pyplot(fig)

# Function to generate a concise summary of reconciliation breaks
def summarize_reconciliation_break(anomalies_df):
    summary = "Reconciliation Break Summary:\n"
    for idx, row in anomalies_df.iterrows():
        summary += f"Anomaly detected at Index {row['Index']} with Transaction Amount {row['Transaction Amount']}.\n"
        summary += f"Root Cause: {row['Root Cause']}\n"
    return summary

# Function to display the results
def display_summary():
    anomalies = df_real_time[df_real_time['Anomaly Label'] == 'Anomaly']
    summary = summarize_reconciliation_break(anomalies)
    st.text(summary)

# Step 6: Gradio UI for displaying real-time anomaly detection result in an interactive manner
def gradio_interface(index):
    """Interactive Gradio interface to display transaction data and classification"""
    row = df_real_time.iloc[index]
    anomaly_details = {
        "Index": row['Index'],
        "Transaction Amount": row['Transaction Amount'],
        "Anomaly Label": row['Anomaly Label'],
        "Root Cause": row['Root Cause']
    }
    return anomaly_details

# Step 7: Streamlit App Layout
def main():
    st.title('Smarter Reconciliation and Anomaly Detection System')

    st.sidebar.header("Select options:")
    st.sidebar.subheader("Choose your visualization:")
    plot_transactions()

    st.sidebar.subheader("Reconciliation Break Summary:")
    display_summary()

    # Gradio Interface
    gradio_interface_demo = gr.Interface(fn=gradio_interface,
                                        inputs=gr.inputs.Slider(minimum=0, maximum=len(df_real_time)-1, default=0, label="Select Transaction Index"),
                                        outputs="json", live=True)
    st.subheader("Interactive Anomaly Details:")
    st.gradio(gradio_interface_demo)

if __name__ == "__main__":
    main()


AttributeError: module 'gradio' has no attribute 'inputs'