In [1]:
import json
import threading
import collections
from typing import Deque, Dict

import pandas as pd
from kafka import KafkaConsumer

import dash
from dash import dcc, html, dash_table, Output, Input
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import f1_score, precision_score, recall_score

In [None]:
# dashboard app for live fraud monitoring
"""
Tabs
----
* **Overview** – grouped bar + latest transactions table.
* **Scatter**  – amount × velocity scatter and cumulative fraud-percentage line.
* **Metrics**  – live **macro-F1** gauge comparing true vs predicted labels.
"""

import collections
import json
import threading
from collections import deque
from typing import Dict, Deque

import dash
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Input, Output, dash_table, dcc, html
from kafka import KafkaConsumer
from sklearn.metrics import f1_score, precision_score, recall_score

# ────────────────────────────
# Config
# ────────────────────────────
BOOTSTRAP_SERVERS = ["localhost:9092"]
TOPIC = "scored-transactions"
BUFFER_SIZE = 5_000
REFRESH_MS = 1_000  # 1-second refresh
LABEL_FILE = "test_label.csv"   # CSV with test samples labels

# ────────────────────────────
# Load true labels
# ────────────────────────────
labels_df = pd.read_csv(LABEL_FILE).astype({"transaction_id": str, "labels": int})
labels_df.rename(columns={labels_df.columns[1]: "true_label"}, inplace=True)
# map labels to strings for consistency
labels_df["true_label"] = labels_df["true_label"].map({
    1: "fraud",
    0: "no fraud"
})

# ────────────────────────────
# Kafka consumer (Live data gathering)
# ────────────────────────────
buf: Deque[Dict] = collections.deque(maxlen=BUFFER_SIZE)

def consume() -> None:
    consumer = KafkaConsumer(
        TOPIC,
        bootstrap_servers=BOOTSTRAP_SERVERS,
        value_deserializer=lambda m: json.loads(m.decode("utf-8")),
        auto_offset_reset="latest",
        enable_auto_commit=True,
    )
    for msg in consumer:
        buf.append(msg.value)

threading.Thread(target=consume, daemon=True).start()

# ────────────────────────────
# Dash app presentation
# ────────────────────────────
external_stylesheets = [dbc.themes.SPACELAB]
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.title = "Fraud Monitoring – Live"
px.defaults.template = "plotly_white"

# Utility for card-style graphs
def card(graph_id: str, title: str | None = None):
    return dbc.Card(
        dbc.CardBody([
            html.H5(title, className="card-title") if title else None,
            dcc.Graph(id=graph_id, config={"displayModeBar": False}),
        ]),
        className="shadow-sm rounded-4 mb-4",
    )

# Layout
app.layout = dbc.Container(
    [
        html.H2("Real-time Fraud Dashboard", className="fw-bold my-3 text-center"),
        dcc.Interval(id="refresh", interval=REFRESH_MS, n_intervals=0),
        dbc.Tabs([
            dbc.Tab(label="Overview", tab_id="overview", children=[
                card("fraud_graph"),
                html.H4("Latest Transactions", className="fw-semibold mt-3"),
                dash_table.DataTable(
                    id="recent_table",
                    page_size=10,
                    style_table={"overflowX": "auto", "borderRadius": "0.5rem"},
                    style_header={"backgroundColor": "#e9ecef", "fontWeight": "bold"},
                    style_cell={"textAlign": "left", "padding": "6px"},
                    style_data_conditional=[
                        {"if": {"column_id": "label", "filter_query": '{label} = "fraud"'},
                         "color": "#dc3545", "fontWeight": "600"},
                        {"if": {"state": "active"}, "backgroundColor": "#f1f3f4"},
                    ],
                    css=[{
                        "selector": ".dash-table-container .dash-spreadsheet-container .dash-spreadsheet tr",
                        "rule": "border-bottom: 1px solid #dee2e6;"
                    }]
                ),
            ]),
            dbc.Tab(label="Scatter", tab_id="scatter", children=[
                card("scatter_graph"),
                card("fraud_line_graph"),
            ]),
            dbc.Tab(label="Metrics", tab_id="metrics", children=[
                card("f1_graph", title="Macro-F1 Score (Live)"),
                card("precision_graph", title="Precision (Live)"),
                card("recall_graph", title="Recall (Live)"),
            ]),
        ], id="tabs", active_tab="overview", className="mb-4"),
    ],
    fluid=True,
)

# ────────────────────────────
# Get model metrics and update graphs
# ────────────────────────────
@app.callback(
    Output("fraud_graph", "figure"),
    Output("recent_table", "data"),
    Output("scatter_graph", "figure"),
    Output("fraud_line_graph", "figure"),
    Output("f1_graph", "figure"),
    Output("precision_graph", "figure"),
    Output("recall_graph", "figure"),
    Input("refresh", "n_intervals"),
)
def update_live(_: int):
    if not buf:
        empty = px.scatter(title="Waiting for data…")
        blank = go.Figure()
        return empty, [], empty, blank, blank, blank, blank

    # ---------------- Data prep ----------------
    df = pd.DataFrame(list(buf))
    df = df.drop_duplicates(subset=["transaction_id", "timestamp", "amount"], keep="last")

    df["transaction_id"] = df["transaction_id"].astype(str)
    df["pred"] = df["prediction"].apply(lambda x: "fraud" if x == 1 else "no fraud")
    df["label"] = df["pred"]  # for DataTable styling
    df["timestamp"] = pd.to_datetime(df["timestamp"])

    # ---------------- Bar chart ----------------
    counts = df.groupby(["location", "pred"]).size().reset_index(name="count")
    bar_fig = px.bar(
        counts,
        x="location",
        y="count",
        color="pred",
        color_discrete_sequence=px.colors.qualitative.Set2,
        barmode="group",
        title="Transactions by Location (live)",
    )
    bar_fig.update_layout(
        xaxis_title=None,
        yaxis_title="# Transactions",
        legend_title="Prediction",
        showlegend=True,
    )

    # ---------------- Scatter ----------------
    scatter_fig = px.scatter(
        df,
        x="amount",
        y="geo_anomaly_score",
        color="pred",
        color_discrete_sequence=px.colors.qualitative.Set2,
        opacity=0.75,
        hover_data=["transaction_id", "sender_account", "receiver_account"],
        title="Amount vs Geo Anomaly Score",
    )
    scatter_fig.update_layout(xaxis_title="Amount", yaxis_title="Geo Anomaly Score")

    # ---------------- Cumulative % line ----------------
    df_sorted = df.sort_values("timestamp").copy()
    df_sorted["cum_total_count"] = range(1, len(df_sorted) + 1)
    df_sorted["cum_fraud_count"] = (df_sorted["pred"] == "fraud").cumsum()
    df_sorted["pct_count"] = df_sorted["cum_fraud_count"] / df_sorted["cum_total_count"] * 100

    line_fig = go.Figure(go.Scatter(
        x=df_sorted["timestamp"],
        y=df_sorted["pct_count"],
        mode="lines",
        line=dict(color="#66c2a5", width=3),
        name="% Fraud Txns"
    ))
    line_fig.update_layout(
        title="Cumulative Fraud % of Transactions",
        xaxis_title="Time",
        yaxis_title="% Fraud Transactions",
        yaxis_range=[0, 100]
    )

    # ---------------- Model metrics ----------------
    joined = pd.merge(df[["transaction_id", "pred"]], labels_df, on="transaction_id", how="inner")
    if joined.empty:
        f1_val = 0.0
        precision = 0.0
        recall = 0.0
    else:
        # Ensure same label types
        joined["true_label"] = joined["true_label"].astype(str)
        f1_val = f1_score(joined["true_label"], joined["pred"], average="macro")
        precision = precision_score(joined["true_label"], joined["pred"], average="macro")
        recall = recall_score(joined["true_label"], joined["pred"], average="macro")

    f1_fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=f1_val,
        number={'valueformat': '.2f'},
        gauge={'axis': {'range': [0, 1]}},
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Macro-F1 Score"}
    ))
    
    precision_fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=precision,
        number={'valueformat': '.2f'},
        gauge={'axis': {'range': [0, 1]}},
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Precision"}
    ))
    
    recall_fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=recall,
        number={'valueformat': '.2f'},
        gauge={'axis': {'range': [0, 1]}},
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Recall"}
    ))

    # ---------------- Latest transactions table ----------------
    display_cols = [
        "transaction_id", "timestamp", "sender_account", "receiver_account",
        "amount", "location", "label",
    ]
    recent = df[display_cols].sort_values("amount", ascending=False).head(10)
    recent["timestamp"] = recent["timestamp"].dt.strftime("%Y-%m-%d %H:%M:%S")

    return bar_fig, recent.to_dict("records"), scatter_fig, line_fig, f1_fig, precision_fig, recall_fig

# ────────────────────────────
# Main
# ────────────────────────────
if __name__ == "__main__":
    app.run(debug=True, host="0.0.0.0", port=8050)

8.221.136.170 - - [08/Jun/2025 16:44:48] code 400, message Bad request version ('À\x13À')

Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.

