In [27]:
import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

from config import fetch_data


## **CUSTOMER BEHAVIOR CLUSTERING**

### Customer Behavior 3D Scatter

In [28]:
# Pull cluster-level aggregates; the WHERE clause keeps only clusters with at least 20 customers.
customer_behavior_3d_scatter_query = "SELECT * FROM gold.analytics_customer_behavior_clusters WHERE customer_count >= 20"
customer_behavior_3d_scatter_df = fetch_data(customer_behavior_3d_scatter_query)

# Hover formats per column (d3-format specs understood by plotly express).
scatter_3d_hover_formats = {
    'customer_count': ':,',
    'avg_transactions_90d': ':.1f',
    'avg_transaction_size': ':$,.2f',
    'avg_total_balance': ':$,.2f',
    'engagement_score': ':.2f',
}

# Display names for the three plotted measures (used in the hover box).
scatter_3d_labels = {
    'avg_transactions_90d': 'Avg Transactions (90d)',
    'avg_transaction_size': 'Avg Transaction Size ($)',
    'avg_total_balance': 'Avg Total Balance ($)',
}

# One bubble per cluster row: position = behaviour metrics, size = customer count,
# colour = customer segment, hover title = age group.
fig = px.scatter_3d(
    data_frame=customer_behavior_3d_scatter_df,
    x='avg_transactions_90d',
    y='avg_transaction_size',
    z='avg_total_balance',
    size='customer_count',
    color='customer_segment',
    hover_name='age_group',
    hover_data=scatter_3d_hover_formats,
    title='Customer Behavior Clusters (3D)',
    labels=scatter_3d_labels,
    color_discrete_sequence=px.colors.qualitative.T10,
    size_max=25,
)

# Slight transparency plus a thin outline keeps overlapping bubbles distinguishable.
fig.update_traces(
    marker_opacity=0.8,
    marker_line_width=0.5,
    marker_line_color='DarkSlateGrey',
)

# All three scene axes share the same background/grid styling; only the title differs.
scene_axis_style = dict(backgroundcolor='rgb(240,240,240)', gridcolor='white', showbackground=True)

fig.update_layout(
    scene=dict(
        xaxis=dict(title='Transactions', **scene_axis_style),
        yaxis=dict(title='Avg Amount ($)', **scene_axis_style),
        zaxis=dict(title='Balance ($)', **scene_axis_style),
        # Initial camera position for the 3D view.
        camera=dict(eye=dict(x=1.5, y=1.5, z=1)),
    ),
    height=750,
    margin=dict(l=0, r=0, b=0, t=50),
    paper_bgcolor='white',
    plot_bgcolor='white',
    title_font=dict(size=22, family='Arial', color='black'),
    legend=dict(title='Customer Segment', font=dict(size=12)),
)

fig.show()

In [29]:
# Display the frame that feeds the 3D scatter so the plotted values can be inspected.
customer_behavior_3d_scatter_df

Unnamed: 0,customer_segment,age_group,income_bracket,customer_count,avg_transactions_90d,avg_transaction_size,avg_unique_categories,avg_accounts,avg_total_balance,mobile_usage_pct,weekend_activity_pct,avg_service_contacts,avg_satisfaction_score,engagement_score,last_updated
0,Premium,45-54,Very High,597,0.1,1390.07,0.1,1.8,-83253.38,19.72,26.76,0.5,0.96,7.50,2025-12-27 10:20:32.986899+00:00
1,Mass Market,25-34,Very High,595,0.1,1493.07,0.1,1.8,-88537.14,18.92,20.27,0.5,1.00,7.79,2025-12-27 10:20:32.986899+00:00
2,Premium,25-34,Very High,592,0.1,1392.72,0.1,1.8,-63824.91,13.64,24.24,0.5,0.87,7.35,2025-12-27 10:20:32.986899+00:00
3,Affluent,25-34,Very High,587,0.1,1393.82,0.1,1.7,-85268.68,25.86,34.48,0.6,0.99,7.50,2025-12-27 10:20:32.986899+00:00
4,Affluent,35-44,Very High,583,0.1,1480.58,0.1,1.8,-61962.87,19.18,24.66,0.4,0.82,7.42,2025-12-27 10:20:32.986899+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,Affluent,65+,Medium,37,0.1,1208.28,0.1,1.8,-84549.39,33.33,33.33,0.5,0.92,7.40,2025-12-27 10:20:32.986899+00:00
70,Mass Market,65+,Medium,30,0.1,2158.09,0.1,1.9,-161714.90,0.00,0.00,0.6,1.03,7.95,2025-12-27 10:20:32.986899+00:00
71,Premium,65+,Medium,29,0.1,1351.55,0.1,1.9,-36430.74,0.00,33.33,0.5,0.97,7.94,2025-12-27 10:20:32.986899+00:00
72,Business,25-34,Low,22,0.1,1458.33,0.1,2.0,-1460.41,0.00,0.00,0.4,0.95,8.42,2025-12-27 10:20:32.986899+00:00


### Customer Engagement Radar

In [30]:
customer_engagement_radar_query = "SELECT * FROM gold.analytics_customer_behavior_clusters"
customer_engagement_radar_df = fetch_data(customer_engagement_radar_query)

# Radar axes: source column -> (axis label, multiplier that stretches the metric
# onto the shared 0-100 radial axis). Keeping label and factor together avoids the
# two lists drifting out of step.
radar_metrics = {
    'avg_transactions_90d': ('Transactions', 1),
    'avg_unique_categories': ('Categories', 10),
    'avg_accounts': ('Accounts', 20),
    'mobile_usage_pct': ('Mobile %', 1),
    'avg_satisfaction_score': ('Satisfaction', 20),
    'engagement_score': ('Engagement', 1),
}
metric_cols = list(radar_metrics)
categories = [label for label, _ in radar_metrics.values()]

# NOTE(review): this is an unweighted mean of cluster-level averages, so a cluster
# with a handful of customers counts as much as one with hundreds. Consider
# weighting by customer_count if a per-customer average is intended.
segment_agg = (
    customer_engagement_radar_df
    .groupby('customer_segment')[metric_cols]
    .mean()
    .reset_index()
)

# Choose a vibrant color palette
colors = px.colors.qualitative.Vivid

fig = go.Figure()

for i, (_, row) in enumerate(segment_agg.iterrows()):
    raw_values = [float(row[col]) for col in metric_cols]
    scaled_values = [value * radar_metrics[col][1] for col, value in zip(metric_cols, raw_values)]

    fig.add_trace(go.Scatterpolar(
        # Repeat the first point at the end so the outline is drawn as a closed
        # polygon; without it the last and first vertices are not joined by a line.
        r=scaled_values + scaled_values[:1],
        theta=categories + categories[:1],
        # Carry the unscaled metric so the tooltip reports the real value rather
        # than the radial position produced by the display multiplier.
        customdata=raw_values + raw_values[:1],
        fill='toself',
        name=row['customer_segment'],
        line=dict(color=colors[i % len(colors)], width=3),
        opacity=0.6,
        hovertemplate='<b>%{theta}</b><br>Value: %{customdata:.2f}<extra></extra>'
    ))

fig.update_layout(
    polar=dict(
        radialaxis=dict(visible=True, range=[0, 100], gridcolor='lightgrey', tickfont=dict(size=10)),
        angularaxis=dict(tickfont=dict(size=12))
    ),
    title=dict(
        text="Customer Engagement Profile by Segment",
        font=dict(size=20, family='Arial', color='black'),
        x=0.5
    ),
    legend=dict(title='Customer Segment', font=dict(size=12)),
    height=650,
    paper_bgcolor='white',
    plot_bgcolor='white'
)

fig.show()

In [31]:
# Display the unfiltered cluster table that the radar chart aggregates by segment.
customer_engagement_radar_df

Unnamed: 0,customer_segment,age_group,income_bracket,customer_count,avg_transactions_90d,avg_transaction_size,avg_unique_categories,avg_accounts,avg_total_balance,mobile_usage_pct,weekend_activity_pct,avg_service_contacts,avg_satisfaction_score,engagement_score,last_updated
0,Premium,45-54,Very High,597,0.1,1390.07,0.1,1.8,-83253.38,19.72,26.76,0.5,0.96,7.50,2025-12-27 10:20:32.986899+00:00
1,Mass Market,25-34,Very High,595,0.1,1493.07,0.1,1.8,-88537.14,18.92,20.27,0.5,1.00,7.79,2025-12-27 10:20:32.986899+00:00
2,Premium,25-34,Very High,592,0.1,1392.72,0.1,1.8,-63824.91,13.64,24.24,0.5,0.87,7.35,2025-12-27 10:20:32.986899+00:00
3,Affluent,25-34,Very High,587,0.1,1393.82,0.1,1.7,-85268.68,25.86,34.48,0.6,0.99,7.50,2025-12-27 10:20:32.986899+00:00
4,Affluent,35-44,Very High,583,0.1,1480.58,0.1,1.8,-61962.87,19.18,24.66,0.4,0.82,7.42,2025-12-27 10:20:32.986899+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,Affluent,18-24,Low,10,0.1,598.60,0.1,2.0,-75693.98,0.00,0.00,0.5,1.00,8.34,2025-12-27 10:20:32.986899+00:00
92,Premium,65+,Low,8,0.1,2018.05,0.1,2.0,-32818.10,0.00,100.00,0.8,1.38,8.97,2025-12-27 10:20:32.986899+00:00
93,Affluent,65+,Low,8,0.3,610.65,0.3,1.5,-113691.69,0.00,50.00,0.5,1.75,7.94,2025-12-27 10:20:32.986899+00:00
94,Business,65+,Low,6,0.0,,0.0,2.2,-17107.58,,,0.0,0.00,7.22,2025-12-27 10:20:32.986899+00:00


## **TIME PATTERN ANALYSIS**

### Transaction Time Heatmap

In [32]:
transaction_time_heatmap_query = "SELECT * FROM gold.analytics_transaction_time_patterns"
transaction_time_heatmap_df = fetch_data(transaction_time_heatmap_query)

# Total transactions per (hour, weekday) cell, summed over channel and merchant category.
pivot = transaction_time_heatmap_df.pivot_table(
    values='transaction_count',
    index='transaction_hour',
    columns='day_of_week',
    aggfunc='sum'
)

# Label each column from the table's own day_of_week -> day_name pairing instead of a
# hard-coded Monday-first list: this table codes Sunday as 0 and Saturday as 6, so a
# Mon..Sun list shifts every label by one day, and a fixed 7-item list also fails
# whenever a weekday is absent from the data.
day_lookup = (
    transaction_time_heatmap_df
    .drop_duplicates('day_of_week')
    .set_index('day_of_week')['day_name']
)
pivot.columns = [str(day_lookup.loc[day])[:3] for day in pivot.columns]

# Hours arrive as floats (0.0, 1.0, ...); show them as whole numbers on the axis.
pivot.index = pivot.index.astype(int)

fig = go.Figure(data=go.Heatmap(
    z=pivot.values,
    x=pivot.columns,
    y=pivot.index,
    colorscale='Viridis',
    text=pivot.values,
    texttemplate='%{text:,.0f}',
    textfont={"size": 9},
    hovertemplate='<b>%{x}</b><br>Hour: %{y}<br>Transactions: %{z:,}<extra></extra>',
    colorbar=dict(title="Transactions")
))

fig.update_layout(
    title="Transaction Patterns: Day of Week vs Hour of Day",
    xaxis_title="Day of Week",
    yaxis_title="Hour of Day",
    height=600
)

fig.show()

In [33]:
# Display the raw time-pattern rows (one per day/hour/channel/merchant category) behind the heatmap.
transaction_time_heatmap_df

Unnamed: 0,day_name,day_of_week,transaction_hour,channel,merchant_category,transaction_count,avg_amount,unique_customers,fraud_count,fraud_rate_pct,high_value_count,high_value_pct,international_count,international_pct,last_updated
0,Sunday,0.0,0.0,ATM,Entertainment,322,1591.93,318,0,0.00,0,0.0,161,50.00,2025-12-27 10:20:42.170486+00:00
1,Sunday,0.0,0.0,ATM,Gas Station,286,1630.16,281,0,0.00,0,0.0,148,51.75,2025-12-27 10:20:42.170486+00:00
2,Sunday,0.0,0.0,ATM,Grocery,302,1581.14,300,1,0.33,0,0.0,155,51.32,2025-12-27 10:20:42.170486+00:00
3,Sunday,0.0,0.0,ATM,Healthcare,326,1665.30,323,0,0.00,0,0.0,173,53.07,2025-12-27 10:20:42.170486+00:00
4,Sunday,0.0,0.0,ATM,Online Shopping,307,1640.37,305,0,0.00,0,0.0,146,47.56,2025-12-27 10:20:42.170486+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,Saturday,6.0,0.0,POS,Restaurant,336,1610.32,334,0,0.00,0,0.0,164,48.81,2025-12-27 10:20:42.170486+00:00
346,Saturday,6.0,0.0,POS,Retail,313,1623.79,310,0,0.00,0,0.0,168,53.67,2025-12-27 10:20:42.170486+00:00
347,Saturday,6.0,0.0,POS,Services,320,1557.68,315,0,0.00,0,0.0,160,50.00,2025-12-27 10:20:42.170486+00:00
348,Saturday,6.0,0.0,POS,Travel,309,1583.00,305,1,0.32,0,0.0,142,45.95,2025-12-27 10:20:42.170486+00:00


### Hourly Channel Distribution

In [34]:
# Aggregate in SQL: one row per (hour, channel) with the summed transaction count.
hourly_channel_distribution_query = """
    SELECT transaction_hour, channel, SUM(transaction_count) as total
    FROM gold.analytics_transaction_time_patterns
    GROUP BY transaction_hour, channel
    ORDER BY transaction_hour
"""
hourly_channel_distribution_df = fetch_data(hourly_channel_distribution_query)
# Cast the hour to int so the x-axis shows whole-hour ticks.
hourly_channel_distribution_df['transaction_hour'] = hourly_channel_distribution_df['transaction_hour'].astype(int)

area_axis_labels = {'total': 'Transaction Count', 'transaction_hour': 'Hour of Day'}

# Stacked area: one band per channel across the hours of the day.
fig = px.area(
    data_frame=hourly_channel_distribution_df,
    x='transaction_hour',
    y='total',
    color='channel',
    title='Channel Usage Distribution by Hour of Day',
    labels=area_axis_labels,
    color_discrete_sequence=px.colors.qualitative.Set2,
)

# dtick=1 puts a tick on every hour.
fig.update_layout(
    height=500,
    xaxis_dtick=1,
    yaxis_title='Transaction Count',
)

fig.show()

In [35]:
# Display the per-hour, per-channel totals used by the area chart.
hourly_channel_distribution_df

Unnamed: 0,transaction_hour,channel,total
0,0,ATM,21911.0
1,0,Online,21692.0
2,0,Branch,21987.0
3,0,POS,21957.0
4,0,Mobile,22016.0


## **PRODUCT CROSS-SELL PATTERNS**

In [36]:
product_network_graph_query = "SELECT * FROM gold.analytics_product_cross_sell_patterns ORDER BY customer_count DESC"
product_network_graph_df = fetch_data(product_network_graph_query)

# Every product that appears on either side of a pair.
products = set(product_network_graph_df['product_1']) | set(product_network_graph_df['product_2'])

# Sort before assigning positions: iterating a set of strings has no stable order
# across interpreter sessions, so an unsorted list would move the nodes around the
# circle on every kernel restart.
product_list = sorted(products)
node_ids = {prod: i for i, prod in enumerate(product_list)}

# Place the nodes evenly on the unit circle.
theta = np.linspace(0, 2 * np.pi, len(product_list), endpoint=False)
node_x = np.cos(theta)
node_y = np.sin(theta)

# Build edges: one line trace per product pair, width proportional to the pair's
# customer count relative to the strongest pair (range 1..7 px).
max_weight = product_network_graph_df['customer_count'].max()

edge_traces = []
for product_1, product_2, customer_count in zip(
    product_network_graph_df['product_1'],
    product_network_graph_df['product_2'],
    product_network_graph_df['customer_count'],
):
    i0 = node_ids[product_1]
    i1 = node_ids[product_2]

    edge_traces.append(
        go.Scatter(
            x=[node_x[i0], node_x[i1]],
            y=[node_y[i0], node_y[i1]],
            mode='lines',
            line=dict(
                width=1 + (customer_count / max_weight) * 6,
                color='rgba(150,150,150,0.5)'
            ),
            hoverinfo='text',
            text=f"{product_1} ↔ {product_2}<br>Customers: {customer_count:,}",
            showlegend=False
        )
    )

# Node trace: labelled markers drawn after the edges so they sit on top.
node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers+text',
    marker=dict(
        size=22,
        color='skyblue',
        line=dict(width=1, color='DarkSlateGrey')
    ),
    text=product_list,
    textposition='top center',
    hoverinfo='text',
    showlegend=False
)

# Build figure
fig = go.Figure(data=edge_traces + [node_trace])

fig.update_layout(
    title=dict(
        text="Product Cross-Sell Network",
        x=0.5,
        font=dict(size=20)
    ),
    height=650,
    showlegend=False,
    hovermode='closest',
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

fig.show()

In [37]:
# Display the product-pair rows (ordered by customer_count, descending) drawn as edges in the network.
product_network_graph_df

Unnamed: 0,product_1,category_1,product_2,category_2,customer_count,avg_combined_balance,avg_min_age_months,cross_sell_score,last_updated
0,Investment Account,INVESTMENT,Mortgage,LOAN,594,-20360.7,72.0,0.0,2025-12-27 10:20:37.696968+00:00
1,Auto Loan,LOAN,Investment Account,INVESTMENT,577,-14221.64,76.1,0.0,2025-12-27 10:20:37.696968+00:00
2,Credit Card,CREDIT,Investment Account,INVESTMENT,573,232579.06,73.9,3075.05,2025-12-27 10:20:37.696968+00:00
3,Auto Loan,LOAN,Credit Card,CREDIT,568,-264377.05,75.8,0.0,2025-12-27 10:20:37.696968+00:00
4,Investment Account,INVESTMENT,Personal Loan,LOAN,566,-5310.04,69.5,0.0,2025-12-27 10:20:37.696968+00:00
5,Auto Loan,LOAN,Mortgage,LOAN,559,-532178.8,72.0,0.0,2025-12-27 10:20:37.696968+00:00
6,Business Checking,DEPOSIT,Credit Card,CREDIT,559,40934.27,73.5,2578.16,2025-12-27 10:20:37.696968+00:00
7,Checking Account,DEPOSIT,Mortgage,LOAN,557,-199829.95,73.6,0.0,2025-12-27 10:20:37.696968+00:00
8,Investment Account,INVESTMENT,Savings Account,DEPOSIT,554,307639.67,78.8,3040.38,2025-12-27 10:20:37.696968+00:00
9,Auto Loan,LOAN,Premium Credit Card,CREDIT,552,-267890.32,72.0,0.0,2025-12-27 10:20:37.696968+00:00


### Product Affinity Matrix

In [38]:
product_affinity_matrix_query = "SELECT * FROM gold.analytics_product_cross_sell_patterns"
product_affinity_matrix_df = fetch_data(product_affinity_matrix_query)


# Square product x product table of shared-customer counts, starting from zero.
products = sorted(
    set(product_affinity_matrix_df['product_1'].tolist() + product_affinity_matrix_df['product_2'].tolist()))
matrix = pd.DataFrame(0, index=products, columns=products)

# Each pair is stored once in the table; mirror it so the matrix is symmetric.
for _, row in product_affinity_matrix_df.iterrows():
    matrix.loc[row['product_1'], row['product_2']] = row['customer_count']
    matrix.loc[row['product_2'], row['product_1']] = row['customer_count']

# Zero the diagonal on an explicit writable copy and rebuild the frame. Writing
# through `matrix.values` is not reliable: depending on the pandas version and
# mode it is either a read-only view (raises) or a copy (the write is lost).
counts = matrix.to_numpy(copy=True)
np.fill_diagonal(counts, 0)
matrix = pd.DataFrame(counts, index=products, columns=products)

# Colour on a log scale so a few very large pairs do not wash out the rest.
matrix_log = np.log1p(matrix)

fig = go.Figure(data=go.Heatmap(
    z=matrix_log.values,
    x=matrix_log.columns,
    y=matrix_log.index,
    colorscale='Turbo',
    # Raw counts travel as customdata so the tooltip shows customers, not log1p(customers).
    customdata=counts,
    hovertemplate='%{y} × %{x}<br>Customers: %{customdata:,}<extra></extra>',
    colorbar=dict(
        title=dict(
            text="Log(Customer Count)",
            side="right"
        )
    )

))

fig.update_layout(
    title=dict(
        text="Product Affinity Matrix (Cross-Sell Strength)",
        x=0.5,
        font=dict(size=20)
    ),
    height=750,
    xaxis=dict(
        title="Product B",
        tickangle=-45,
        tickfont=dict(size=10)
    ),
    yaxis=dict(
        title="Product A",
        tickfont=dict(size=10)
    ),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

fig.show()

In [39]:
# Display the product-pair rows that populate the affinity matrix.
product_affinity_matrix_df

Unnamed: 0,product_1,category_1,product_2,category_2,customer_count,avg_combined_balance,avg_min_age_months,cross_sell_score,last_updated
0,Investment Account,INVESTMENT,Mortgage,LOAN,594,-20360.7,72.0,0.0,2025-12-27 10:20:37.696968+00:00
1,Auto Loan,LOAN,Investment Account,INVESTMENT,577,-14221.64,76.1,0.0,2025-12-27 10:20:37.696968+00:00
2,Credit Card,CREDIT,Investment Account,INVESTMENT,573,232579.06,73.9,3075.05,2025-12-27 10:20:37.696968+00:00
3,Auto Loan,LOAN,Credit Card,CREDIT,568,-264377.05,75.8,0.0,2025-12-27 10:20:37.696968+00:00
4,Investment Account,INVESTMENT,Personal Loan,LOAN,566,-5310.04,69.5,0.0,2025-12-27 10:20:37.696968+00:00
5,Auto Loan,LOAN,Mortgage,LOAN,559,-532178.8,72.0,0.0,2025-12-27 10:20:37.696968+00:00
6,Business Checking,DEPOSIT,Credit Card,CREDIT,559,40934.27,73.5,2578.16,2025-12-27 10:20:37.696968+00:00
7,Checking Account,DEPOSIT,Mortgage,LOAN,557,-199829.95,73.6,0.0,2025-12-27 10:20:37.696968+00:00
8,Investment Account,INVESTMENT,Savings Account,DEPOSIT,554,307639.67,78.8,3040.38,2025-12-27 10:20:37.696968+00:00
9,Auto Loan,LOAN,Premium Credit Card,CREDIT,552,-267890.32,72.0,0.0,2025-12-27 10:20:37.696968+00:00


## **MERCHANT SPENDING PATTERNS**

### Merchant Spending Sunburst

In [40]:
# Load merchant spending rows, largest total_spend first.
merchant_spending_sunburst_query = "SELECT * FROM gold.analytics_merchant_spending_patterns ORDER BY total_spend DESC"
merchant_spending_sunburst_df = fetch_data(merchant_spending_sunburst_query)

# Ring order from the centre outwards.
sunburst_levels = ['category_group', 'category', 'customer_segment']

# Extra hover fields with their d3-format specs.
sunburst_hover_formats = {
    'transaction_count': ':,',
    'total_spend': ':$,.2f',
    'avg_transaction_amount': ':$,.2f',
    'unique_customers': ':,',
}

# Sector size = total spend; sector colour = average transaction amount.
fig = px.sunburst(
    data_frame=merchant_spending_sunburst_df,
    path=sunburst_levels,
    values='total_spend',
    color='avg_transaction_amount',
    hover_data=sunburst_hover_formats,
    title='Merchant Spending Patterns: Category → Segment',
    color_continuous_scale='RdYlGn',
    labels={'total_spend': 'Total Spend ($)'},
)
fig.update_layout(height=700)

fig.show()

In [41]:
# Display the merchant spending rows aggregated by the sunburst.
merchant_spending_sunburst_df

Unnamed: 0,category_group,category,region,customer_segment,age_group,transaction_count,unique_customers,unique_merchants,total_spend,avg_transaction_amount,transactions_per_customer,recurring_pct,last_updated
0,Professional Services,HEALTHCARE,Midwest,Affluent,35-44,147,137,135,275083.59,1871.32,1.07,6.12,2025-12-27 10:20:37.056329+00:00
1,Professional Services,HEALTHCARE,Southeast,Premium,55-64,144,133,123,258866.12,1797.68,1.08,9.03,2025-12-27 10:20:37.056329+00:00
2,Retail,RETAIL,Midwest,Affluent,55-64,150,137,134,251776.64,1678.51,1.09,8.00,2025-12-27 10:20:37.056329+00:00
3,Retail,ONLINE SHOPPING,Midwest,Premium,45-54,136,123,122,249279.33,1832.94,1.11,5.88,2025-12-27 10:20:37.056329+00:00
4,Essential Services,RESTAURANT,Midwest,Premium,55-64,151,137,131,243890.64,1615.17,1.10,6.62,2025-12-27 10:20:37.056329+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Retail,RETAIL,Midwest,Business,25-34,115,109,103,183866.82,1598.84,1.06,6.09,2025-12-27 10:20:37.056329+00:00
196,Essential Services,GROCERY,Southeast,Affluent,45-54,116,110,104,183834.93,1584.78,1.05,4.31,2025-12-27 10:20:37.056329+00:00
197,Lifestyle,ENTERTAINMENT,Midwest,Mass Market,55-64,119,115,107,183810.68,1544.63,1.03,9.24,2025-12-27 10:20:37.056329+00:00
198,Lifestyle,ENTERTAINMENT,Southeast,Premium,45-54,115,108,102,183439.87,1595.13,1.06,6.96,2025-12-27 10:20:37.056329+00:00


### Merchant Category Treemap

In [42]:
# Only the 40 highest-spend rows are fetched (LIMIT 40 in the query).
merchant_category_treemap_query = "SELECT * FROM gold.analytics_merchant_spending_patterns ORDER BY total_spend DESC LIMIT 40"
merchant_category_treemap_df = fetch_data(merchant_category_treemap_query)

# Nesting order of the tiles, outermost first.
treemap_levels = ['region', 'category_group', 'category']

# Extra hover fields with their d3-format specs.
treemap_hover_formats = {
    'total_spend': ':$,.2f',
    'transaction_count': ':,',
    'transactions_per_customer': ':.2f',
    'unique_customers': ':,',
}

treemap_labels = {
    'total_spend': 'Total Spend',
    'transactions_per_customer': 'Trans/Customer',
}

# Tile area = total spend; tile colour = transactions per customer.
fig = px.treemap(
    data_frame=merchant_category_treemap_df,
    path=treemap_levels,
    values='total_spend',
    color='transactions_per_customer',
    hover_data=treemap_hover_formats,
    title='Merchant Spending Treemap: Region → Category',
    color_continuous_scale='Blues',
    labels=treemap_labels,
)
fig.update_layout(height=700)

fig.show()

In [43]:
# Display the top-40 spending rows used by the treemap.
merchant_category_treemap_df

Unnamed: 0,category_group,category,region,customer_segment,age_group,transaction_count,unique_customers,unique_merchants,total_spend,avg_transaction_amount,transactions_per_customer,recurring_pct,last_updated
0,Professional Services,HEALTHCARE,Midwest,Affluent,35-44,147,137,135,275083.59,1871.32,1.07,6.12,2025-12-27 10:20:37.056329+00:00
1,Professional Services,HEALTHCARE,Southeast,Premium,55-64,144,133,123,258866.12,1797.68,1.08,9.03,2025-12-27 10:20:37.056329+00:00
2,Retail,RETAIL,Midwest,Affluent,55-64,150,137,134,251776.64,1678.51,1.09,8.0,2025-12-27 10:20:37.056329+00:00
3,Retail,ONLINE SHOPPING,Midwest,Premium,45-54,136,123,122,249279.33,1832.94,1.11,5.88,2025-12-27 10:20:37.056329+00:00
4,Essential Services,RESTAURANT,Midwest,Premium,55-64,151,137,131,243890.64,1615.17,1.1,6.62,2025-12-27 10:20:37.056329+00:00
5,Lifestyle,ENTERTAINMENT,Midwest,Business,25-34,141,126,121,242672.33,1721.08,1.12,7.09,2025-12-27 10:20:37.056329+00:00
6,Professional Services,HEALTHCARE,Midwest,Mass Market,25-34,139,130,118,239007.19,1719.48,1.07,5.04,2025-12-27 10:20:37.056329+00:00
7,Retail,RETAIL,Midwest,Affluent,35-44,143,122,128,237167.34,1658.51,1.17,6.29,2025-12-27 10:20:37.056329+00:00
8,Essential Services,RESTAURANT,Midwest,Mass Market,35-44,152,134,125,237159.24,1560.26,1.13,11.18,2025-12-27 10:20:37.056329+00:00
9,Lifestyle,ENTERTAINMENT,Midwest,Business,45-54,141,129,125,236637.35,1678.28,1.09,3.55,2025-12-27 10:20:37.056329+00:00


## **CREDIT SCORE CORRELATION**

### Credit Score Behavior Scatter Matrix

In [44]:
# Groups with fewer than 20 customers are excluded by the query.
credit_score_behavior_scatter_matrix_query = "SELECT * FROM gold.analytics_credit_score_behavior_correlation WHERE customer_count >= 20"
credit_score_behavior_scatter_matrix_df = fetch_data(credit_score_behavior_scatter_matrix_query)

# Column -> axis caption. The key order is also the order of the matrix panels.
labels = {
    'avg_credit_score': 'Credit Score',
    'avg_transactions': 'Avg Transactions',
    'avg_transaction_size': 'Avg Transaction Size ($)',
    'avg_credit_utilization_pct': 'Credit Utilization (%)',
    'avg_total_balance': 'Avg Total Balance ($)',
}
dimensions = list(labels)

# Pairwise scatter of every dimension against every other, coloured by credit score band.
fig = px.scatter_matrix(
    data_frame=credit_score_behavior_scatter_matrix_df,
    dimensions=dimensions,
    color='credit_score_band',
    labels=labels,
    title='Credit Score vs Customer Behavior — Correlation Matrix',
    color_discrete_sequence=px.colors.qualitative.Bold,
    opacity=0.65,
)

# Small outlined markers; the diagonal (each variable against itself) is hidden.
fig.update_traces(
    marker_size=6,
    marker_line_width=0.3,
    marker_line_color='DarkSlateGrey',
    diagonal_visible=False,
)

# Centred title, white canvas, titled legend.
fig.update_layout(
    height=850,
    title_x=0.5,
    title_font_size=20,
    paper_bgcolor='white',
    plot_bgcolor='white',
    legend=dict(title='Credit Score Band', font=dict(size=12)),
)

fig.show()

In [45]:
# Display the filtered credit-score/behaviour rows plotted in the scatter matrix.
credit_score_behavior_scatter_matrix_df

Unnamed: 0,credit_score_band,income_bracket,customer_segment,customer_count,avg_credit_score,avg_annual_income,avg_transactions,avg_transaction_size,avg_volatility,avg_credit_utilization_pct,avg_total_balance,avg_past_due_accounts,avg_late_payments,avg_missed_payments,avg_tenure_months,last_updated
0,Very Good,High,Premium,85,772.0,114282.72,0.1,1156.49,277.99,84.68,-110832.5,0.0,0.25,0.13,90.9,2025-12-27 10:20:32.840770+00:00
1,Very Good,Very High,Mass Market,351,771.0,334807.28,0.1,1516.49,94.2,61.47,-81930.04,0.0,0.34,0.07,93.4,2025-12-27 10:20:32.840770+00:00
2,Very Good,Medium,Mass Market,36,770.0,49005.06,0.1,1458.27,0.0,105.47,-47932.82,0.0,0.58,0.14,87.4,2025-12-27 10:20:32.840770+00:00
3,Very Good,Medium,Premium,43,770.0,53413.58,0.2,1126.45,0.0,37.38,-42255.41,0.0,0.21,0.16,97.1,2025-12-27 10:20:32.840770+00:00
4,Very Good,Very High,Affluent,350,770.0,324321.26,0.1,1307.63,8.33,69.48,-100335.05,0.0,0.24,0.09,94.4,2025-12-27 10:20:32.840770+00:00
5,Very Good,Very High,Business,362,769.0,320449.17,0.1,1630.25,35.49,65.71,-90981.93,0.0,0.31,0.12,92.9,2025-12-27 10:20:32.840770+00:00
6,Very Good,Very High,Premium,321,769.0,319209.27,0.1,1524.93,98.91,53.2,-128343.41,0.0,0.36,0.1,93.6,2025-12-27 10:20:32.840770+00:00
7,Very Good,Medium,Affluent,52,768.0,54239.19,0.1,1140.45,0.0,91.49,-67899.98,0.0,0.21,0.08,93.3,2025-12-27 10:20:32.840770+00:00
8,Very Good,High,Business,66,768.0,109798.79,0.0,1418.84,0.0,69.53,-70481.51,0.0,0.21,0.08,98.5,2025-12-27 10:20:32.840770+00:00
9,Very Good,High,Mass Market,80,767.0,116150.51,0.1,902.22,0.0,77.96,-55508.46,0.0,0.25,0.15,86.5,2025-12-27 10:20:32.840770+00:00


### Credit Score Risk Bubble

In [46]:
# Unfiltered: every band/income/segment group is fetched, whatever its customer count.
credit_score_risk_bubble_query = "SELECT * FROM gold.analytics_credit_score_behavior_correlation"
credit_score_risk_bubble_df = fetch_data(credit_score_risk_bubble_query)

# Extra hover fields with their d3-format specs.
risk_hover_formats = {
    'customer_count': ':,',
    'avg_credit_score': ':.0f',
    'avg_past_due_accounts': ':.2f',
    'avg_late_payments': ':.2f',
    'avg_credit_utilization_pct': ':.1f',
}

risk_axis_labels = {
    'avg_credit_score': 'Average Credit Score',
    'avg_past_due_accounts': 'Avg Past Due Accounts',
}

# Bubble per group: x = credit score, y = past-due accounts,
# size = customer count, colour = income bracket.
fig = px.scatter(
    data_frame=credit_score_risk_bubble_df,
    x='avg_credit_score',
    y='avg_past_due_accounts',
    size='customer_count',
    color='income_bracket',
    hover_name='credit_score_band',
    hover_data=risk_hover_formats,
    title='Credit Score vs Risk Indicators',
    labels=risk_axis_labels,
    color_discrete_sequence=px.colors.qualitative.Vivid,
)
fig.update_layout(height=600)

fig.show()

In [47]:
# Display the unfiltered credit-score/behaviour rows behind the risk bubble chart.
credit_score_risk_bubble_df

Unnamed: 0,credit_score_band,income_bracket,customer_segment,customer_count,avg_credit_score,avg_annual_income,avg_transactions,avg_transaction_size,avg_volatility,avg_credit_utilization_pct,avg_total_balance,avg_past_due_accounts,avg_late_payments,avg_missed_payments,avg_tenure_months,last_updated
0,Excellent,Low,Premium,1,800.0,20630.00,0.0,,,,-665127.20,0.0,0.00,0.00,6.0,2025-12-27 10:20:32.840770+00:00
1,Excellent,Very High,Premium,3,800.0,350363.00,0.0,,,20.21,59917.19,0.0,0.00,0.00,116.7,2025-12-27 10:20:32.840770+00:00
2,Excellent,Low,Mass Market,1,800.0,24957.00,1.0,1285.06,0.00,,-479046.96,0.0,0.00,0.00,11.0,2025-12-27 10:20:32.840770+00:00
3,Excellent,Very High,Mass Market,2,800.0,340396.00,0.5,1632.10,0.00,,-112959.11,0.0,0.00,0.00,153.0,2025-12-27 10:20:32.840770+00:00
4,Excellent,Medium,Mass Market,1,800.0,31064.00,0.0,,,,403878.85,0.0,0.00,0.00,136.0,2025-12-27 10:20:32.840770+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,Poor,Medium,Affluent,208,434.0,52891.80,0.1,1304.59,2.51,81.28,-138755.94,0.0,0.35,0.14,90.5,2025-12-27 10:20:32.840770+00:00
70,Poor,High,Premium,336,434.0,112337.95,0.1,1902.02,56.42,67.80,-117217.17,0.0,0.31,0.13,95.6,2025-12-27 10:20:32.840770+00:00
71,Poor,Low,Mass Market,40,427.0,24928.45,0.1,2296.74,0.00,86.59,-95238.28,0.0,0.20,0.10,91.9,2025-12-27 10:20:32.840770+00:00
72,Poor,Low,Affluent,41,420.0,25543.12,0.2,1346.17,255.70,70.82,-91754.80,0.0,0.49,0.07,84.1,2025-12-27 10:20:32.840770+00:00


## **GEOGRAPHIC CLUSTERING**

### Geographic Bubble Map

In [48]:
# The 50 rows with the highest customer_count (one row per state/city pair).
geographic_bubble_map_query = "SELECT * FROM gold.analytics_geographic_clustering ORDER BY customer_count DESC LIMIT 50"
geographic_bubble_map_df = fetch_data(geographic_bubble_map_query)

# Extra hover fields with their d3-format specs.
geo_hover_formats = {
    'customer_count': ':,',
    'avg_clv': ':$,.2f',
    'avg_credit_score': ':.0f',
    'avg_annual_income': ':$,.2f',
    'total_volume_90d': ':$,.2f',
}

geo_labels = {'avg_clv': 'Avg CLV', 'customer_count': 'Customers'}

# NOTE(review): bubbles are located by the two-letter state code, not by city
# coordinates, so several cities in one state (e.g. the TX rows) are drawn at the
# same spot. Plotting true city positions would need lat/lon columns — confirm
# whether the table can provide them.
fig = px.scatter_geo(
    data_frame=geographic_bubble_map_df,
    locations='state',
    locationmode='USA-states',
    size='customer_count',
    color='avg_clv',
    hover_name='city',
    hover_data=geo_hover_formats,
    scope='usa',
    title='Customer Geographic Distribution (Top 50 Cities)',
    color_continuous_scale='Plasma',
    size_max=30,
    labels=geo_labels,
)
fig.update_layout(height=600)

fig.show()

In [49]:
# Display the top-50 state/city rows plotted on the bubble map.
geographic_bubble_map_df

Unnamed: 0,state,city,customer_count,avg_credit_score,avg_annual_income,avg_clv,premium_pct,affluent_pct,total_transactions_90d,total_volume_90d,avg_transaction_amount,total_accounts,total_balance,avg_balance_per_account,avg_churn_risk_pct,last_updated
0,TX,HOUSTON,63,563.0,280437.73,46822.35,26.98,34.92,10,12094.29,1209.43,124,-3704272.0,-29873.16,52.37,2025-12-27 10:20:36.387283+00:00
1,NY,NEW YORK,47,552.0,260094.89,54274.85,21.28,17.02,6,8241.53,1373.59,77,-1330450.4,-17278.58,57.04,2025-12-27 10:20:36.387283+00:00
2,CA,LOS ANGELES,40,503.0,282021.75,53162.83,30.0,20.0,6,11273.05,1878.84,80,-3766735.3,-47084.19,44.18,2025-12-27 10:20:36.387283+00:00
3,TX,SAN ANTONIO,38,529.0,272815.68,47711.53,34.21,13.16,2,6517.94,3258.97,61,-1713580.96,-28091.49,50.32,2025-12-27 10:20:36.387283+00:00
4,MN,MINNEAPOLIS,37,592.0,241249.76,47694.95,37.84,16.22,9,14446.1,1605.12,75,-2195992.87,-29279.9,53.38,2025-12-27 10:20:36.387283+00:00
5,DC,WASHINGTON,36,562.0,248774.89,61292.78,30.56,27.78,4,5570.07,1392.52,68,-4340533.56,-63831.38,47.53,2025-12-27 10:20:36.387283+00:00
6,PA,PHILADELPHIA,36,519.0,243579.39,44858.67,36.11,22.22,6,12686.36,2114.39,70,-2493279.93,-35618.28,44.78,2025-12-27 10:20:36.387283+00:00
7,TX,DALLAS,35,555.0,262000.97,45938.37,20.0,40.0,3,5521.9,1840.63,75,-5641553.78,-75220.72,50.71,2025-12-27 10:20:36.387283+00:00
8,CO,DENVER,34,517.0,253108.59,42044.09,32.35,29.41,5,9252.33,1850.47,71,-3609086.93,-50832.21,47.65,2025-12-27 10:20:36.387283+00:00
9,PA,PITTSBURGH,33,556.0,263273.97,53597.36,30.3,21.21,5,8440.46,1688.09,48,-2628184.21,-54753.84,46.39,2025-12-27 10:20:36.387283+00:00


### Geographic Metrics Heatmap

In [50]:
geographic_metrics_heatmap_query = "SELECT * FROM gold.analytics_geographic_clustering ORDER BY customer_count DESC LIMIT 20"
geographic_metrics_heatmap_df = fetch_data(geographic_metrics_heatmap_query)


# "CITY, ST" label for the x-axis.
geographic_metrics_heatmap_df['location'] = geographic_metrics_heatmap_df['city'] + ', ' + geographic_metrics_heatmap_df['state']

metrics = ['avg_credit_score', 'avg_annual_income', 'avg_clv',
            'premium_pct', 'avg_churn_risk_pct']

y_labels = [m.replace('avg_', '').replace('_', ' ').title() for m in metrics]

# Rows = metrics, columns = cities.
raw_by_metric = geographic_metrics_heatmap_df[metrics].astype(float).T

# The metrics live on very different scales (income in the hundreds of thousands,
# percentages in the tens). Colouring the raw numbers on one scale makes every row
# except income look flat, so each metric is min-max normalised across cities for
# the colour while the raw value is kept for the cell text and tooltip.
metric_min = raw_by_metric.min(axis=1)
metric_span = (raw_by_metric.max(axis=1) - metric_min).replace(0, 1)  # constant row -> avoid 0/0
z_data = raw_by_metric.sub(metric_min, axis=0).div(metric_span, axis=0).values

# NOTE(review): RdYlGn paints high values green for every metric, including
# churn risk where high is presumably undesirable — confirm the intended reading.
fig = go.Figure(data=go.Heatmap(
    z=z_data,
    x=geographic_metrics_heatmap_df['location'],
    y=y_labels,
    colorscale='RdYlGn',
    zmin=0,
    zmax=1,
    text=raw_by_metric.values,
    customdata=raw_by_metric.values,
    texttemplate='%{text:.0f}',
    textfont={"size": 8},
    hovertemplate='<b>%{y}</b><br>Location: %{x}<br>Value: %{customdata:,.2f}<extra></extra>',
    colorbar=dict(title='Relative within metric<br>(0 = min, 1 = max)')
))

fig.update_layout(
    title="Geographic Performance Metrics (Top 20 Cities)",
    height=600,
    xaxis_title="Location",
    yaxis_title="Metric"
)

fig.show()

In [51]:
# Display the top-20 rows, including the 'location' column added for the heatmap axis.
geographic_metrics_heatmap_df

Unnamed: 0,state,city,customer_count,avg_credit_score,avg_annual_income,avg_clv,premium_pct,affluent_pct,total_transactions_90d,total_volume_90d,avg_transaction_amount,total_accounts,total_balance,avg_balance_per_account,avg_churn_risk_pct,last_updated,location
0,TX,HOUSTON,63,563.0,280437.73,46822.35,26.98,34.92,10,12094.29,1209.43,124,-3704272.0,-29873.16,52.37,2025-12-27 10:20:36.387283+00:00,"HOUSTON, TX"
1,NY,NEW YORK,47,552.0,260094.89,54274.85,21.28,17.02,6,8241.53,1373.59,77,-1330450.4,-17278.58,57.04,2025-12-27 10:20:36.387283+00:00,"NEW YORK, NY"
2,CA,LOS ANGELES,40,503.0,282021.75,53162.83,30.0,20.0,6,11273.05,1878.84,80,-3766735.3,-47084.19,44.18,2025-12-27 10:20:36.387283+00:00,"LOS ANGELES, CA"
3,TX,SAN ANTONIO,38,529.0,272815.68,47711.53,34.21,13.16,2,6517.94,3258.97,61,-1713580.96,-28091.49,50.32,2025-12-27 10:20:36.387283+00:00,"SAN ANTONIO, TX"
4,MN,MINNEAPOLIS,37,592.0,241249.76,47694.95,37.84,16.22,9,14446.1,1605.12,75,-2195992.87,-29279.9,53.38,2025-12-27 10:20:36.387283+00:00,"MINNEAPOLIS, MN"
5,PA,PHILADELPHIA,36,519.0,243579.39,44858.67,36.11,22.22,6,12686.36,2114.39,70,-2493279.93,-35618.28,44.78,2025-12-27 10:20:36.387283+00:00,"PHILADELPHIA, PA"
6,DC,WASHINGTON,36,562.0,248774.89,61292.78,30.56,27.78,4,5570.07,1392.52,68,-4340533.56,-63831.38,47.53,2025-12-27 10:20:36.387283+00:00,"WASHINGTON, DC"
7,TX,DALLAS,35,555.0,262000.97,45938.37,20.0,40.0,3,5521.9,1840.63,75,-5641553.78,-75220.72,50.71,2025-12-27 10:20:36.387283+00:00,"DALLAS, TX"
8,CO,DENVER,34,517.0,253108.59,42044.09,32.35,29.41,5,9252.33,1850.47,71,-3609086.93,-50832.21,47.65,2025-12-27 10:20:36.387283+00:00,"DENVER, CO"
9,PA,PITTSBURGH,33,556.0,263273.97,53597.36,30.3,21.21,5,8440.46,1688.09,48,-2628184.21,-54753.84,46.39,2025-12-27 10:20:36.387283+00:00,"PITTSBURGH, PA"


## **SEASONAL PATTERNS**

### Seasonal Decomposition

In [52]:
# Load the seasonal table in chronological order.
seasonal_decomposition_query = "SELECT * FROM gold.analytics_seasonal_patterns ORDER BY year, month, day_of_week"
seasonal_decomposition_df = fetch_data(seasonal_decomposition_query)

# Collapse to one row per (calendar month, merchant category), summing counts and volume.
# NOTE(review): the sum runs across all years in the table; if some calendar months
# occur in more years than others, their totals are inflated — verify year coverage.
monthly = (
    seasonal_decomposition_df
    .groupby(['month', 'merchant_category'])[['transaction_count', 'total_volume']]
    .sum()
    .reset_index()
)

# Keep only the five categories with the largest overall volume.
volume_by_category = monthly.groupby('merchant_category')['total_volume'].sum()
top_categories = volume_by_category.nlargest(5).index
in_top_five = monthly['merchant_category'].isin(top_categories)
monthly = monthly[in_top_five]

# One line per category across the calendar months.
fig = px.line(
    data_frame=monthly,
    x='month',
    y='total_volume',
    color='merchant_category',
    title='Seasonal Spending Patterns by Category (Top 5)',
    labels={'month': 'Month', 'total_volume': 'Total Volume ($)'},
    markers=True,
)

# Tick on every month starting at 1.
fig.update_layout(
    height=500,
    xaxis_tickmode='linear',
    xaxis_tick0=1,
    xaxis_dtick=1,
)
fig.show()


In [53]:
# Display the raw seasonal rows (per year/month/weekday/merchant category) behind the line chart.
seasonal_decomposition_df

Unnamed: 0,month,month_name,quarter,day_of_week,day_name,merchant_category,transaction_count,total_volume,avg_amount,unique_customers,year,season,last_updated
0,12.0,December,4.0,0.0,Sunday,Gas Station,4,6576.23,1644.06,4,2023.0,Winter,2025-12-27 10:20:39.734953+00:00
1,12.0,December,4.0,0.0,Sunday,Entertainment,2,675.67,337.84,2,2023.0,Winter,2025-12-27 10:20:39.734953+00:00
2,12.0,December,4.0,0.0,Sunday,Utilities,2,1205.11,602.56,2,2023.0,Winter,2025-12-27 10:20:39.734953+00:00
3,12.0,December,4.0,0.0,Sunday,Services,5,5260.61,1052.12,5,2023.0,Winter,2025-12-27 10:20:39.734953+00:00
4,12.0,December,4.0,0.0,Sunday,Retail,2,4894.95,2447.48,2,2023.0,Winter,2025-12-27 10:20:39.734953+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1708,12.0,December,4.0,6.0,Saturday,Restaurant,11,23946.41,2176.95,10,2025.0,Winter,2025-12-27 10:20:39.734953+00:00
1709,12.0,December,4.0,6.0,Saturday,Online Shopping,6,13057.67,2176.28,6,2025.0,Winter,2025-12-27 10:20:39.734953+00:00
1710,12.0,December,4.0,6.0,Saturday,Healthcare,9,18138.54,2015.39,9,2025.0,Winter,2025-12-27 10:20:39.734953+00:00
1711,12.0,December,4.0,6.0,Saturday,Grocery,10,14941.25,1494.13,10,2025.0,Winter,2025-12-27 10:20:39.734953+00:00
