# Plotly Express Visualization Cheat Sheet

🔹 Distribution Plots
- histogram: Frequency distribution → px.histogram()
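- density: Density-normalized histogram (count ÷ bin width) → px.histogram(histnorm='density'); for a true probability density (total area = 1) use histnorm='probability density'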
- rug: Individual observations → px.histogram(marginal='rug')

🔹 Relational Plots
- scatter: Relationship between variables → px.scatter()
- line: Trends over time → px.line()

🔹 Categorical Plots
- box: Distribution + outliers → px.box()
- violin: Distribution shape + density → px.violin()
- bar: Mean comparisons → px.bar() on pre-aggregated means, e.g. df.groupby(x)[y].mean() (px.bar does not average for you)
- count: Category frequency → px.bar(value_counts)

🔹 Matrix Plots
- heatmap: Correlation/tabular data → px.imshow()

🔹 Regression Plots
- scatter + trendline: Linear regression → px.scatter(trendline='ols')

🔹 Interactive Features
- hover_data: Show extra info on hover → hover_data=df.columns
- marginal: Add a rug, box, violin, or histogram along an axis → marginal='rug', 'box', 'violin', or 'histogram'
- animation_frame: Animate over time → animation_frame='year'
- facet_row / facet_col: Subplots by category → facet_row='group', facet_col='type'
"""


In [2]:
"""
Plotly Express Visualization Toolkit

Includes:
- Interactive distribution, relational, categorical, and regression plots
- Dummy dataset generator for testing
- Reproducible structure for dashboards and notebooks
"""

import plotly.express as px
import pandas as pd
import numpy as np

# -------------------------------
# 🔧 Dummy Data Generator
# -------------------------------
def generate_dummy_data(n=500, seed=42):
    """Build a synthetic coupon/trip dataset for exercising the plot helpers.

    Args:
        n: Number of rows to generate.
        seed: Seed for the random generator; the same seed always yields the
            same frame.

    Returns:
        pandas.DataFrame with ``n`` rows and seven columns:
        ``AcceptedCoupon`` (0/1 binomial draw), ``Age`` and ``Income``
        (normal draws truncated to int), ``MaritalStatus``,
        ``PassengerType`` and ``CouponType`` (uniformly chosen labels), and
        ``TripDuration`` (exponential draw, float).
    """
    # A private RandomState yields exactly the same stream as
    # np.random.seed(seed) followed by module-level np.random.* calls, but it
    # leaves NumPy's global RNG untouched, so a caller's own random draws are
    # not silently reset by generating test data.
    rng = np.random.RandomState(seed)
    # Dict entries are evaluated top to bottom, so the draw order (and hence
    # the generated values) is fixed by the order of the keys below.
    df = pd.DataFrame({
        'AcceptedCoupon': rng.binomial(1, 0.4, n),
        'Age': rng.normal(40, 10, n).astype(int),
        'Income': rng.normal(60000, 15000, n).astype(int),
        'MaritalStatus': rng.choice(['Single', 'Married', 'Divorced'], n),
        'PassengerType': rng.choice(['Business', 'Personal', 'Family'], n),
        'TripDuration': rng.exponential(scale=3, size=n),
        'CouponType': rng.choice(['Restaurant', 'Grocery', 'Retail'], n),
    })
    return df

# -------------------------------
# 📊 Plotting Functions
# -------------------------------
def plot_histogram(df, column, nbins=30, title=None):
    """Show an interactive histogram of one column of ``df``.

    Args:
        df: DataFrame holding the data.
        column: Name of the column placed on the x-axis.
        nbins: Forwarded to ``px.histogram`` as ``nbins``.
        title: Figure title; a falsy value falls back to
            ``'Distribution of <column>'``.
    """
    heading = title if title else f'Distribution of {column}'
    fig = px.histogram(
        df,
        x=column,
        nbins=nbins,
        title=heading,
    )
    # A small gap between bars keeps neighbouring bins visually separate.
    fig.update_layout(bargap=0.1)
    fig.show()

def plot_density(df, column, title=None):
    """Show a density-normalised histogram of ``column`` with a rug marginal.

    Args:
        df: DataFrame holding the data.
        column: Name of the column placed on the x-axis.
        title: Figure title; a falsy value falls back to
            ``'Density of <column>'``.
    """
    heading = title if title else f'Density of {column}'
    histogram_options = {
        'x': column,
        'marginal': 'rug',       # one tick per observation above the bars
        'histnorm': 'density',
        'title': heading,
    }
    fig = px.histogram(df, **histogram_options)
    fig.show()

def plot_scatter(df, x, y, color=None, title=None):
    """Show an interactive scatter plot of ``y`` against ``x``.

    Args:
        df: DataFrame holding the data.
        x: Column name for the horizontal axis.
        y: Column name for the vertical axis.
        color: Optional column name used to colour the points.
        title: Figure title; a falsy value falls back to ``'<y> vs <x>'``.
    """
    heading = title if title else f'{y} vs {x}'
    fig = px.scatter(
        df,
        x=x,
        y=y,
        color=color,
        title=heading,
        # Every column of the frame is exposed in the hover tooltip.
        hover_data=df.columns,
    )
    fig.show()

def plot_box(df, x, y, color=None, title=None):
    """Show box plots of ``y`` grouped by the categories in ``x``.

    Args:
        df: DataFrame holding the data.
        x: Column name supplying the groups.
        y: Column name whose values are summarised.
        color: Optional column name used to colour the boxes.
        title: Figure title; a falsy value falls back to
            ``'<y> Distribution by <x>'``.
    """
    heading = title if title else f'{y} Distribution by {x}'
    fig = px.box(
        df,
        x=x,
        y=y,
        color=color,
        title=heading,
    )
    fig.show()

def plot_violin(df, x, y, color=None, title=None):
    """Show violin plots of ``y`` grouped by the categories in ``x``.

    Each violin is drawn with an inner box plot and with all individual
    points shown.

    Args:
        df: DataFrame holding the data.
        x: Column name supplying the groups.
        y: Column name whose values are plotted.
        color: Optional column name used to colour the violins.
        title: Figure title; a falsy value falls back to
            ``'<y> Density by <x>'``.
    """
    heading = title if title else f'{y} Density by {x}'
    fig = px.violin(
        df,
        x=x,
        y=y,
        color=color,
        box=True,
        points='all',
        title=heading,
    )
    fig.show()

def plot_bar(df, x, y, title=None):
    """Show a bar chart of the mean of ``y`` within each group of ``x``.

    Args:
        df: DataFrame holding the data.
        x: Column name to group by; one bar per group.
        y: Column name whose per-group mean sets the bar height.
        title: Figure title; a falsy value falls back to
            ``'Mean <y> by <x>'``.
    """
    # px.bar plots the values it is given, so the averaging happens here.
    group_means = df.groupby(x)[y].mean()
    agg_df = group_means.reset_index()
    heading = title if title else f'Mean {y} by {x}'
    fig = px.bar(agg_df, x=x, y=y, title=heading)
    fig.show()

def plot_count(df, x, title=None):
    """Show a bar chart of how often each distinct value of ``x`` occurs.

    Args:
        df: DataFrame holding the data.
        x: Column name whose values are counted.
        title: Figure title; a falsy value falls back to
            ``'Count of <x>'``.
    """
    tallies = df[x].value_counts()
    # Relabel both columns explicitly so the frame always has the value
    # column named after ``x`` and the frequency column named 'Count'.
    count_df = tallies.reset_index().set_axis([x, 'Count'], axis=1)
    heading = title if title else f'Count of {x}'
    fig = px.bar(count_df, x=x, y='Count', title=heading)
    fig.show()

def plot_correlation_heatmap(df, title='Correlation Heatmap'):
    """Show a heatmap of the pairwise correlations of ``df``'s numeric columns.

    Args:
        df: DataFrame holding the data; non-numeric columns are left out of
            the correlation matrix.
        title: Figure title.
    """
    corr_matrix = df.corr(numeric_only=True)
    heatmap_options = {
        'text_auto': True,                    # print each coefficient in its cell
        'color_continuous_scale': 'RdBu_r',
        'title': title,
    }
    fig = px.imshow(corr_matrix, **heatmap_options)
    fig.show()

def plot_regression(df, x, y, color=None, title=None):
    """Show a scatter plot of ``y`` against ``x`` with an OLS trendline.

    Args:
        df: DataFrame holding the data.
        x: Column name for the horizontal axis.
        y: Column name for the vertical axis.
        color: Optional column name used to colour the points.
        title: Figure title; a falsy value falls back to
            ``'Regression: <y> vs <x>'``.
    """
    heading = title if title else f'Regression: {y} vs {x}'
    # NOTE(review): Plotly Express fits 'ols' trendlines through statsmodels,
    # so that package presumably has to be installed — confirm in the env.
    fig = px.scatter(
        df,
        x=x,
        y=y,
        color=color,
        trendline='ols',
        title=heading,
    )
    fig.show()


In [3]:
if __name__ == "__main__":
    # Demo run: build the dummy dataset once, then render every chart type.
    df = generate_dummy_data()

    # Each entry pairs the banner printed before a chart with the call that
    # renders it; the list order is the display order.
    demos = [
        ("🔹 Histogram: Age Distribution",
         lambda: plot_histogram(df, column='Age')),
        ("🔹 Density Plot: Income",
         lambda: plot_density(df, column='Income')),
        ("🔹 Scatter Plot: Income vs Age by Marital Status",
         lambda: plot_scatter(df, x='Age', y='Income', color='MaritalStatus')),
        ("🔹 Boxplot: Income by Marital Status",
         lambda: plot_box(df, x='MaritalStatus', y='Income', color='MaritalStatus')),
        ("🔹 Violin Plot: Trip Duration by Passenger Type",
         lambda: plot_violin(df, x='PassengerType', y='TripDuration', color='PassengerType')),
        ("🔹 Bar Plot: Mean Coupon Acceptance by Type",
         lambda: plot_bar(df, x='CouponType', y='AcceptedCoupon')),
        ("🔹 Count Plot: Passenger Type Frequency",
         lambda: plot_count(df, x='PassengerType')),
        ("🔹 Heatmap: Correlation Matrix",
         lambda: plot_correlation_heatmap(df)),
        ("🔹 Regression Plot: Income vs Age by Marital Status",
         lambda: plot_regression(df, x='Age', y='Income', color='MaritalStatus')),
    ]

    for banner, render in demos:
        print(banner)
        render()



🔹 Histogram: Age Distribution


🔹 Density Plot: Income


🔹 Scatter Plot: Income vs Age by Marital Status


🔹 Boxplot: Income by Marital Status


🔹 Violin Plot: Trip Duration by Passenger Type


🔹 Bar Plot: Mean Coupon Acceptance by Type


🔹 Count Plot: Passenger Type Frequency


🔹 Heatmap: Correlation Matrix


🔹 Regression Plot: Income vs Age by Marital Status
