In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import altair as alt
import time
from IPython.display import HTML
import seaborn as sns

In [None]:
# Build a reproducible synthetic dataset shared by every plot below.
# The columns are drawn in a fixed order from the seeded global NumPy
# generator, so re-running this cell always yields the same frame.
np.random.seed(42)
n = 1000
categories = ['Group A', 'Group B', 'Group C', 'Group D']
data = pd.DataFrame({
    'x': np.random.normal(loc=0, scale=1, size=n),
    'y': np.random.normal(loc=0, scale=1, size=n),
    'category': np.random.choice(categories, size=n),
    'size': np.random.uniform(low=10, high=100, size=n),
    'metric': np.random.uniform(low=0, high=100, size=n)
})

# For Group A only, replace y with a noisy linear function of x
# (slope 0.8, Gaussian noise with standard deviation 0.5).
mask = data['category'].eq('Group A')
data.loc[mask, 'y'] = 0.8 * data.loc[mask, 'x'] + np.random.normal(loc=0, scale=0.5, size=mask.sum())

In [None]:
# Performance metrics: one list per column, appended to in lock-step by
# measure_task so the dict can later be turned into a DataFrame.
performance = {
    'library': [],
    'task': [],
    'time': [],
    'code_lines': []
}

# Function to measure performance
def measure_task(func, task_name, library_name, code_lines):
    """Run ``func`` once, record its timing in ``performance``, return its result.

    Args:
        func: Zero-argument callable to execute and time.
        task_name: Label stored in ``performance['task']``.
        library_name: Label stored in ``performance['library']``.
        code_lines: Line count stored as-is in ``performance['code_lines']``;
            it is supplied by the caller, not measured here.

    Returns:
        Whatever ``func()`` returned.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # timing short intervals. time.time() follows the wall clock, which can
    # be coarse or be adjusted while the task is running.
    start_time = time.perf_counter()
    result = func()
    elapsed = time.perf_counter() - start_time

    performance['library'].append(library_name)
    performance['task'].append(task_name)
    performance['time'].append(elapsed)
    performance['code_lines'].append(code_lines)
    return result

# 1. SCATTER PLOTS

In [None]:
print("Creating scatter plots...")

# Matplotlib scatter plot
def matplotlib_scatter():
    """Scatter y against x on one Matplotlib axes, one series per category.

    Returns:
        The Matplotlib figure containing the plot.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # A separate scatter call per category gives each group its own
    # legend entry.
    for name, subset in data.groupby('category'):
        marker_sizes = subset['size'] / 3  # Scale down the size for better visibility
        ax.scatter(subset['x'], subset['y'], s=marker_sizes, alpha=0.6, label=name)

    ax.set_title('Scatter Plot with Matplotlib', fontsize=15)
    ax.set_xlabel('X Value', fontsize=12)
    ax.set_ylabel('Y Value', fontsize=12)
    ax.grid(alpha=0.3)
    ax.legend()

    fig.tight_layout()
    return fig

# The final argument is the hand-entered line count recorded for this task.
mpl_scatter = measure_task(matplotlib_scatter, 'Scatter Plot', 'Matplotlib', 14)

In [None]:
# Plotly scatter plot
def plotly_scatter():
    """Build a Plotly Express scatter of y against x.

    Points are coloured by ``category``, sized by ``size``, and show
    ``metric`` as extra hover data.

    Returns:
        The Plotly figure.
    """
    scatter_options = dict(
        x='x',
        y='y',
        color='category',
        size='size',
        opacity=0.6,
        hover_data=['metric'],
        title='Scatter Plot with Plotly',
        width=800,
        height=600,
    )
    fig = px.scatter(data, **scatter_options)

    layout_options = dict(
        xaxis_title='X Value',
        yaxis_title='Y Value',
        legend_title='Category',
        font=dict(size=12),
    )
    fig.update_layout(**layout_options)

    return fig

# NOTE: this rebinds the name -- from here on `plotly_scatter` is the figure
# returned by the function above, not the function itself.
plotly_scatter = measure_task(plotly_scatter, 'Scatter Plot', 'Plotly', 12)
plotly_scatter

In [None]:
# Altair scatter plot
def altair_scatter():
    """Build an interactive Altair scatter of y against x.

    Circles are coloured by ``category`` and sized by ``size`` (mapped to
    the range 10-300); the tooltip lists x, y, category and metric.

    Returns:
        The Altair chart.
    """
    circles = alt.Chart(data).mark_circle(opacity=0.6)

    encoded = circles.encode(
        x=alt.X('x', title='X Value'),
        y=alt.Y('y', title='Y Value'),
        color='category',
        size=alt.Size('size', scale=alt.Scale(range=[10, 300])),
        tooltip=['x', 'y', 'category', 'metric']
    )

    framed = encoded.properties(
        title='Scatter Plot with Altair',
        width=600,
        height=400
    )

    return framed.interactive()

# NOTE: this rebinds the name -- `altair_scatter` is the chart from here on.
altair_scatter = measure_task(altair_scatter, 'Scatter Plot', 'Altair', 12)
altair_scatter

# 2. HISTOGRAMS

In [None]:
print("Creating histograms...")

# Matplotlib histogram
def matplotlib_hist():
    """Overlay one semi-transparent histogram of ``metric`` per category.

    All categories share the same 20 bins so their bars line up.

    Returns:
        The Matplotlib figure containing the plot.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Compute the bin edges once from the whole column. Passing bins=20 to
    # each ax.hist call would derive the edges from that group's own
    # min/max, so the overlaid bars would not be aligned across groups.
    bin_edges = np.histogram_bin_edges(data['metric'], bins=20)

    for category, group in data.groupby('category'):
        ax.hist(group['metric'], bins=bin_edges, alpha=0.5, label=category)

    ax.set_title('Histogram with Matplotlib', fontsize=15)
    ax.set_xlabel('Metric', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)
    ax.legend()
    ax.grid(alpha=0.3)

    fig.tight_layout()
    return fig

# The final argument is the hand-entered line count recorded for this task.
mpl_hist = measure_task(matplotlib_hist, 'Histogram', 'Matplotlib', 13)

In [None]:
# Plotly histogram
def plotly_hist():
    """Build an overlaid Plotly Express histogram of ``metric`` by category.

    Returns:
        The Plotly figure.
    """
    fig = px.histogram(
        data,
        x='metric',
        color='category',
        barmode='overlay',
        opacity=0.7,
        nbins=20,
        title='Histogram with Plotly',
        width=800,
        height=600,
    )

    axis_labels = dict(
        xaxis_title='Metric',
        yaxis_title='Frequency',
        legend_title='Category',
    )
    fig.update_layout(font=dict(size=12), **axis_labels)

    return fig

# NOTE: this rebinds the name -- `plotly_hist` is the figure from here on.
plotly_hist = measure_task(plotly_hist, 'Histogram', 'Plotly', 11)
plotly_hist

In [None]:
# Altair histogram
def altair_hist():
    """Build an interactive Altair bar chart counting rows per ``metric`` bin.

    ``metric`` is binned with at most 20 bins and bars are coloured by
    ``category``.

    Returns:
        The Altair chart.
    """
    bars = alt.Chart(data).mark_bar(opacity=0.7)

    binned_metric = alt.X('metric', bin=alt.Bin(maxbins=20), title='Metric')
    row_count = alt.Y('count()', title='Frequency')

    chart = bars.encode(
        x=binned_metric,
        y=row_count,
        color='category',
        tooltip=['category', 'count()']
    )

    framed = chart.properties(
        title='Histogram with Altair',
        width=600,
        height=400
    )

    return framed.interactive()

# NOTE: this rebinds the name -- `altair_hist` is the chart from here on.
altair_hist = measure_task(altair_hist, 'Histogram', 'Altair', 11)
altair_hist

# 3. HEATMAPS

In [None]:
print("Creating heatmaps...")

# Build the heatmap input: mean of 'metric' for every (category, x-bin)
# cell, with x cut into 5 bins. Despite the variable name, the table holds
# means, not correlation coefficients.
corr_data = data.pivot_table(
    index='category',
    columns=pd.cut(data['x'], bins=5),
    values='metric',
    aggfunc='mean',
    # pd.cut produces a categorical grouper. State observed=False explicitly
    # so the result does not depend on the library's changing default, and
    # so newer pandas versions do not emit a deprecation warning.
    observed=False,
).fillna(0)  # cells with no rows have no mean; they are shown as 0

# Matplotlib heatmap
def matplotlib_heatmap():
    """Draw ``corr_data`` as an annotated viridis heatmap.

    Each cell is labelled with its value to one decimal place, in a text
    colour chosen to contrast with the cell's background.

    Returns:
        The Matplotlib figure containing the heatmap.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    im = ax.imshow(corr_data, cmap='viridis')

    # Add colorbar
    cbar = ax.figure.colorbar(im, ax=ax)
    cbar.ax.set_ylabel('Mean Metric', rotation=-90, va="bottom")

    # Show all ticks and label them
    n_rows = len(corr_data.index)
    n_cols = len(corr_data.columns)
    ax.set_xticks(np.arange(n_cols))
    ax.set_yticks(np.arange(n_rows))
    ax.set_xticklabels([str(col) for col in corr_data.columns])
    ax.set_yticklabels(corr_data.index)

    # Rotate the tick labels and set their alignment
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # viridis runs from dark purple (low) to bright yellow (high), so cells
    # in the lower half of the colour range get white text and cells in the
    # upper half get black text. im.norm maps a value onto that 0-1 range,
    # which avoids hard-coding a threshold tied to the data's scale.
    for i in range(n_rows):
        for j in range(n_cols):
            value = corr_data.iloc[i, j]
            text_color = "white" if im.norm(value) < 0.5 else "black"
            ax.text(j, i, f"{value:.1f}",
                    ha="center", va="center", color=text_color)

    ax.set_title("Heatmap with Matplotlib")
    fig.tight_layout()
    return fig

# The final argument is the hand-entered line count recorded for this task.
mpl_heatmap = measure_task(matplotlib_heatmap, 'Heatmap', 'Matplotlib', 24)

In [None]:
# Plotly heatmap
def plotly_heatmap():
    """Build a Plotly heatmap of ``corr_data`` with per-cell hover text.

    Column labels are converted to strings for the x axis; hovering a cell
    shows its value formatted to one decimal place.

    Returns:
        The Plotly figure.
    """
    x_labels = [str(col) for col in corr_data.columns]
    hover_text = [[f"{val:.1f}" for val in row] for row in corr_data.values]

    heatmap = go.Heatmap(
        z=corr_data.values,
        x=x_labels,
        y=corr_data.index,
        colorscale='Viridis',
        text=hover_text,
        hoverinfo='text'
    )
    fig = go.Figure(data=heatmap)

    fig.update_layout(
        title='Heatmap with Plotly',
        xaxis_title='X Range',
        yaxis_title='Category',
        font=dict(size=12),
        width=800,
        height=600,
    )

    return fig

# NOTE: this rebinds the name -- `plotly_heatmap` is the figure from here on.
plotly_heatmap = measure_task(plotly_heatmap, 'Heatmap', 'Plotly', 16)
plotly_heatmap

In [None]:
# Altair heatmap
def altair_heatmap():
    """Build a layered Altair heatmap of ``corr_data`` with value labels.

    The wide table is reshaped to one row per (category, x-range) cell.
    A rectangle layer encodes the mean as colour, and a text layer prints
    the value to one decimal place in a contrasting colour.

    Returns:
        The layered Altair chart (rectangles + text).
    """
    # The pivot's column labels are pandas Interval objects, which are not
    # JSON-serialisable and so cannot go into the chart spec. Use their
    # string form, as the Matplotlib and Plotly heatmaps do.
    x_labels = [str(col) for col in corr_data.columns]
    labelled = corr_data.copy()
    labelled.columns = x_labels

    # Convert to long format for Altair
    corr_long = labelled.reset_index().melt(
        id_vars='category', var_name='x_range', value_name='mean_metric'
    )

    # Strings sort alphabetically, which would scramble the bins; pass the
    # original column order explicitly.
    x_axis = alt.X('x_range:O', title='X Range', sort=x_labels)

    chart = alt.Chart(corr_long).mark_rect().encode(
        x=x_axis,
        y=alt.Y('category:O', title='Category'),
        color=alt.Color('mean_metric:Q', scale=alt.Scale(scheme='viridis')),
        tooltip=['category', 'x_range', 'mean_metric']
    ).properties(
        title='Heatmap with Altair',
        width=600,
        height=400
    ).interactive()

    # viridis is dark at the low end and bright at the high end, so values
    # below the middle of the data range get white text and the rest black.
    midpoint = float(corr_long['mean_metric'].min() + corr_long['mean_metric'].max()) / 2

    # Add text labels
    text = alt.Chart(corr_long).mark_text().encode(
        x=x_axis,
        y=alt.Y('category:O', title='Category'),
        text=alt.Text('mean_metric:Q', format='.1f'),
        color=alt.condition(
            alt.datum.mean_metric < midpoint,
            alt.value('white'),
            alt.value('black')
        )
    )

    return chart + text

# NOTE: this rebinds the name -- `altair_heatmap` is the chart from here on.
altair_heatmap = measure_task(altair_heatmap, 'Heatmap', 'Altair', 22)
# Display the chart, as the other Plotly/Altair cells do.
altair_heatmap


# 4. INTERACTIVE PLOTS

In [None]:
print("Creating interactive plots...")

# Create time series data: 100 consecutive days starting 2022-01-01, three
# random-walk series (cumulative sums of standard-normal steps) and an
# event flag that is 'Yes' with probability 0.1.
dates = pd.date_range(start='2022-01-01', periods=100, freq='D')
ts_data = pd.DataFrame({
    'date': dates,
    'value_a': np.random.normal(loc=0, scale=1, size=100).cumsum(),
    'value_b': np.random.normal(loc=0, scale=1, size=100).cumsum(),
    'value_c': np.random.normal(loc=0, scale=1, size=100).cumsum(),
    'event': np.random.choice(['Yes', 'No'], size=100, p=[0.1, 0.9])
})

# Matplotlib interactive (limited)
def matplotlib_interactive():
    """Plot the three time series as lines and mark event days on series A.

    Returns:
        The Matplotlib figure containing the plot.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    line_series = (
        ('value_a', 'Series A'),
        ('value_b', 'Series B'),
        ('value_c', 'Series C'),
    )
    for column, label in line_series:
        ax.plot(ts_data['date'], ts_data[column], label=label)

    # Mark events: rows flagged 'Yes' are drawn as red dots at their
    # series-A value, above the lines (zorder=5).
    flagged = ts_data[ts_data['event'] == 'Yes']
    ax.scatter(flagged['date'], flagged['value_a'], color='red', s=50, zorder=5, label='Events')

    ax.set_title('Time Series with Matplotlib (Limited Interactivity)', fontsize=15)
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Value', fontsize=12)
    ax.grid(alpha=0.3)
    ax.legend()

    plt.xticks(rotation=45)
    fig.tight_layout()
    return fig

# The final argument is the hand-entered line count recorded for this task.
mpl_interactive = measure_task(matplotlib_interactive, 'Interactive Plot', 'Matplotlib', 17)

In [None]:
# Plotly interactive
def plotly_interactive():
    """Build a Plotly time-series figure with a range selector and slider.

    The three series are drawn as lines, event days are drawn as red
    markers at their series-A value, and the x axis gets 1w / 1m / 3m / all
    range buttons plus a range slider.

    Returns:
        The Plotly figure.
    """
    fig = go.Figure()

    line_series = [
        ('value_a', 'Series A'),
        ('value_b', 'Series B'),
        ('value_c', 'Series C'),
    ]
    for column, label in line_series:
        fig.add_trace(go.Scatter(
            x=ts_data['date'], y=ts_data[column],
            mode='lines',
            name=label
        ))

    # Add events as markers
    flagged = ts_data[ts_data['event'] == 'Yes']
    fig.add_trace(go.Scatter(
        x=flagged['date'], y=flagged['value_a'],
        mode='markers',
        marker=dict(size=10, color='red'),
        name='Events'
    ))

    fig.update_layout(
        title='Interactive Time Series with Plotly',
        xaxis_title='Date',
        yaxis_title='Value',
        hovermode='closest',
        legend_title='Series',
        font=dict(size=12),
        width=800,
        height=600,
    )

    # Add range slider and buttons
    range_buttons = [
        dict(count=7, label="1w", step="day", stepmode="backward"),
        dict(count=1, label="1m", step="month", stepmode="backward"),
        dict(count=3, label="3m", step="month", stepmode="backward"),
        dict(step="all"),
    ]
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(buttons=range_buttons),
            rangeslider=dict(visible=True),
            type="date"
        )
    )

    return fig

# NOTE: this rebinds the name -- `plotly_interactive` is the figure from here on.
plotly_interactive = measure_task(plotly_interactive, 'Interactive Plot', 'Plotly', 42)
plotly_interactive

In [None]:
# Altair interactive
def altair_interactive():
    """Build a layered, interactive Altair chart of the three time series.

    Each series is a line layer with its own colour and tooltip; a fourth
    layer draws red circles on series A that are opaque only on event days.

    Returns:
        The layered Altair chart.
    """
    # Base chart for lines: shared data, x encoding and size/title
    line_chart = alt.Chart(ts_data).encode(
        x=alt.X('date:T', title='Date')
    ).properties(
        width=600,
        height=400,
        title='Time Series with Altair (Interactive)'
    )

    def line_layer(field, color, tooltip_title):
        """Return one line layer for ``field`` drawn in ``color``."""
        return line_chart.mark_line(color=color).encode(
            # Every layer uses the same y title so the shared axis is
            # labelled just 'Value' rather than a mix of 'Value' and the
            # raw field names of the other layers.
            y=alt.Y(f'{field}:Q', title='Value'),
            tooltip=['date:T', alt.Tooltip(f'{field}:Q', title=tooltip_title)]
        )

    # Add the three line series
    series_a = line_layer('value_a', '#1f77b4', 'Value A')
    series_b = line_layer('value_b', '#ff7f0e', 'Value B')
    series_c = line_layer('value_c', '#2ca02c', 'Value C')

    # Add event markers: a circle is drawn for every row, but only rows
    # with event == "Yes" are opaque; the rest have opacity 0.
    events = line_chart.mark_circle(color='red', size=60).encode(
        y=alt.Y('value_a:Q', title='Value'),
        opacity=alt.condition(
            'datum.event == "Yes"',
            alt.value(1),
            alt.value(0)
        ),
        tooltip=['date:T', 'event:N']
    )

    # Combine all layers and make interactive
    combined = alt.layer(series_a, series_b, series_c, events).interactive()

    return combined

# NOTE: this rebinds the name -- `altair_interactive` is the chart from here on.
altair_interactive = measure_task(altair_interactive, 'Interactive Plot', 'Altair', 33)
altair_interactive

In [None]:
# Create performance comparison DataFrame (one row per measured task)
perf_df = pd.DataFrame(performance)

# Create summary table: rows are tasks, columns are (metric, library) pairs
summary = pd.pivot_table(
    perf_df,
    values=['time', 'code_lines'],
    index=['task'],
    columns=['library'],
    aggfunc='mean'
)

# Calculate relative performance (normalized to Matplotlib).
# Each entry is Matplotlib's value divided by the library's value, so a
# ratio above 1 means the library needed less time / fewer lines than
# Matplotlib. The Matplotlib columns are normalized as well (to 1.0) so the
# table holds only ratios instead of mixing ratios with raw seconds and
# line counts.
rel_perf = summary.copy()
for metric in ['time', 'code_lines']:
    baseline = summary[(metric, 'Matplotlib')]
    for lib in ['Matplotlib', 'Plotly', 'Altair']:
        rel_perf[(metric, lib)] = baseline / summary[(metric, lib)]

# Create comparison table: hand-assigned star ratings per feature
comparison = pd.DataFrame({
    'Feature': [
        'Code Simplicity',
        'Interactivity',
        'Web Integration',
        'Static Plots',
        'Customization',
        'Learning Curve',
        'Performance',
        'Community Support'
    ],
    'Matplotlib': [
        '★★☆☆☆',
        '★☆☆☆☆',
        '★☆☆☆☆',
        '★★★★★',
        '★★★★★',
        '★★★☆☆',
        '★★★★☆',
        '★★★★★'
    ],
    'Plotly': [
        '★★★★☆',
        '★★★★★',
        '★★★★★',
        '★★★★☆',
        '★★★★☆',
        '★★★☆☆',
        '★★★☆☆',
        '★★★★☆'
    ],
    'Altair': [
        '★★★★★',
        '★★★★☆',
        '★★★★☆',
        '★★★☆☆',
        '★★★☆☆',
        '★★★★☆',
        '★★★☆☆',
        '★★★☆☆'
    ]
})

# Print results
print("\n--- Performance Summary ---")
print(summary)
print("\n--- Relative Performance (compared to Matplotlib) ---")
print(rel_perf)
print("\n--- Feature Comparison ---")
print(comparison)

# Create visualization of performance metrics: two stacked bar charts
# (execution time on top, line counts below), grouped by task and
# coloured by library, saved to a PNG in the working directory.
fig, axes = plt.subplots(2, 1, figsize=(12, 10))

# (column in perf_df, panel title, y-axis label) for each panel
panels = [
    ('time', 'Execution Time Comparison', 'Time (seconds)'),
    ('code_lines', 'Code Complexity (Number of Lines)', 'Lines of Code'),
]

for ax, (column, panel_title, y_label) in zip(axes, panels):
    sns.barplot(x='task', y=column, hue='library', data=perf_df, ax=ax)
    ax.set_title(panel_title, fontsize=15)
    ax.set_xlabel('Task', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    plt.setp(ax.get_xticklabels(), rotation=45)
    ax.legend(title='Library')

fig.tight_layout()
fig.savefig('visualization_libraries_comparison.png')

print("\nComparison complete! Results saved to 'visualization_libraries_comparison.png'")

# Display the plots (in Jupyter Notebook)
# For Matplotlib, just display the figures
# For Plotly, use plotly_scatter.show(), plotly_hist.show(), etc.
# For Altair, use display(altair_scatter), display(altair_hist), etc.