In [1]:
import pandas as pd
import plotly.express as px
from google.cloud import bigquery
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set up the BigQuery client, bound to the 'brazil-olist' project.
# It is created once at module level and used by fetch_delivery_time_data().
client = bigquery.Client(project='brazil-olist')

def fetch_delivery_time_data():
    """Load the per-location delivery-time rows from BigQuery.

    Runs a fixed query against ``fct_delivery_time_by_zip`` through the
    module-level ``client``, skipping rows whose ``avg_delivery_days`` is
    NULL and ordering the result from slowest to fastest.

    Returns:
        The result of ``to_dataframe()`` on the query job, with the columns
        ``state``, ``city``, ``avg_delivery_days``, ``delivery_category``
        and ``order_count``.
    """
    sql = """
    SELECT 
        state,
        city,
        avg_delivery_days,
        delivery_category,
        order_count
    FROM `brazil-olist.dbt_output.fct_delivery_time_by_zip`
    WHERE avg_delivery_days IS NOT NULL
    ORDER BY avg_delivery_days DESC
    """
    job = client.query(sql)
    return job.to_dataframe()

def _summarize_by_state(df):
    """Collapse the location-level rows into one row per state.

    Returns a frame with the columns ``state``, ``avg_delivery_days`` (plain
    mean of the per-location averages) and ``location_count`` (number of rows
    with a non-null ``city``).
    """
    # NOTE(review): this is an unweighted mean of per-location averages. If a
    # per-order state average is wanted, it should presumably be weighted by
    # `order_count` -- confirm the intent before changing the numbers.
    return df.groupby('state').agg(
        avg_delivery_days=('avg_delivery_days', 'mean'),
        location_count=('city', 'count'),
    ).reset_index()


def _build_state_map(df, state_data):
    """Build the per-state choropleth whose trace and colour axis feed the dashboard."""
    fig_map = px.choropleth(
        state_data,
        geojson="https://raw.githubusercontent.com/codeforamerica/click_that_hood/master/public/data/brazil-states.geojson",
        featureidkey="properties.sigla",  # state-abbreviation property in the GeoJSON
        locations="state",
        color="avg_delivery_days",
        color_continuous_scale="Reds",
        # NOTE(review): the colour range spans the location-level min/max,
        # which is wider than the spread of the state means being drawn.
        range_color=(df['avg_delivery_days'].min(), df['avg_delivery_days'].max()),
        hover_name="state",
        hover_data={
            # Must be a plain d3-format spec; a trailing unit character makes
            # the specifier invalid and the number is then shown unformatted.
            "avg_delivery_days": ":.1f",
            "location_count": True,
        },
        labels={'avg_delivery_days': '平均送货天数'},
        title="巴西各州平均送货时间",
    )

    # Only the trace and the colour axis are reused by the dashboard, so only
    # the colour bar is configured here; geo settings go on the dashboard.
    fig_map.update_layout(
        coloraxis_colorbar=dict(
            title="天数",
            thickness=20,
        )
    )
    return fig_map


def _build_category_pie(df):
    """Build the donut chart counting locations per delivery category."""
    category_counts = df['delivery_category'].value_counts().reset_index()
    category_counts.columns = ['category', 'count']

    fig_pie = px.pie(
        category_counts,
        names='category',
        values='count',
        hole=0.4,
        title='送货时间分布',
        color='category',
        color_discrete_sequence=px.colors.sequential.Reds[::-1],
    )
    fig_pie.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate="%{label}: %{value}个地区<br>(%{percent})",
    )
    return fig_pie


def create_enhanced_visualizations(df: pd.DataFrame) -> "go.Figure":
    """Assemble the delivery-time dashboard from location-level data.

    The returned figure has three panels: a bar chart of the average delivery
    days per state (slowest first), a donut chart of how many locations fall
    into each delivery category, and a choropleth of the per-state averages.

    Args:
        df: Frame with at least the columns ``state``, ``city``,
            ``avg_delivery_days`` and ``delivery_category``.

    Returns:
        A single plotly figure containing the three panels.
    """
    state_data = _summarize_by_state(df)
    state_avg = state_data.sort_values('avg_delivery_days', ascending=False)

    fig_map = _build_state_map(df, state_data)
    fig_pie = _build_category_pie(df)

    fig = make_subplots(
        rows=2, cols=2,
        specs=[
            [{"type": "bar"}, {"type": "pie"}],
            [{"type": "choropleth", "colspan": 2}, None]
        ],
        subplot_titles=("各州平均送货时间", "送货时间分布", "地理分布"),
        vertical_spacing=0.15,
        horizontal_spacing=0.1
    )

    # Bar chart of the state averages (top-left panel).
    fig.add_trace(
        go.Bar(
            x=state_avg['state'],
            y=state_avg['avg_delivery_days'],
            marker_color=state_avg['avg_delivery_days'],
            marker_colorscale='Reds',
            hovertemplate="%{x}: %{y:.1f}天<extra></extra>",
            # Keep the legend for the pie categories only; the bar would
            # otherwise appear there as an unnamed "trace N" entry.
            showlegend=False,
        ),
        row=1, col=1
    )

    # Donut chart (top-right panel).
    fig.add_trace(fig_pie.data[0], row=1, col=2)

    # Choropleth (bottom row, spanning both columns).
    fig.add_trace(fig_map.data[0], row=2, col=1)

    # Geo settings live in the layout, not in the trace, so they have to be
    # applied to the dashboard itself for the map to zoom in on the states.
    fig.update_geos(
        fitbounds="locations",
        visible=False,
        projection_type="mercator"
    )

    fig.update_layout(
        title_text="巴西送货时间分析仪表板",
        height=900,
        showlegend=True,
        # Reuse the map's colour axis so the copied trace keeps its scale.
        coloraxis=fig_map.layout.coloraxis,
        coloraxis_colorbar=dict(
            len=0.5,
            y=0.25
        )
    )

    fig.update_xaxes(title_text="州", row=1, col=1, tickangle=45)
    fig.update_yaxes(title_text="平均送货天数", row=1, col=1)

    return fig

def main():
    """Run the delivery-time analysis end to end.

    Fetches the data, prints summary statistics and the ten slowest states,
    then writes two HTML reports (the dashboard and a per-state bar chart),
    each opened after it is written.
    """
    records = fetch_delivery_time_data()

    # Nothing to analyse: report it and stop.
    if records.empty:
        print("未找到数据。请检查您的BigQuery表。")
        return

    print(f"获取到 {len(records)} 条送货时间记录")

    # Overall summary of the per-location averages.
    days = records['avg_delivery_days']
    print("\n送货时间摘要统计:")
    print(f"全国平均送货时间: {days.mean():.1f} 天")
    print(f"最短送货时间: {days.min():.1f} 天")
    print(f"最长送货时间: {days.max():.1f} 天")

    # Per-state ranking, slowest first.
    per_state = records.groupby('state').agg(
        avg_days=('avg_delivery_days', 'mean'),
        location_count=('city', 'count')
    )
    ranking = per_state.sort_values('avg_days', ascending=False)

    print("\n各州平均送货时间排名:")
    print(ranking.head(10).to_string())

    # Dashboard report.
    dashboard = create_enhanced_visualizations(records)
    pio.write_html(dashboard, "enhanced_delivery_analysis.html", auto_open=True)

    # Stand-alone horizontal bar chart of the state averages.
    state_means = (
        records.groupby('state')['avg_delivery_days']
        .mean()
        .reset_index()
        .sort_values('avg_delivery_days')
    )

    state_bars = px.bar(
        state_means,
        x='avg_delivery_days',
        y='state',
        orientation='h',
        color='avg_delivery_days',
        color_continuous_scale='Reds',
        title='巴西各州平均送货时间',
        labels={'avg_delivery_days': '平均送货天数', 'state': '州'}
    )

    state_bars.update_layout(yaxis_categoryorder='total ascending')
    pio.write_html(state_bars, "state_delivery_times.html", auto_open=True)

# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()

获取到 6575 条送货时间记录

送货时间摘要统计:
全国平均送货时间: 9.1 天
最短送货时间: 1.3 天
最长送货时间: 52.0 天

各州平均送货时间排名:
        avg_days  location_count
state                           
AP     26.365058               5
AL     22.582116              25
PA     20.248487              75
AM     18.371963               6
RR     18.271667               2
MA     18.237594              47
CE     17.894387              82
SE     17.871026              24
AC     17.638901               6
PB     16.836479              35
