In [10]:
import pandas as pd
import plotly.express as px
from google.cloud import bigquery
import plotly.io as pio

# 设置BigQuery客户端
client = bigquery.Client(project='brazil-olist')  

# 从BigQuery中获取数据
def fetch_geo_revenue_data():
    query = """
    SELECT 
        zip_code_prefix,
        avg_latitude,
        avg_longitude,
        city,
        state,
        total_revenue,
        order_count
    FROM `brazil-olist.dbt_output.fct_geo_revenue`  
    WHERE avg_latitude IS NOT NULL AND avg_longitude IS NOT NULL
      AND total_revenue > 1000
    """
    return client.query(query).to_dataframe()

# 创建交互式地图可视化
def create_revenue_map(df):
    # 缩放收入值用于点的大小（避免过大或过小的点）
    max_revenue = df['total_revenue'].max()
    size_factor = max(1, 100 / max_revenue)  # 动态调整大小因子
    
    fig = px.scatter_mapbox(
        df,
        lat="avg_latitude",
        lon="avg_longitude",
        size="total_revenue",
        size_max=30,  # 最大点大小
        # color="state",  # 按州着色
        # 替换color参数
        color="total_revenue",
        color_continuous_scale=px.colors.sequential.Plasma,
        hover_name="city",
        hover_data={
            "zip_code_prefix": True,
            "total_revenue": ":,.2f",  # 格式化收入显示
            "order_count": True,
            "avg_latitude": False,
            "avg_longitude": False
        },
        zoom=3,  # 初始缩放级别（巴西范围）
        height=800,
        title="Revenue by Geographic Location"
    )
    
    # 更新地图样式和布局
    fig.update_layout(
        # mapbox_style="carto-positron",
        # 可用样式：open-street-map, carto-positron, stamen-terrain等
        mapbox_style="open-street-map",
        mapbox_center={"lat": -14.2350, "lon": -51.9253},  # 巴西中心坐标
        margin={"r": 0, "t": 40, "l": 0, "b": 0},
        legend_title_text="State",
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        )
    )
    
    # 自定义悬停提示格式
    fig.update_traces(
        hovertemplate="<b>%{hovertext}</b><br><br>" +
                      "Zip Prefix: %{customdata[0]}<br>" +
                      "Revenue: R$%{customdata[1]:,.2f}<br>" +
                      "Orders: %{customdata[2]}"
    )
    
    return fig

# 主程序
if __name__ == "__main__":
    # 获取数据
    revenue_df = fetch_geo_revenue_data()
    
    # 检查数据是否为空
    if revenue_df.empty:
        print("No data found. Please check your BigQuery table.")
    else:
        print(f"Retrieved {len(revenue_df)} geographic revenue records")
        
        # 创建可视化
        fig = create_revenue_map(revenue_df)
        
        # 保存为HTML文件
        pio.write_html(fig, "geo_revenue_map.html", auto_open=True)
        
        # 在Jupyter中直接显示（取消注释）
        # fig.show()


BigQuery Storage module not found, fetch data with the REST endpoint instead.



Retrieved 4816 geographic revenue records



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/

