In [None]:
import pandas as pd
import plotly.express as px
import numpy as np

# Read the data.
# FIPS codes are identifiers, not numbers, so load them as text. With the
# default dtype inference a single missing value makes the column float, and
# 1001 would later stringify as "1001.0", which zero-padding cannot repair.
df = pd.read_csv('data/birdflu.csv', dtype={'FIPS Codes': str})

# Left-pad to the 5-character county FIPS form (e.g. "1001" -> "01001").
# Missing codes stay NaN instead of becoming a bogus "00nan" location.
df['FIPS Codes'] = df['FIPS Codes'].str.strip().str.zfill(5)

# Log-transform flock size so a few very large flocks do not wash out the
# colour scale; log1p maps a flock size of 0 to 0 instead of -inf.
df['Log_Flock_Size'] = np.log1p(df['Flock Size'])

# Quintile break points of the log-transformed data, plus the same break
# points converted back to the original scale. expm1 is the exact inverse of
# log1p. Neither value is used by the figure below; they are kept for
# inspection or later cells.
quantiles = np.quantile(df['Log_Flock_Size'], [0, 0.2, 0.4, 0.6, 0.8, 1])
quantile_labels = np.expm1(quantiles)

# Create choropleth map: one polygon per county, coloured by log flock size
fig = px.choropleth_map(df,
    geojson='https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json',
    locations='FIPS Codes',
    color='Log_Flock_Size',
    color_continuous_scale="Reds",
    range_color=[df['Log_Flock_Size'].min(), df['Log_Flock_Size'].max()],
    map_style='carto-positron',  # Changed from mapbox_style
    zoom=3.6,
    center={"lat": 37.0902, "lon": -95.7129},
    opacity=0.7,
    # Hide the join key and the transformed value; show the raw flock size
    # with thousands separators instead.
    hover_data={
        'FIPS Codes': False,
        'Log_Flock_Size': False,
        'Flock Size': ':,.0f',
        'County Name': True,
        'State': True
    },
    hover_name=None,
    labels={
        'Flock Size': 'Flock Size',
        'County Name': 'County',
        'State': 'State'
    },
    title="Bird Flu Cases by Flock Size Across US Counties (Log Scale)"
)

# Label the colorbar in original flock-size units. Tick positions live on the
# log1p scale used for colouring, so positions and labels are both derived
# from the same list to keep them in sync.
# NOTE(review): these tick values are hard-coded; confirm the largest one
# still matches the data's maximum flock size when the CSV is refreshed.
tick_flock_sizes = [0, 100, 100_000, 12_000_000]
fig.update_layout(
    coloraxis_colorbar=dict(
        title="Flock Size",
        tickvals=np.log1p(tick_flock_sizes).tolist(),
        ticktext=[f"{size:,}" for size in tick_flock_sizes]
    )
)

# Thin light-grey county outlines
fig.update_traces(
    marker_line_color='rgb(200,200,200)',
    marker_line_width=0.5
)

# Show the map
fig.show()


*choropleth_mapbox* is deprecated! Use *choropleth_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [None]:
# Per-state summary built with named aggregation, so each output column is
# declared next to the source column and reducer that produce it. The result
# is rounded to 2 decimals and ordered by total flock size, largest first.
state_summary = (
    df.groupby('State')
    .agg(**{
        'Total Flock Size': ('Flock Size', 'sum'),
        'Average Flock Size': ('Flock Size', 'mean'),
        'Number of Cases': ('Flock Size', 'count'),
        'Total Outbreaks': ('Outbreaks', 'sum'),
    })
    .round(2)
    .sort_values('Total Flock Size', ascending=False)
)

# Report the ten states with the largest total affected flock size
top_states = state_summary.head(10)
print("Top 10 States by Total Affected Flock Size:")
print(top_states)