In [None]:
import pandas as pd
import geojson
import altair as alt
import json

In [None]:
# Preprocessing: read the three source CSVs and the ZIP boundary GeoJSON, then
# join them step by step into `final_merged` (boundary geometry + population
# columns + clinic count + flu-level columns), keyed by ZIP code.

population = pd.read_csv('data/Chicago_Population_Counts.csv')
locations = pd.read_csv('data/Flu_Shot_Locations_-_2014_-_Present.csv')
flu_levels = pd.read_csv('data/Influenza_Risk_Level_by_ZIP_Code.csv')

# Number of rows in the flu-shot locations file for each postal code.
locations_per_zip = (
    locations
    .groupby('Postal Code')
    .size()
    .reset_index(name='Flu Clinic Location Count')
)

# Inner join: only population rows whose 'Geography' equals a postal code that
# appears in the locations file survive. The duplicate key column is dropped.
merged_data = pd.merge(
    population,
    locations_per_zip,
    left_on='Geography',
    right_on='Postal Code',
    how='inner',
)
merged_data = merged_data.drop(columns=['Postal Code'])

with open('data/Boundaries - ZIP Codes.geojson') as boundary_file:
    geojson_data = json.load(boundary_file)

# One record per boundary feature: its 'zip' property and its raw geometry mapping.
zip_codes = [
    {
        'zip_code': boundary['properties']['zip'],
        'geometry': boundary['geometry'],
    }
    for boundary in geojson_data['features']
]
geo_df = pd.DataFrame(zip_codes)

# Compare the join keys as strings on both sides.
geo_df['zip_code'] = geo_df['zip_code'].astype(str)
merged_data['Geography'] = merged_data['Geography'].astype(str)

final_merged_df = pd.merge(
    geo_df,
    merged_data,
    left_on='zip_code',
    right_on='Geography',
    how='inner',
)
final_merged_df = final_merged_df.drop(columns=['Geography'])

# Attach the flu-level rows; again the redundant right-hand key is removed.
flu_levels['ZIP_Code'] = flu_levels['ZIP_Code'].astype(str)
final_merged = pd.merge(
    final_merged_df,
    flu_levels,
    left_on='zip_code',
    right_on='ZIP_Code',
    how='inner',
)
final_merged = final_merged.drop(columns=['ZIP_Code'])

In [None]:
# Flu Clinic Location Counts by ZIP Code in Chicago
#
# Choropleth of ZIP boundaries coloured by the number of flu clinic locations.

# `final_merged` can hold several rows per ZIP code (other cells aggregate it by
# zip_code), while the clinic count is a single per-ZIP value. Keeping one row
# per ZIP avoids embedding the same polygon in the chart spec many times and
# stacking identical semi-transparent shapes on top of each other.
clinic_count_by_zip = final_merged.drop_duplicates(subset='zip_code')

geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": shape,
            "properties": {
                "zip_code": zip_code,
                "flu_clinic_count": clinic_count
            }
        }
        for zip_code, shape, clinic_count in zip(
            clinic_count_by_zip['zip_code'],
            clinic_count_by_zip['geometry'],
            clinic_count_by_zip['Flu Clinic Location Count'],
        )
    ]
}

# The features are passed as inline values, so fields are addressed through the
# nested `properties.` path of each GeoJSON feature.
choropleth = alt.Chart(alt.Data(values=geojson_data['features'])).mark_geoshape(
    fillOpacity=0.8,
    stroke="black",
    strokeWidth=0.5
).encode(
    color=alt.Color(
        'properties.flu_clinic_count:Q',
        title='Flu Clinic Count',
        scale=alt.Scale(scheme='blues')
    ),
    tooltip=[
        alt.Tooltip('properties.zip_code:N', title='ZIP Code'),
        alt.Tooltip('properties.flu_clinic_count:Q', title='Flu Clinic Count', format='.0f')
    ]
).project(
    type="mercator"
).properties(
    width=700,
    height=500,
    title='Flu Clinic Location Counts by ZIP Code in Chicago'
)

choropleth

In [None]:
# Average ILI Activity Level by ZIP Code in Chicago
#
# Choropleth of ZIP boundaries coloured by the mean ILI activity level.

# The chart is titled "Average", so aggregate explicitly: take the mean of
# ILI_Activity_Level over all rows of each ZIP code instead of emitting every
# row's raw value (which would stack one polygon per row and show whichever
# row happens to be drawn last).
avg_ili_by_zip = (
    final_merged
    .groupby('zip_code', as_index=False)['ILI_Activity_Level']
    .mean()
)

# One geometry per ZIP code, joined to its average.
zip_shapes = final_merged.drop_duplicates(subset='zip_code')[['zip_code', 'geometry']]
ili_by_zip = zip_shapes.merge(avg_ili_by_zip, on='zip_code', how='left')

geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": shape,
            "properties": {
                "zip_code": zip_code,
                "avg_ili_activity_level": avg_level
            }
        }
        for zip_code, shape, avg_level in zip(
            ili_by_zip['zip_code'],
            ili_by_zip['geometry'],
            ili_by_zip['ILI_Activity_Level'],
        )
    ]
}

choropleth2 = alt.Chart(alt.Data(values=geojson_data['features'])).mark_geoshape(
    fillOpacity=0.8,
    stroke="black",
    strokeWidth=0.5
).encode(
    color=alt.Color(
        'properties.avg_ili_activity_level:Q',
        title='Avg ILI Activity Level',
        # NOTE(review): domainMid is meant for diverging scales and should lie
        # inside the data range; confirm 0 is appropriate for these levels.
        scale=alt.Scale(
            scheme='plasma',
            domainMid=0,
            nice=True
        )
    ),
    tooltip=[
        alt.Tooltip('properties.zip_code:N', title='ZIP Code'),
        alt.Tooltip('properties.avg_ili_activity_level:Q', title='Avg ILI Activity Level', format='.2f')
    ]
).project(
    type="mercator"
).properties(
    width=700,
    height=500,
    title='Average ILI Activity Level by ZIP Code in Chicago'
)

choropleth2

In [None]:
# Ratio of ILI Activity Level to Flu Clinics by ZIP Code in Chicago
#
# For every ZIP code: mean ILI activity level divided by its flu clinic count,
# drawn as a choropleth.

avg_ili_levels = (
    final_merged
    .groupby('zip_code')['ILI_Activity_Level']
    .mean()
    .reset_index()
    .rename(columns={'ILI_Activity_Level': 'Avg_ILI_Activity_Level'})
)

# The clinic count is already a per-ZIP figure stored on every row, so take the
# stored value. Using .count() here would return the number of rows per ZIP in
# final_merged, not the number of clinics.
flu_clinic_counts = (
    final_merged
    .groupby('zip_code')['Flu Clinic Location Count']
    .first()
    .reset_index()
    .rename(columns={'Flu Clinic Location Count': 'Flu_Clinic_Count'})
)

ili_flu_ratio = avg_ili_levels.merge(flu_clinic_counts, on='zip_code', how='left')
# A missing count falls back to 1 so the ratio equals the average level.
# Assigned back rather than filled in place: an in-place fillna on a selected
# column is not guaranteed to write through to the parent frame.
ili_flu_ratio['Flu_Clinic_Count'] = ili_flu_ratio['Flu_Clinic_Count'].fillna(1)
ili_flu_ratio['ILI_Flu_Ratio'] = ili_flu_ratio['Avg_ILI_Activity_Level'] / ili_flu_ratio['Flu_Clinic_Count']

# One geometry per ZIP code joined to its ratio; this replaces filtering the
# ratio table once per row of final_merged.
zip_shapes = final_merged.drop_duplicates(subset='zip_code')[['zip_code', 'geometry']]
ratio_by_zip = zip_shapes.merge(
    ili_flu_ratio[['zip_code', 'ILI_Flu_Ratio']], on='zip_code', how='left'
)

geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": shape,
            "properties": {
                "zip_code": zip_code,
                "ili_flu_ratio": ili_flu_ratio_value
            }
        }
        for zip_code, shape, ili_flu_ratio_value in zip(
            ratio_by_zip['zip_code'],
            ratio_by_zip['geometry'],
            ratio_by_zip['ILI_Flu_Ratio'],
        )
    ]
}

choropleth_ratio = alt.Chart(alt.Data(values=geojson_data['features'])).mark_geoshape(
    fillOpacity=0.8,
    stroke="black",
    strokeWidth=0.5
).encode(
    color=alt.Color(
        'properties.ili_flu_ratio:Q',
        title='ILI Activity / Flu Clinics Ratio',
        scale=alt.Scale(
            scheme='viridis',
            nice=True
        )
    ),
    tooltip=[
        alt.Tooltip('properties.zip_code:N', title='ZIP Code'),
        alt.Tooltip('properties.ili_flu_ratio:Q', title='ILI/Flu Clinic Ratio', format='.2f')
    ]
).project(
    type="mercator"
).properties(
    width=700,
    height=500,
    title='Ratio of ILI Activity Level to Flu Clinics by ZIP Code in Chicago'
)

choropleth_ratio

In [None]:
# Population-to-Clinic Ratio by ZIP Code in Chicago
#
# Total population divided by the flu clinic count for each row, stored on
# `final_merged` as 'Population-to-Clinic Ratio' and drawn as a choropleth.

# Infinite results (division by a zero count) become NaN and then 0.
# NaN is used as the replacement value on purpose: Series.replace(..., None)
# behaves differently across pandas versions (forward-fill in older releases,
# object-dtype None in newer ones). The result is assigned back instead of
# using a chained in-place fillna, which may not write through to the frame.
population_per_clinic = final_merged['Population - Total'] / final_merged['Flu Clinic Location Count']
final_merged['Population-to-Clinic Ratio'] = (
    population_per_clinic
    .replace([float('inf'), -float('inf')], float('nan'))
    .fillna(0)
)

# Drop rows that would produce byte-identical features (same ZIP code, same
# ratio). keep='last' leaves the final row of each ZIP in place, so the shape
# drawn on top for a ZIP is the same one as when every row is emitted.
ratio_rows = final_merged.drop_duplicates(
    subset=['zip_code', 'Population-to-Clinic Ratio'], keep='last'
)

geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": shape,
            "properties": {
                "zip_code": zip_code,
                "Population-to-Clinic Ratio": pop_clinic_ratio
            }
        }
        for zip_code, shape, pop_clinic_ratio in zip(
            ratio_rows['zip_code'],
            ratio_rows['geometry'],
            ratio_rows['Population-to-Clinic Ratio'],
        )
    ]
}

choropleth = alt.Chart(alt.Data(values=geojson_data['features'])).mark_geoshape(
    fillOpacity=0.8,
    stroke="black",
    strokeWidth=0.5
).encode(
    color=alt.Color(
        'properties.Population-to-Clinic Ratio:Q',
        title='Population per Clinic',
        scale=alt.Scale(scheme="reds")
    ),
    tooltip=[
        alt.Tooltip('properties.zip_code:N', title='ZIP Code'),
        alt.Tooltip('properties.Population-to-Clinic Ratio:Q', title='Population per Clinic', format='.2f')
    ]
).project(
    type="mercator"
).properties(
    width=700,
    height=500,
    title='Population-to-Clinic Ratio by ZIP Code in Chicago'
)

choropleth

In [None]:
# Total Minority Population per ZIP Code in Chicago
#
# Sums the Latinx, Asian Non-Latinx and Black Non-Latinx population columns
# into 'Total_Minority_Population' on `final_merged` and draws a choropleth.

final_merged['Total_Minority_Population'] = (
    final_merged['Population - Latinx']
    + final_merged['Population - Asian Non-Latinx']
    + final_merged['Population - Black Non-Latinx']
)

# `final_merged` can contain repeated rows per ZIP code (other cells aggregate
# it by zip_code). Rows with the same ZIP code and the same total would yield
# identical features, so only one of each is kept. keep='last' preserves the
# final row of every ZIP, i.e. the shape that ends up drawn on top.
minority_rows = final_merged.drop_duplicates(
    subset=['zip_code', 'Total_Minority_Population'], keep='last'
)

geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": shape,
            "properties": {
                "zip_code": zip_code,
                "Total_Minority_Population": minority_population
            }
        }
        for zip_code, shape, minority_population in zip(
            minority_rows['zip_code'],
            minority_rows['geometry'],
            minority_rows['Total_Minority_Population'],
        )
    ]
}

choropleth_minority_population = alt.Chart(alt.Data(values=geojson_data['features'])).mark_geoshape(
    fillOpacity=0.8,
    stroke="black",
    strokeWidth=0.5
).encode(
    color=alt.Color(
        'properties.Total_Minority_Population:Q',
        title='Total Minority Population',
        scale=alt.Scale(scheme="purples")
    ),
    tooltip=[
        alt.Tooltip('properties.zip_code:N', title='ZIP Code'),
        alt.Tooltip('properties.Total_Minority_Population:Q', title='Total Minority Population', format='.0f')
    ]
).project(
    type="mercator"
).properties(
    width=700,
    height=500,
    title='Total Minority Population per ZIP Code in Chicago'
)

choropleth_minority_population

In [None]:
# Flu clinic count by demographic
#
# Scatter of each demographic group's population against the flu clinic count,
# one facet per group, with a per-group regression line layered on top.

# Rebuild the population/clinic-count join from the raw frames and discard the
# redundant right-hand key column.
merged_data = pd.merge(
    population,
    locations_per_zip,
    left_on='Geography',
    right_on='Postal Code',
    how='inner',
).drop(columns=['Postal Code'])

demographic_columns = [
    'Population - Latinx',
    'Population - Asian Non-Latinx',
    'Population - Black Non-Latinx',
    'Population - White Non-Latinx',
]

# Long format: one row per (Geography, demographic group) with its population.
melted_data = pd.melt(
    merged_data,
    id_vars=['Geography', 'Flu Clinic Location Count'],
    value_vars=demographic_columns,
    var_name='Demographic Group',
    value_name='Population',
)

point_tooltips = [
    alt.Tooltip('Geography:N', title='Zip Code'),
    alt.Tooltip('Population:Q', title='Population'),
    alt.Tooltip('Flu Clinic Location Count:Q', title='Flu Clinic Count'),
]

scatter_plot = (
    alt.Chart(melted_data)
    .mark_circle(size=60)
    .encode(
        x=alt.X('Population:Q', title='Population'),
        y=alt.Y('Flu Clinic Location Count:Q', title='Flu Clinic Count'),
        color=alt.Color('Demographic Group:N', legend=None),
        tooltip=point_tooltips,
    )
)

# Regression of clinic count on population, fitted separately for each group.
regression_line = (
    alt.Chart(melted_data)
    .transform_regression(
        'Population', 'Flu Clinic Location Count', groupby=['Demographic Group']
    )
    .mark_line(color='red')
    .encode(
        x='Population:Q',
        y='Flu Clinic Location Count:Q',
    )
)

# Layer points and fit, then split into a two-column grid of facets that share
# the x scale but each get their own y scale.
layered_plot = (scatter_plot + regression_line).properties(width=200, height=300)
faceted_plot = layered_plot.facet(
    facet=alt.Facet('Demographic Group:N', title=None),
    columns=2,
).resolve_scale(
    y='independent',
    x='shared',
)

faceted_plot

In [None]:
# ILI activity by demographic
#
# Scatter of each demographic group's population against the ZIP code's mean
# ILI activity level, one facet per group, with a per-group regression line.

# Lift Altair's row limit: the melted frame is passed to the chart directly.
alt.data_transformers.disable_max_rows()

avg_ili_levels = (
    final_merged
    .groupby('zip_code')['ILI_Activity_Level']
    .mean()
    .reset_index()
    .rename(columns={'ILI_Activity_Level': 'Avg_ILI_Activity_Level'})
)

# Attach the per-ZIP average to every row of final_merged. Any copy of the
# column left by a previous run of this cell is removed first; otherwise the
# merge would produce 'Avg_ILI_Activity_Level_x' / '_y' and the melt below
# would fail with a KeyError on re-execution.
final_merged = (
    final_merged
    .drop(columns=['Avg_ILI_Activity_Level'], errors='ignore')
    .merge(avg_ili_levels, on='zip_code', how='left')
)

demographic_columns = [
    'Population - Latinx',
    'Population - Asian Non-Latinx',
    'Population - Black Non-Latinx',
    'Population - White Non-Latinx'
]

# Long format: one row per (source row, demographic group) with its population.
melted_data = final_merged.melt(
    id_vars=['zip_code', 'Avg_ILI_Activity_Level'],
    value_vars=demographic_columns,
    var_name='Demographic Group',
    value_name='Population'
)

scatter_plot = alt.Chart(melted_data).mark_circle(size=60).encode(
    x=alt.X('Population:Q', title='Population'),
    y=alt.Y('Avg_ILI_Activity_Level:Q', title='Average ILI Activity Level'),
    color=alt.Color('Demographic Group:N', legend=None),
    tooltip=[
        alt.Tooltip('zip_code:N', title='Zip Code'),
        alt.Tooltip('Population:Q', title='Population'),
        alt.Tooltip('Avg_ILI_Activity_Level:Q', title='Average ILI Activity Level')
    ]
)

# Regression of average ILI level on population, fitted separately per group.
regression_line = alt.Chart(melted_data).mark_line(color='red').transform_regression(
    'Population', 'Avg_ILI_Activity_Level', groupby=['Demographic Group']
).encode(
    x='Population:Q',
    y='Avg_ILI_Activity_Level:Q'
)

# Two-column facet grid; x scale shared, y scale independent per facet.
faceted_plot = (scatter_plot + regression_line).properties(
    width=200,
    height=300
).facet(
    facet=alt.Facet('Demographic Group:N', title=None),
    columns=2
).resolve_scale(
    y='independent',
    x='shared'
)

faceted_plot


In [None]:
# ILI Activity Level vs Flu Clinic Location Count
#
# One point per (ZIP code, clinic count) pair: mean ILI activity level on x,
# flu clinic location count on y, with a linear regression line overlaid.

avg_ili_levels = (
    final_merged
    .groupby('zip_code')['ILI_Activity_Level']
    .mean()
    .reset_index()
    .rename(columns={'ILI_Activity_Level': 'Avg_ILI_Activity_Level'})
)

# Distinct (zip_code, clinic count) pairs, joined to the per-ZIP average.
clinic_counts = final_merged[['zip_code', 'Flu Clinic Location Count']].drop_duplicates()
zip_aggregated = avg_ili_levels.merge(clinic_counts, on='zip_code')

scatter_plot = (
    alt.Chart(zip_aggregated)
    .mark_circle(size=100)
    .encode(
        x=alt.X('Avg_ILI_Activity_Level:Q', title='Average ILI Activity Level'),
        y=alt.Y('Flu Clinic Location Count:Q', title='Flu Clinic Location Count'),
        tooltip=['zip_code', 'Avg_ILI_Activity_Level', 'Flu Clinic Location Count'],
        color=alt.Color('zip_code:N', title='ZIP Code'),
    )
    .properties(
        title='ILI Activity Level vs Flu Clinic Location Count',
        width=800,
        height=500,
    )
)

# Linear fit of clinic count on average ILI level, drawn in red.
regression_line = (
    alt.Chart(zip_aggregated)
    .transform_regression(
        'Avg_ILI_Activity_Level', 'Flu Clinic Location Count', method='linear'
    )
    .mark_line(color='red')
    .encode(
        x=alt.X('Avg_ILI_Activity_Level:Q'),
        y=alt.Y('Flu Clinic Location Count:Q'),
    )
)

scatter_with_regression = scatter_plot + regression_line

scatter_with_regression.display()