In [80]:
from pathlib import Path

import folium
import pandas as pd
from IPython.display import display, IFrame

In [4]:
# Relative path to the US states GeoJSON file.
US_states_json = "../data/processed/us-states.json"

In [37]:
# 1. Import necessary libraries
import pandas as pd
import folium
import json


In [45]:
# 2. Load the GeoJSON and CSV datasets
#    - us-states.json contains state boundaries
#    - correct_dtypes_merged_fcc_census_1940.csv has 1939 station counts + 1940 census
#    - correct_dtypes_merged_fcc_census_1950.csv has 1950 station counts + 1950 census
# The GeoJSON is opened explicitly as UTF-8 so that loading does not depend on
# the platform's default text encoding.
with open('../data/processed/us-states.json', encoding='utf-8') as f:
    states_geo = json.load(f)

df1940 = pd.read_csv('../data/processed/correct_dtypes_merged_fcc_census_1940.csv')
df1950 = pd.read_csv('../data/processed/correct_dtypes_merged_fcc_census_1950.csv')


In [47]:
# Show the raw column names of both frames, one Index per line.
print(df1940.columns, df1950.columns, sep='\n')

Index(['state', 'total', 'Population_1940', 'radio_per_100k'], dtype='object')
Index(['State', 'AM', 'FM_Commercial', 'FM_Educational', 'TV', 'Total',
       'Population_1950', 'total_radio', 'radio_per_100k'],
      dtype='object')


In [50]:
# 3. Standardize column names so both years expose the same key columns:
#   'state'              – state name matching the GeoJSON "properties.name"
#   'total_stations'     – number of radio stations in that year
#   'population'         – census population for that year
#   'stations_per_100k'  – computed stations per 100,000 people
#
# Columns not listed in a mapping are left untouched.
df1940 = df1940.rename(columns=dict(
    total='total_stations',
    Population_1940='population',
    radio_per_100k='stations_per_100k',
))
df1950 = df1950.rename(columns=dict(
    State='state',
    total_radio='total_stations',
    Population_1950='population',
    radio_per_100k='stations_per_100k',
))



In [52]:
# Confirm the renamed columns of both frames, one Index per line.
print(df1940.columns, df1950.columns, sep='\n')

Index(['state', 'total_stations', 'population', 'stations_per_100k'], dtype='object')
Index(['state', 'AM', 'FM_Commercial', 'FM_Educational', 'TV', 'Total',
       'population', 'total_stations', 'stations_per_100k'],
      dtype='object')


In [133]:
# 4. Define a function to create and save a choropleth map
def make_choropleth(df, value_column, legend, outfile, bins=None):
    """
    Build a choropleth from `df`, save it as HTML and display it inline.

    The state boundaries come from the module-level `states_geo` object and
    are joined to `df` on the GeoJSON 'feature.properties.name' field.

    df            : DataFrame with 'state' + value_column
    value_column  : name of column to visualize
    legend        : legend title (string)
    outfile       : filename to save the HTML map to; missing parent
                    directories are created
    bins          : list of numeric break-points for your color scale;
                    if None, folium's default binning is used

    Returns None.
    """
    m = folium.Map(location=[37.8, -96], zoom_start=4)

    choropleth_kwargs = dict(
        geo_data=states_geo,
        data=df,
        columns=['state', value_column],
        key_on='feature.properties.name',
        fill_color='YlGnBu',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend,
    )
    # `bins` replaces the deprecated `threshold_scale` keyword. It is only
    # passed when break-points were given, so folium keeps its own default
    # otherwise.
    if bins is not None:
        choropleth_kwargs['bins'] = bins
    folium.Choropleth(**choropleth_kwargs).add_to(m)

    folium.LayerControl().add_to(m)

    # Create the target directory first; saving into a missing directory
    # would otherwise raise FileNotFoundError.
    Path(outfile).parent.mkdir(parents=True, exist_ok=True)
    m.save(outfile)
    print(f"Saved map to {outfile}")

    display(IFrame(src=outfile, width='100%', height=500))



In [None]:
# 5. Generate the four maps
#    Map A: Total stations in 1939 (using 1940 census file)

In [108]:
# Map A: total station counts for 1939; no explicit bins are passed.
make_choropleth(df1940, 'total_stations', 'Total Radio Stations (1939)',
                '../output/stations_1939_total.html')

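# Map color was Spectral: it is a diverging palette, and the data has many middling values plus a few extremes, so keep it Spectral.
# NOTE(review): make_choropleth hard-codes fill_color='YlGnBu', so this note may be stale — confirm which palette is intended.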



Saved map to ../output/stations_1939_total.html


In [135]:
#    Map B: Stations per 100k in 1939
import numpy as np  # NOTE(review): `np` is not used in this cell; kept in case other cells rely on it

# Compute the 0%, 20%, 40%, 60%, 80%, 100% quantiles to use as class breaks
qs = df1940['stations_per_100k'].quantile([0, .2, .4, .6, .8, 1])
quantile_bins = qs.tolist()
make_choropleth(
    df1940,
    value_column='stations_per_100k',
    legend='Stations per 100k (1939)',
    # Written under ../output with the year in the file name, matching the
    # other three maps in this notebook.
    outfile='../output/per100k_1939_quantiles.html',
    bins=quantile_bins,
)

# Changed the color palette to YlGnBu in the make_choropleth function.

# To get a more accurate picture, quantile classification was used to avoid the classification bias discussed in "Lying With Maps".

Saved map to per100k_quantiles.html


In [125]:
# Map C: total station counts for 1950; no explicit bins are passed.
make_choropleth(df1950, 'total_stations', 'Total Radio Stations (1950)',
                '../output/stations_1950_total.html')


Saved map to ../output/stations_1950_total.html


In [142]:
# Map D: stations per 100k people in 1950, classed by quantiles.
# Break-points are the 0/20/40/60/80/100th percentiles of the rate column.
qs = df1950['stations_per_100k'].quantile([0, .2, .4, .6, .8, 1.0])
quantile_bins = qs.tolist()
print("Quantile breakpoints:", quantile_bins)

# Draw the map using those break-points as the color classes.
make_choropleth(df1950, 'stations_per_100k', 'Radio Stations per 100k (1950)',
                '../output/per100k_1950_quantiles.html', bins=quantile_bins)

Quantile breakpoints: [0.7238390603824476, 1.8177227094033332, 2.1885060883857226, 2.5104349328886246, 3.1550412022926935, 7.773450556967732]
Saved map to ../output/per100k_1950_quantiles.html
