In [11]:
import json

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# silence library warnings for the rest of the session
import warnings
warnings.simplefilter('ignore')

# widen the pandas display limits so wide/long frames are not cut off early
pd.options.display.max_columns = 100
pd.options.display.max_rows = 500

| Variable Name                 | Description                                                               |
|-------------------------------|---------------------------------------------------------------------------|
| avganncount                   | Average number of cancer cases diagnosed annually.                       |
| avgdeathsperyear              | Average number of deaths due to cancer per year.                         |
| target_deathrate              | Cancer death rate per 100,000 population; the target (dependent) variable. |
| incidencerate                 | Incidence rate of cancer.                                                |
| medincome                     | Median income in the region.                                             |
| popest2015                    | Estimated population in 2015.                                            |
| povertypercent                | Percentage of population below the poverty line.                         |
| studypercap                   | Per capita number of cancer-related clinical trials conducted.           |
| binnedinc                     | Median income binned into intervals, e.g. `(48021.6, 51046.4]`.           |
| medianage                     | Median age in the region.                                                |
| pctprivatecoveragealone       | Percentage of population covered by private health insurance alone.      |
| pctempprivcoverage            | Percentage of population covered by employer-provided private health insurance. |
| pctpubliccoverage             | Percentage of population covered by public health insurance.             |
| pctpubliccoveragealone        | Percentage of population covered by public health insurance only.        |
| pctwhite                      | Percentage of White population.                                          |
| pctblack                      | Percentage of Black population.                                          |
| pctasian                      | Percentage of Asian population.                                          |
| pctotherrace                  | Percentage of population belonging to other races.                       |
| pctmarriedhouseholds          | Percentage of married households.                                        |
| birthrate                     | Birth rate in the region.                                                |


In [12]:
# read both source tables: the per-region cancer statistics and the
# household-size table that is joined onto them in the next cell
cancer_stats, regional_stats = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/cancer_reg_stats.csv',
        'data/cancer_avg-household-size.csv',
    )
)

In [13]:
# join on geography
# Only bring over the regional columns that cancer_stats does not have yet.
# The result is assigned back to cancer_stats, so without this guard a second
# run of the cell would merge the same columns again and produce suffixed
# duplicates (statefips_x / statefips_y, ...), breaking later column lookups.
regional_cols = [col for col in regional_stats.columns if col not in cancer_stats.columns]
if regional_cols:
    cancer_stats = cancer_stats.merge(
        regional_stats[['geography', *regional_cols]],
        on='geography',
        how='inner',
    )

In [14]:
# Numeric state FIPS code -> two-letter postal abbreviation for the 50 states
# plus DC. The numbering has gaps (3, 7, 14, 43 and 52 are not present).
statefips_dict = dict([
    (1, "AL"), (2, "AK"), (4, "AZ"), (5, "AR"), (6, "CA"), (8, "CO"),
    (9, "CT"), (10, "DE"), (11, "DC"), (12, "FL"), (13, "GA"), (15, "HI"),
    (16, "ID"), (17, "IL"), (18, "IN"), (19, "IA"), (20, "KS"), (21, "KY"),
    (22, "LA"), (23, "ME"), (24, "MD"), (25, "MA"), (26, "MI"), (27, "MN"),
    (28, "MS"), (29, "MO"), (30, "MT"), (31, "NE"), (32, "NV"), (33, "NH"),
    (34, "NJ"), (35, "NM"), (36, "NY"), (37, "NC"), (38, "ND"), (39, "OH"),
    (40, "OK"), (41, "OR"), (42, "PA"), (44, "RI"), (45, "SC"), (46, "SD"),
    (47, "TN"), (48, "TX"), (49, "UT"), (50, "VT"), (51, "VA"), (53, "WA"),
    (54, "WV"), (55, "WI"), (56, "WY"),
])

In [15]:
# Display the merged frame to eyeball the join result; pandas renders a
# truncated head/tail preview when the frame exceeds display.max_rows.
cancer_stats

Unnamed: 0,avganncount,avgdeathsperyear,target_deathrate,incidencerate,medincome,popest2015,povertypercent,studypercap,binnedinc,medianage,medianagemale,medianagefemale,geography,percentmarried,pctnohs18_24,pcths18_24,pctsomecol18_24,pctbachdeg18_24,pcths25_over,pctbachdeg25_over,pctemployed16_over,pctunemployed16_over,pctprivatecoverage,pctprivatecoveragealone,pctempprivcoverage,pctpubliccoverage,pctpubliccoveragealone,pctwhite,pctblack,pctasian,pctotherrace,pctmarriedhouseholds,birthrate,statefips,countyfips,avghouseholdsize
0,1397.000000,469,164.9,489.800000,61898,260131,11.2,499.748204,"(61494.5, 125635]",39.3,36.9,41.7,"Kitsap County, Washington",52.5,11.5,39.5,42.1,6.9,23.2,19.6,51.9,8.0,75.1,,41.6,32.9,14.0,81.780529,2.594728,4.821857,1.843479,52.856076,6.118831,53,35,2.54
1,173.000000,70,161.3,411.600000,48127,43269,18.6,23.111234,"(48021.6, 51046.4]",33.0,32.2,33.7,"Kittitas County, Washington",44.5,6.1,22.4,64.0,7.5,26.0,22.7,55.9,7.8,70.2,53.8,43.6,31.1,15.3,89.228509,0.969102,2.246233,3.741352,45.372500,4.333096,53,37,2.34
2,102.000000,50,174.7,349.700000,49348,21026,14.6,47.560164,"(48021.6, 51046.4]",45.0,44.0,45.8,"Klickitat County, Washington",54.2,24.0,36.6,,9.5,29.0,16.0,45.9,7.0,63.7,43.5,34.9,42.1,21.1,90.922190,0.739673,0.465898,2.747358,54.444868,3.729488,53,39,2.62
3,427.000000,202,194.8,430.400000,44243,75882,17.1,342.637253,"(42724.4, 45201]",42.8,42.2,43.4,"Lewis County, Washington",52.7,20.2,41.2,36.1,2.5,31.6,9.3,48.3,12.1,58.4,40.3,35.0,45.3,25.0,91.744686,0.782626,1.161359,1.362643,51.021514,4.603841,53,41,2.52
4,57.000000,26,144.4,350.100000,49955,10321,12.5,0.000000,"(48021.6, 51046.4]",48.3,47.8,48.9,"Lincoln County, Washington",57.8,14.9,43.0,40.0,2.0,33.4,15.0,48.2,4.8,61.6,43.9,35.1,44.0,22.7,94.104024,0.270192,0.665830,0.492135,54.027460,6.796657,53,43,2.34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3042,1962.667684,15,149.6,453.549422,46961,6343,12.4,0.000000,"(45201, 48021.6]",44.2,41.1,48.8,"Ellsworth County, Kansas",51.0,20.9,27.9,,8.4,32.2,15.2,51.7,4.3,78.3,54.9,44.6,31.7,13.2,90.280811,3.837754,0.327613,1.700468,51.063830,7.773512,20,53,2.08
3043,1962.667684,43,150.1,453.549422,48609,37118,18.8,377.175494,"(48021.6, 51046.4]",30.4,29.3,31.4,"Finney County, Kansas",52.6,26.7,33.9,35.6,3.8,23.1,12.4,70.1,4.6,64.5,53.3,48.6,28.8,17.7,75.706245,2.326771,4.044920,14.130288,52.007937,8.186470,20,55,2.90
3044,1962.667684,46,153.9,453.549422,51144,34536,15.0,1968.959926,"(51046.4, 54545.6]",30.9,30.5,31.2,"Ford County, Kansas",54.8,19.7,44.5,33.3,2.5,23.0,12.8,64.8,6.4,62.0,52.6,47.8,26.6,16.8,87.961629,2.313188,1.316472,5.680705,55.153949,7.809192,20,57,3.04
3045,1962.667684,52,175.0,453.549422,50745,25609,13.3,0.000000,"(48021.6, 51046.4]",39.0,36.9,40.5,"Franklin County, Kansas",58.8,10.9,37.2,,4.1,36.1,14.4,,6.9,75.9,56.3,49.6,29.5,14.0,92.905681,1.176562,0.244632,2.131790,58.484232,7.582938,20,59,2.56


In [16]:
# Load the state outlines (GeoJSON) that the choropleth layers are drawn on
with open('data/us-states.json', encoding='utf-8') as f:
    us_states = json.load(f)

# Map the statefips codes to the corresponding states in the cancer_stats DataFrame
cancer_stats['state'] = cancer_stats['statefips'].map(statefips_dict)

# List of metrics (one map layer each)
metrics = [
    'avganncount', 'avgdeathsperyear', 'target_deathrate', 'incidencerate',
    'medincome', 'popest2015', 'povertypercent', 'studypercap', 'medianage',
    'pctprivatecoveragealone', 'pctempprivcoverage', 'pctpubliccoverage',
    'pctpubliccoveragealone', 'pctwhite', 'pctblack', 'pctasian',
    'pctotherrace', 'pctmarriedhouseholds', 'birthrate',
]

# cancer_stats holds many rows per state, but the map has one shape per state.
# folium.Choropleth collapses `data` into a {key: value} lookup, so passing the
# row-level frame would colour each state by whichever of its rows comes last.
# Aggregate to one row per state first: totals for the count columns, the
# (unweighted) mean for everything else. Rows without a mapped state are
# dropped by the groupby.
# NOTE(review): an unweighted mean of rates/percentages ignores county size;
# switch to a population-weighted mean if exact state-level rates are needed.
count_metrics = {'avganncount', 'avgdeathsperyear', 'popest2015'}
state_stats = (
    cancer_stats
    .groupby('state')[metrics]
    .agg({metric: 'sum' if metric in count_metrics else 'mean' for metric in metrics})
    .reset_index()
)

# Create the base map; zoom buttons, scroll-wheel zoom and dragging are all
# switched off so the view stays fixed
m = folium.Map(location=[48, -102], zoom_start=3,
               zoom_control=False,
               scrollWheelZoom=False,
               dragging=False)

# For each metric, create a Choropleth layer and add it to the Map object
for i, metric in enumerate(metrics):
    folium.Choropleth(
        geo_data=us_states,
        name=metric,
        data=state_stats,
        columns=['state', metric],
        key_on='feature.id',  # joins on the GeoJSON feature id (expects state abbreviations)
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=metric,
        show=(i == 0),  # Only show the first layer initially
    ).add_to(m)

# Add a LayerControl object so the metric layers can be toggled
folium.LayerControl().add_to(m)

# Frame the initial view on the contiguous US (this sets the view, it does not
# restrict panning)
m.fit_bounds([[49, -67], [24, -125]])

m