In [3]:
# Import dependencies
import numpy as np
import pandas as pd
import folium

In [4]:
# Show every column when a wide frame is displayed (no truncation)
pd.set_option('display.max_columns', None)

# Load the Behavioral Risk Factor Surveillance System export and preview it
csv_path = "../Resources/Nutrition__Physical_Activity__and_Obesity_-_Behavioral_Risk_Factor_Surveillance_System.csv"
raw_data = pd.read_csv(csv_path)
raw_data.head()

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,Datasource,Class,Topic,Question,Data_Value_Unit,Data_Value_Type,Data_Value,Data_Value_Alt,Data_Value_Footnote_Symbol,Data_Value_Footnote,Low_Confidence_Limit,High_Confidence_Limit,Sample_Size,Total,Age(years),Education,Gender,Income,Race/Ethnicity,GeoLocation,ClassID,TopicID,QuestionID,DataValueTypeID,LocationID,StratificationCategory1,Stratification1,StratificationCategoryId1,StratificationID1
0,2012,2012,WY,Wyoming,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,48.5,48.5,,,32.3,64.9,69,,,,,,American Indian/Alaska Native,"(43.235541343, -108.109830353)",OWS,OWS1,Q037,VALUE,56,Race/Ethnicity,American Indian/Alaska Native,RACE,RACENAA
1,2012,2012,DC,District of Columbia,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,31.6,31.6,,,24.0,40.4,243,,,Less than high school,,,,"(38.890371385, -77.031961127)",OWS,OWS1,Q036,VALUE,11,Education,Less than high school,EDU,EDUHS
2,2011,2011,AL,Alabama,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,35.2,35.2,,,30.7,40.0,598,,25 - 34,,,,,"(32.840571122, -86.631860762)",OWS,OWS1,Q036,VALUE,1,Age (years),25 - 34,AGEYR,AGEYR2534
3,2013,2013,US,National,Behavioral Risk Factor Surveillance System,Physical Activity,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,,Value,27.9,27.9,,,27.6,28.3,266452,,,,Female,,,,PA,PA1,Q047,VALUE,59,Gender,Female,GEN,FEMALE
4,2011,2011,US,National,Behavioral Risk Factor Surveillance System,Physical Activity,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,,Value,16.9,16.9,,,16.0,17.8,20923,,18 - 24,,,,,,PA,PA1,Q047,VALUE,59,Age (years),18 - 24,AGEYR,AGEYR1824


In [5]:
# Keep only the columns used downstream: year, location, question, value,
# the per-category breakdown columns, and the stratification identifiers
keep_columns = [
    "YearStart",
    "LocationAbbr",
    "LocationDesc",
    "Question",
    "Data_Value",
    "Age(years)",
    "Education",
    "Race/Ethnicity",
    "Gender",
    "Income",
    "Total",
    "StratificationCategory1",
    "StratificationCategoryId1",
    "StratificationID1",
]
trim_data = raw_data.loc[:, keep_columns]
trim_data.head()

Unnamed: 0,YearStart,LocationAbbr,LocationDesc,Question,Data_Value,Age(years),Education,Race/Ethnicity,Gender,Income,Total,StratificationCategory1,StratificationCategoryId1,StratificationID1
0,2012,WY,Wyoming,Percent of adults aged 18 years and older who ...,48.5,,,American Indian/Alaska Native,,,,Race/Ethnicity,RACE,RACENAA
1,2012,DC,District of Columbia,Percent of adults aged 18 years and older who ...,31.6,,Less than high school,,,,,Education,EDU,EDUHS
2,2011,AL,Alabama,Percent of adults aged 18 years and older who ...,35.2,25 - 34,,,,,,Age (years),AGEYR,AGEYR2534
3,2013,US,National,Percent of adults who engage in no leisure-tim...,27.9,,,,Female,,,Gender,GEN,FEMALE
4,2011,US,National,Percent of adults who engage in no leisure-tim...,16.9,18 - 24,,,,,,Age (years),AGEYR,AGEYR1824


In [6]:
# Restrict the frame to rows answering the adult-obesity question
obesity_question = "Percent of adults aged 18 years and older who have obesity"
is_obesity_row = trim_data["Question"] == obesity_question
trim_data = trim_data[is_obesity_row]

In [7]:
# Narrow to the year, location, value and "Total" columns needed for the maps
overall_columns = ["YearStart", "LocationAbbr", "LocationDesc", "Data_Value", "Total"]
overall_data = trim_data.loc[:, overall_columns]

In [14]:
# Survey years to map (2011 through 2018 inclusive), plus one placeholder
# name per year; the per-year loop iterates over year_list and fills df_list
year_list = list(range(2011, 2019))
df_list = ["obes_" + str(survey_year) for survey_year in year_list]

In [15]:
# Locations to leave out of the maps: the national aggregate, DC, the
# territories, and the two non-contiguous states
excluded_locations = [
    "National",
    "District of Columbia",
    "Puerto Rico",
    "Guam",
    "Virgin Islands",
    "Alaska",
    "Hawaii",
]
not_excluded = ~overall_data["LocationDesc"].isin(excluded_locations)

# Replace each entry of df_list with that year's rows for the remaining locations
for i, year in enumerate(year_list):
    in_year = overall_data["YearStart"] == year
    # dropna() discards any row with a missing value in the kept columns;
    # NOTE(review): presumably this leaves only the "Total" (non-stratified)
    # rows, since Total is NaN on the stratified rows shown above -- confirm
    df_list[i] = overall_data.loc[in_year & not_excluded].dropna()

In [8]:
# One label per yearly choropleth map (2011-2018), and an empty list that
# will collect the folium map objects
map_name_list = ["map_" + str(map_year) for map_year in range(2011, 2019)]
maps_list = list()

In [9]:
# Bin edges for obesity % shared by every map so colors are comparable across years.
# NOTE(review): folium rejects data values that fall outside the bin range --
# confirm every yearly Data_Value lies within [20, 41].
bin_list = [20, 23, 26, 29, 32, 35, 38, 41]

# GeoJSON resource that provides the polygon for each state
state_geo = "../Resources/us-states.json"

# Build one centered base map per entry in map_name_list. The list is rebuilt
# here (rather than appended to) so re-running this cell does not pile up
# extra blank maps in maps_list.
maps_list = [folium.Map(location=[39, -95], zoom_start=5) for _ in map_name_list]

# Configure the choropleth layer for each year and attach it to that year's map
for i, (year, year_df, year_map) in enumerate(zip(year_list, df_list, maps_list)):
    # folium.Choropleth(...).add_to(map) replaces the deprecated Map.choropleth()
    # method, which returned None and is absent from later folium releases.
    choropleth_layer = folium.Choropleth(
        geo_data=state_geo,
        data=year_df,
        columns=['LocationAbbr', 'Data_Value'],
        key_on='feature.id',
        bins=bin_list,
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f'{year} Obesity Rate (%)',
    )
    choropleth_layer.add_to(year_map)

    # Keep the configured layer in the slot reserved for this map's name
    map_name_list[i] = choropleth_layer

    # Add the layer toggle control to the map
    folium.LayerControl().add_to(year_map)



In [10]:
# Save each map as "<year>.html". The year comes from year_list (the same list
# used for the map legends) instead of a separately maintained counter, so the
# file name always matches the year shown on the map.
for year, year_map in zip(year_list, maps_list):
    year_map.save(f'{year}.html')