In [1]:
# Dependencies
import pandas as pd

## State Information - Data Extraction

### State names and State IDs

In [2]:
# Download the per-state JSON records; the state names and ids are taken from here
url = 'https://cdn.anychart.com/samples/maps-in-dashboard/states-of-united-states-dashboard-with-multi-select/data.json'
us_state_data = pd.read_json(url)
us_state_data.head(5)

Unnamed: 0,name,id,flag,capital,largest_city,statehood,population,area,land_area,water_area,house_seats,labels
0,Alabama,US.AL,http://static.anychart.com/images/maps_samples...,Montgomery,Birmingham,"December 14, 1819",4833722,135767,131170,4597,7,
1,Alaska,US.AK,http://static.anychart.com/images/maps_samples...,Juneau,Anchorage,"January 3, 1959",735132,1723337,1477950,245383,1,
2,Arizona,US.AZ,http://static.anychart.com/images/maps_samples...,Phoenix,Phoenix,"February 14, 1912",6626624,295233,294207,1026,9,
3,Arkansas,US.AR,http://static.anychart.com/images/maps_samples...,Little Rock,Little Rock,"June 15, 1836",2959373,137733,134770,2960,4,
4,California,US.CA,http://static.anychart.com/images/maps_samples...,Sacramento,Los Angeles,"September 9, 1850",38332521,423968,403466,20502,53,


In [3]:
# Keep only the name/id columns and expose the name column as 'state'.
# Both the column selection and rename() return new frames, so
# us_state_data itself is left untouched.
us_state_ids = (
    us_state_data[["name", "id"]]
    .rename(columns={'name': 'state'})
)

print(len(us_state_ids))
us_state_ids.head()

50


Unnamed: 0,state,id
0,Alabama,US.AL
1,Alaska,US.AK
2,Arizona,US.AZ
3,Arkansas,US.AR
4,California,US.CA


### All Years and States

In [4]:
# Every (year, state) combination of interest, grouped by state with the
# years running 2009 through 2019 inside each state.
states = us_state_ids['state'].tolist()
years = [str(yr) for yr in range(2009, 2020)]

state_year = [
    (yr, name)
    for name in states
    for yr in years
]

In [5]:
# Show the number of (year, state) pairs, then the pairs themselves, one item per line
print(len(state_year), state_year, sep='\n')

550
[('2009', 'Alabama'), ('2010', 'Alabama'), ('2011', 'Alabama'), ('2012', 'Alabama'), ('2013', 'Alabama'), ('2014', 'Alabama'), ('2015', 'Alabama'), ('2016', 'Alabama'), ('2017', 'Alabama'), ('2018', 'Alabama'), ('2019', 'Alabama'), ('2009', 'Alaska'), ('2010', 'Alaska'), ('2011', 'Alaska'), ('2012', 'Alaska'), ('2013', 'Alaska'), ('2014', 'Alaska'), ('2015', 'Alaska'), ('2016', 'Alaska'), ('2017', 'Alaska'), ('2018', 'Alaska'), ('2019', 'Alaska'), ('2009', 'Arizona'), ('2010', 'Arizona'), ('2011', 'Arizona'), ('2012', 'Arizona'), ('2013', 'Arizona'), ('2014', 'Arizona'), ('2015', 'Arizona'), ('2016', 'Arizona'), ('2017', 'Arizona'), ('2018', 'Arizona'), ('2019', 'Arizona'), ('2009', 'Arkansas'), ('2010', 'Arkansas'), ('2011', 'Arkansas'), ('2012', 'Arkansas'), ('2013', 'Arkansas'), ('2014', 'Arkansas'), ('2015', 'Arkansas'), ('2016', 'Arkansas'), ('2017', 'Arkansas'), ('2018', 'Arkansas'), ('2019', 'Arkansas'), ('2009', 'California'), ('2010', 'California'), ('2011', 'California'),

In [6]:
# Build the year/state dataframe in a single constructor call.
# The previous row-by-row DataFrame.append loop no longer runs on pandas >= 2.0
# (append was deprecated in 1.4 and then removed), and it re-copied the whole
# frame on every iteration. Each (year, state) tuple becomes one row, in the
# same order as state_year.
state_year_df = pd.DataFrame(state_year, columns=['year', 'state'])

print(len(state_year_df))
state_year_df.head()

550


Unnamed: 0,year,state
0,2009,Alabama
1,2010,Alabama
2,2011,Alabama
3,2012,Alabama
4,2013,Alabama


### State Location

In [7]:
# Scrape the state latitude/longitude table from the page below
location_url = 'https://developers.google.com/public-data/docs/canonical/states_csv'
state_location = pd.read_html(location_url)

# read_html returns a list of tables; use the first one and rename its columns
# so that 'state' holds the full name and 'state_code' holds the original
# 'state' column
state_location_df = state_location[0].rename(
    columns={'state': 'state_code', 'name': 'state'}
)

print(len(state_location_df))
state_location_df.head()

52


Unnamed: 0,state_code,latitude,longitude,state
0,AK,63.588753,-154.493062,Alaska
1,AL,32.318231,-86.902298,Alabama
2,AR,35.20105,-91.831833,Arkansas
3,AZ,34.048928,-111.093731,Arizona
4,CA,36.778261,-119.417932,California


## State Information - Data Transformation

In [8]:
# Attach the location columns to the state ids.
# A left join on 'state' keeps every row of us_state_ids.
state_loc_code = pd.merge(
    us_state_ids,
    state_location_df,
    on='state',
    how='left',
)

print(len(state_loc_code))
state_loc_code.head()

50


Unnamed: 0,state,id,state_code,latitude,longitude
0,Alabama,US.AL,AL,32.318231,-86.902298
1,Alaska,US.AK,AK,63.588753,-154.493062
2,Arizona,US.AZ,AZ,34.048928,-111.093731
3,Arkansas,US.AR,AR,35.20105,-91.831833
4,California,US.CA,CA,36.778261,-119.417932


In [9]:
# Attach the id/location columns to every (year, state) row.
# A left join on 'state' keeps every row of state_year_df.
state_info_df = pd.merge(
    state_year_df,
    state_loc_code,
    on='state',
    how='left',
)

print(len(state_info_df))
state_info_df.head()

550


Unnamed: 0,year,state,id,state_code,latitude,longitude
0,2009,Alabama,US.AL,AL,32.318231,-86.902298
1,2010,Alabama,US.AL,AL,32.318231,-86.902298
2,2011,Alabama,US.AL,AL,32.318231,-86.902298
3,2012,Alabama,US.AL,AL,32.318231,-86.902298
4,2013,Alabama,US.AL,AL,32.318231,-86.902298


In [10]:
# Write the merged state info to CSV; index=False leaves the row index out of the file
filepath = '../../output_file/state_non_race_info.csv'
state_info_df.to_csv(path_or_buf=filepath, index=False)