In [None]:
import os
import warnings

import pandas as pd
import requests
# interactive pandas tables
%load_ext google.colab.data_table

# BLS and Census APIs for Economic and Demographic Data

In [None]:
# (state name, two-digit state code) pairs for the 50 states.
# The code is spliced into the BLS series id and used as the Census
# `for=state:` filter by the fetch loops further down in this notebook.
state_codes = [
    ("Alabama", "01"),
    ("Alaska", "02"),
    ("Arizona", "04"),
    ("Arkansas", "05"),
    ("California", "06"),
    ("Colorado", "08"),
    ("Connecticut", "09"),
    ("Delaware", "10"),
    ("Florida", "12"),
    ("Georgia", "13"),
    ("Hawaii", "15"),
    ("Idaho", "16"),
    ("Illinois", "17"),
    ("Indiana", "18"),
    ("Iowa", "19"),
    ("Kansas", "20"),
    ("Kentucky", "21"),
    ("Louisiana", "22"),
    ("Maine", "23"),
    ("Maryland", "24"),
    ("Massachusetts", "25"),
    ("Michigan", "26"),
    ("Minnesota", "27"),
    ("Mississippi", "28"),
    ("Missouri", "29"),
    ("Montana", "30"),
    ("Nebraska", "31"),
    ("Nevada", "32"),
    ("New Hampshire", "33"),
    ("New Jersey", "34"),
    ("New Mexico", "35"),
    ("New York", "36"),
    ("North Carolina", "37"),
    ("North Dakota", "38"),
    ("Ohio", "39"),
    ("Oklahoma", "40"),
    ("Oregon", "41"),
    ("Pennsylvania", "42"),
    ("Rhode Island", "44"),
    ("South Carolina", "45"),
    ("South Dakota", "46"),
    ("Tennessee", "47"),
    ("Texas", "48"),
    ("Utah", "49"),
    ("Vermont", "50"),
    ("Virginia", "51"),
    ("Washington", "53"),
    ("West Virginia", "54"),
    ("Wisconsin", "55"),
    ("Wyoming", "56")
]


In [None]:
# Full state name -> two-letter abbreviation.
# Used to relabel the `state_name` columns of the unemployment and
# demographics tables so they can be joined on the abbreviation.
state_abbr_map = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

# unemployment

## A `'series'` `KeyError` is raised when the API key runs out of requests. I created 4 keys to get the data I needed.

In [None]:
%%capture

def get_unemployment_data(api_key, state_code, year, state_name):
    """Fetch one year of BLS unemployment observations for a single state.

    Posts to the BLS Public Data API v2 for the series id
    ``LAUST<state_code>0000000000003`` and returns the observations of
    ``year`` with a ``state_name`` column appended.

    Parameters
    ----------
    api_key : str
        BLS registration key, sent as ``registrationKey``.
    state_code : str
        Two-digit state code spliced into the series id.
    year : int or str
        Used as both ``startyear`` and ``endyear`` of the query.
    state_name : str
        Label written to the ``state_name`` column of the result.

    Returns
    -------
    pandas.DataFrame or None
        One row per observation in the response. ``None`` when the HTTP
        status is not 200, or when the response carries no series (which is
        what the API sends once a key has used up its requests); in the
        latter case a warning with the API's own message is emitted.
    """
    base_url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
    series_id = f"LAUST{state_code}0000000000003"

    payload = {
        "registrationKey": api_key,
        "seriesid": [series_id],
        # The API documentation specifies the years as strings.
        "startyear": str(year),
        "endyear": str(year),
    }

    response = requests.post(
        base_url,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=30,  # never let one stalled request hang the whole loop
    )
    if response.status_code != 200:
        return None

    result = response.json()

    # A throttled or rejected request still answers HTTP 200 but without a
    # 'series' entry, so look it up defensively instead of indexing.
    results = result.get("Results")
    series = results.get("series") if isinstance(results, dict) else None
    if not series:
        warnings.warn(
            f"BLS returned no data for {series_id} ({year}): "
            f"status={result.get('status')!r}, message={result.get('message')!r}"
        )
        return None

    observations = pd.DataFrame(series[0].get("data", []))
    observations["state_name"] = state_name
    return observations


# Empty template: fixes the column layout of the accumulated table and keeps
# those columns present even if no request returns data.
monthly_unemployment = pd.DataFrame(columns=['index','year','period','periodName','value','footnotes','state_name'])

# Read the BLS key from the environment instead of committing it to the
# notebook; this also makes it easy to switch keys when one is exhausted.
api_key = os.environ.get("BLS_API_KEY")
if not api_key:
  raise RuntimeError("Set the BLS_API_KEY environment variable to a BLS registration key")

years = list(range(2013, 2023))

# Collect the per-state frames and concatenate once at the end; growing a
# DataFrame with concat inside the loop copies all previous rows every time.
unemployment_frames = []
for year in years:
  print(year)

  for state_name, state_code in state_codes:
    unemployment_df = get_unemployment_data(api_key, state_code, year, state_name)
    # The fetcher returns None for a failed request; skip those.
    if unemployment_df is not None:
      unemployment_frames.append(unemployment_df)

monthly_unemployment = pd.concat([monthly_unemployment, *unemployment_frames], ignore_index=True)

# Preview only; the full table is one row per state and month.
monthly_unemployment.head()

In [None]:
# Observation values arrive as strings; anything non-numeric becomes NaN.
monthly_unemployment['value'] = pd.to_numeric(monthly_unemployment['value'], errors='coerce')

# Mean of the monthly values per (year, state), kept to three decimals.
# The previous `.astype(int).round(3)` truncated the mean to a whole number
# before rounding (so the rounding had no effect) and failed on NaN means.
# Note: despite its name, this frame is built from the unemployment series.
average_employment_df = (
    monthly_unemployment
    .groupby(['year', 'state_name'])['value']
    .mean()
    .round(3)
    .reset_index()
)
average_employment_df['year'] = average_employment_df['year'].astype(int)
# Replace full state names with two-letter abbreviations for later joins.
average_employment_df['state_name'] = average_employment_df['state_name'].map(state_abbr_map)

# inflation

In [None]:
def get_inflation_data(api_key, state_name=None):
    """Fetch the BLS consumer price index series ``CUUR0000SA0`` for 2004-2022.

    Parameters
    ----------
    api_key : str
        BLS registration key, sent as ``registrationKey``.
    state_name : str, optional
        Unused: the series is not state-specific. Kept (now optional) so
        existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame or None
        One row per observation in the response. ``None`` when the HTTP
        status is not 200, or when the response carries no series (which is
        what the API sends once a key has used up its requests); in the
        latter case a warning with the API's own message is emitted.
    """
    base_url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
    # CPI for All Urban Consumers, all items -- a price index, not an
    # unemployment series as an earlier comment claimed.
    series_id = "CUUR0000SA0"

    payload = {
        "registrationKey": api_key,
        "seriesid": [series_id],
        "startyear": "2004",
        "endyear": "2022",
    }

    response = requests.post(
        base_url,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=30,  # avoid hanging forever on a stalled connection
    )
    if response.status_code != 200:
        return None

    result = response.json()

    # A throttled or rejected request still answers HTTP 200 but without a
    # 'series' entry, so look it up defensively instead of indexing.
    results = result.get("Results")
    series = results.get("series") if isinstance(results, dict) else None
    if not series:
        warnings.warn(
            f"BLS returned no data for {series_id}: "
            f"status={result.get('status')!r}, message={result.get('message')!r}"
        )
        return None

    return pd.DataFrame(series[0].get("data", []))

In [None]:
# Read the BLS key from the environment instead of committing it to the
# notebook. The literal used here before is the same one this notebook sends
# to api.census.gov, while this request goes to api.bls.gov.
api_key = os.environ.get("BLS_API_KEY")
if not api_key:
    raise RuntimeError("Set the BLS_API_KEY environment variable to a BLS registration key")

# The fetcher ignores its second argument, so pass None rather than relying
# on a `state_name` variable left over from an earlier loop.
inflation_df_national = get_inflation_data(api_key, None)
if inflation_df_national is None:
    raise RuntimeError("BLS returned no inflation data; check the API key and its remaining requests")

inflation_df_national['value'] = pd.to_numeric(inflation_df_national['value'], errors='coerce')

# Yearly mean of the index; the mean is truncated to a whole number.
average_inflation_df = inflation_df_national.groupby(['year'])['value'].mean().reset_index()
average_inflation_df['value'] = average_inflation_df['value'].astype(int)
average_inflation_df['year'] = average_inflation_df['year'].astype(int)

In [None]:
# `inf` was previously only defined by a commented-out load, so this cell
# failed with NameError on a fresh kernel. Use the saved workbook when it is
# present; otherwise fall back to the frame fetched from the API above.
if os.path.exists("inf.xlsx"):
    inf = pd.read_excel("inf.xlsx")
else:
    inf = inflation_df_national.copy()

inf['value'] = pd.to_numeric(inf['value'], errors='coerce')

# Yearly mean of the index; the mean is truncated to a whole number.
average_inflation_df = inf.groupby(['year'])['value'].mean().reset_index()
average_inflation_df['value'] = average_inflation_df['value'].astype(int)
average_inflation_df['year'] = average_inflation_df['year'].astype(int)

# combine employment and inflation

In [None]:
# Attach the yearly inflation figure to every (year, state) row.
# Both inputs carry a 'value' column, so the merge suffixes them with
# _x (left frame) and _y (right frame); one rename gives them their final names.
emp_inf_df = (
    average_employment_df
    .merge(average_inflation_df, how='left', on='year')
    .rename(columns={
        'value': 'inflation',
        'value_x': 'employment',
        'value_y': 'inflation',
    })
)
emp_inf_df.head(5)

# State Demographics

In [None]:
# Empty table that fixes the column layout of the demographics data;
# the fetch loop below appends one row per state and year to it.
demographic_columns = [
    'year',
    'state_name',
    'total_pop',
    'median_age',
    'white_pop',
    'black_pop',
    'male',
    'female',
    'high_school',
    'bach',
    'born_in_state',
]
state_demographics = pd.DataFrame(columns=demographic_columns)

In [None]:
import requests
import pandas as pd

def get_state_demographics(api_key, state_code, year, state_name):
    """Fetch one state's ACS 1-year demographic figures for one year.

    All variables are requested in a single Census API call and matched to
    their columns by the variable ids in the response header, so a value can
    never end up in the wrong column.

    Parameters
    ----------
    api_key : str
        Census API key, sent as the ``key`` query parameter.
    state_code : str
        Two-digit state code used in the ``for=state:`` filter.
    year : int or str
        Year path segment of the dataset URL.
    state_name : str
        Label written to the ``state_name`` column of the result.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame with columns ``year``, ``state_name``,
        ``total_pop``, ``median_age``, ``white_pop``, ``black_pop``,
        ``male``, ``female``, ``high_school``, ``bach`` and
        ``born_in_state``. ``None`` (with a warning) when the request does
        not return HTTP 200, e.g. because the dataset does not exist for
        ``year``.
    """
    base_url = "https://api.census.gov/data/"
    dataset = "acs/acs1"

    # Census variable id -> output column, in output-column order.
    variable_columns = {
        "B01003_001E": "total_pop",
        "B01002_001E": "median_age",
        "B03002_003E": "white_pop",
        "B03002_004E": "black_pop",
        "B05003_002E": "male",
        "B05003_013E": "female",
        # NOTE(review): this id was labelled "less_than_high_school" in the
        # previous version, and an older draft used B06009_003E instead, yet
        # the value is stored as "high_school" -- confirm which is intended.
        "B06009_002E": "high_school",
        "B06009_005E": "bach",
        "B06009_007E": "born_in_state",
    }

    # One request for every variable instead of one request per variable.
    variables = ",".join(variable_columns)
    url = f"{base_url}{year}/{dataset}?get={variables}&for=state:{state_code}&key={api_key}"

    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        warnings.warn(
            f"Census request for {state_name} ({year}) failed with HTTP {response.status_code}"
        )
        return None

    # The response is [header_row, value_row]; pair them up by variable id.
    header, values = response.json()[:2]
    by_variable = dict(zip(header, values))

    record = {"year": year, "state_name": state_name}
    for variable, column in variable_columns.items():
        raw = by_variable.get(variable)
        # Values are truncated to whole numbers, as before (this also drops
        # the decimal part of the median age). Missing values stay None.
        record[column] = None if raw is None else int(float(raw))

    return pd.DataFrame([record])


In [None]:
# Read the Census key from the environment instead of committing it to the notebook.
api_key = os.environ.get("CENSUS_API_KEY")
if not api_key:
  raise RuntimeError("Set the CENSUS_API_KEY environment variable to a Census API key")

years = list(range(2014, 2021))

# Collect the single-row frames and concatenate once at the end; growing a
# DataFrame with concat inside the loop copies all previous rows every time.
demographic_frames = []
for year in years:
  for state_name, state_code in state_codes:
    population_df4 = get_state_demographics(api_key, state_code, year, state_name)
    # Tolerate a fetcher that signals "no data" with None.
    if population_df4 is not None:
      demographic_frames.append(population_df4)

# Start from the template's columns only (no rows), so re-running this cell
# rebuilds the table instead of appending duplicate rows.
state_demographics = pd.concat([state_demographics.iloc[:0], *demographic_frames], ignore_index=True)

In [None]:
# Swap full state names for two-letter abbreviations, then expose the
# column under the shorter name 'state'.
state_demographics = (
    state_demographics
    .assign(state_name=lambda frame: frame['state_name'].map(state_abbr_map))
    .rename(columns={'state_name': 'state'})
)
state_demographics.head(15)

# add inflation and employment to the state_demographics_df

In [None]:
# The merge below joins on 'state_name', but an earlier cell has already
# renamed that column to 'state' and converted it to abbreviations.
# Restore the expected column name if needed ...
if 'state_name' not in state_demographics.columns and 'state' in state_demographics.columns:
    state_demographics = state_demographics.rename(columns={'state': 'state_name'})

# ... and abbreviate only values that are still full names. A plain
# `.map(state_abbr_map)` would turn already-abbreviated values into NaN.
state_demographics['state_name'] = state_demographics['state_name'].map(
    lambda name: state_abbr_map.get(name, name)
)

In [None]:
# Left join: keep every demographics row and attach the unemployment and
# inflation figures for the same state and year where they exist.
join_keys = ['state_name', 'year']
merged_df = state_demographics.merge(emp_inf_df, how='left', on=join_keys)

# Save to excel

In [None]:
# Write the merged table to an Excel workbook, leaving out the row index.
output_path = "state_demographics.xlsx"
merged_df.to_excel(output_path, index=False)