In [None]:
import os

import pandas as pd
import requests
# interactive pandas tables
%load_ext google.colab.data_table

The google.colab.data_table extension is already loaded. To reload it, use:
  %reload_ext google.colab.data_table


# BLS and Census APIs for Economic and Demographic Data

In [None]:
# (state name, two-digit state code) pairs for the 50 states, in alphabetical
# order. The code is spliced into the BLS series ID and used as the Census
# `for=state:<code>` filter by the download functions in this notebook.
state_codes = [
    ("Alabama", "01"), ("Alaska", "02"), ("Arizona", "04"),
    ("Arkansas", "05"), ("California", "06"), ("Colorado", "08"),
    ("Connecticut", "09"), ("Delaware", "10"), ("Florida", "12"),
    ("Georgia", "13"), ("Hawaii", "15"), ("Idaho", "16"),
    ("Illinois", "17"), ("Indiana", "18"), ("Iowa", "19"),
    ("Kansas", "20"), ("Kentucky", "21"), ("Louisiana", "22"),
    ("Maine", "23"), ("Maryland", "24"), ("Massachusetts", "25"),
    ("Michigan", "26"), ("Minnesota", "27"), ("Mississippi", "28"),
    ("Missouri", "29"), ("Montana", "30"), ("Nebraska", "31"),
    ("Nevada", "32"), ("New Hampshire", "33"), ("New Jersey", "34"),
    ("New Mexico", "35"), ("New York", "36"), ("North Carolina", "37"),
    ("North Dakota", "38"), ("Ohio", "39"), ("Oklahoma", "40"),
    ("Oregon", "41"), ("Pennsylvania", "42"), ("Rhode Island", "44"),
    ("South Carolina", "45"), ("South Dakota", "46"), ("Tennessee", "47"),
    ("Texas", "48"), ("Utah", "49"), ("Vermont", "50"),
    ("Virginia", "51"), ("Washington", "53"), ("West Virginia", "54"),
    ("Wisconsin", "55"), ("Wyoming", "56"),
]


In [None]:
# Full state name -> two-letter postal abbreviation, used with Series.map()
# to shorten the `state_name` columns further down.
state_abbr_map = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID',
    'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS',
    'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS',
    'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV',
    'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY',
    'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT',
    'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV',
    'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# unemployment

In [None]:
def get_unemployment_data(api_key, state_code, year, state_name):
    """Fetch one year of a state's unemployment-rate series from the BLS API.

    Posts a single-series request for ``LAUST<state_code>0000000000003`` with
    ``startyear == endyear == year`` and turns the returned observations into
    a DataFrame.

    Parameters
    ----------
    api_key : str
        BLS registration key, sent as ``registrationKey``.
    state_code : str
        Two-digit state code spliced into the series ID.
    year : int or str
        Used as both the start and the end year of the request.
    state_name : str
        Written unchanged into a ``state_name`` column on every row.

    Returns
    -------
    pandas.DataFrame or None
        One row per observation in the response plus a ``state_name`` column.
        ``None`` when the HTTP status is not 200 or when the response carries
        no series. The second case matters because the API can answer 200 and
        still reject the request, leaving ``Results`` without a ``series`` key
        (previously a ``KeyError: 'series'``).
    """
    base_url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
    series_id = f"LAUST{state_code}0000000000003"
    headers = {"Content-Type": "application/json"}

    payload = {
        "registrationKey": api_key,
        "seriesid": [series_id],
        "startyear": year,
        "endyear": year
    }

    # Without a timeout a stalled connection would hang the download loop forever.
    response = requests.post(base_url, headers=headers, json=payload, timeout=30)

    if response.status_code != 200:
        return None

    result = response.json()
    results = result.get("Results")
    series = results.get("series") if isinstance(results, dict) else None
    if not series:
        messages = "; ".join(str(m) for m in result.get("message") or [])
        if api_key:
            # Keep the key out of the notebook output in case the API echoes it back.
            messages = messages.replace(str(api_key), "<key>")
        print(f"BLS returned no data for {state_name} {year}: {result.get('status')} {messages}")
        return None

    df2 = pd.DataFrame(series[0].get("data") or [])
    df2['state_name'] = state_name
    return df2

# Download the monthly unemployment rate for every state and year.
# The BLS registration key is read from the BLS_API_KEY environment variable
# so that it is not stored in the notebook.
api_key = os.environ["BLS_API_KEY"]

unemployment_columns = ['index','year','period','periodName','value','footnotes','state_name']

years = list(range(2013, 2023))
unemployment_frames = []
for year in years:
  print(year)

  for state_name, state_code in state_codes:
    unemployment_df = get_unemployment_data(api_key, state_code, year, state_name)
    # Failed requests come back as None; skip them.
    if unemployment_df is not None:
      unemployment_frames.append(unemployment_df)

# Concatenate once at the end; re-concatenating inside the loop copies the
# growing frame on every iteration. The empty template keeps the column layout.
monthly_unemployment = pd.concat(
    [pd.DataFrame(columns=unemployment_columns), *unemployment_frames],
    ignore_index=True,
)
monthly_unemployment

2013
2014
2015
2016
2017
2018
2019
2020
2021
2022


KeyError: 'series'

In [None]:
# Load the monthly unemployment table from a local Excel workbook.
# NOTE(review): presumably a saved copy of the BLS download above — confirm how the file was produced.
monthly_unemployment_0411 = pd.read_excel("monthly_unemployment_0411.xlsx")

In [None]:
# Yearly mean of the monthly `value` column per state.
# Values that cannot be parsed as numbers become NaN and are ignored by mean().
monthly_unemployment_0411['value'] = pd.to_numeric(monthly_unemployment_0411['value'], errors='coerce')
average_employment_df = monthly_unemployment_0411.groupby(['year', 'state_name'])['value'].mean().reset_index()
# Keep three decimals. This used to be `.astype(int).round(3)`, which truncated
# the mean to a whole number first (7.9 -> 7) and made the rounding a no-op.
average_employment_df['value'] = average_employment_df['value'].round(3)
average_employment_df['year'] = average_employment_df['year'].astype(int)

# Full state names -> two-letter abbreviations (names missing from the map become NaN).
average_employment_df['state_name'] = average_employment_df['state_name'].map(state_abbr_map)
# Display the new dataframe
print(average_employment_df)

     year state_name  value
0    2013         AL      7
1    2013         AK      7
2    2013         AZ      7
3    2013         AR      7
4    2013         CA      9
..    ...        ...    ...
471  2022         MI      4
472  2022         MN      2
473  2022         MS      3
474  2022         MO      2
475  2022         MT      2

[476 rows x 3 columns]


# inflation

In [None]:
def get_inflation_data(api_key, state_name):
    """Fetch the CPI series ``CUUR0000SA0`` for 2004-2022 from the BLS API.

    Parameters
    ----------
    api_key : str
        BLS registration key, sent as ``registrationKey``.
    state_name : str
        Unused; the series is national. Kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame or None
        One row per observation in the response. ``None`` when the HTTP
        status is not 200 or when the response carries no series. The second
        case matters because the API can answer 200 and still reject the
        request, leaving ``Results`` without a ``series`` key (previously a
        ``KeyError: 'series'``).
    """
    base_url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
    # CPI for All Urban Consumers, U.S. city average, all items, not seasonally adjusted.
    series_id = "CUUR0000SA0"
    headers = {"Content-Type": "application/json"}

    payload = {
        "registrationKey": api_key,
        "seriesid": [series_id],
        "startyear": 2004,
        "endyear": 2022
    }

    # Without a timeout a stalled connection would hang the cell forever.
    response = requests.post(base_url, headers=headers, json=payload, timeout=30)

    if response.status_code != 200:
        return None

    result = response.json()
    results = result.get("Results")
    series = results.get("series") if isinstance(results, dict) else None
    if not series:
        messages = "; ".join(str(m) for m in result.get("message") or [])
        if api_key:
            # Keep the key out of the notebook output in case the API echoes it back.
            messages = messages.replace(str(api_key), "<key>")
        print(f"BLS returned no data for {series_id}: {result.get('status')} {messages}")
        return None

    return pd.DataFrame(series[0].get("data") or [])

In [None]:
# Load the CPI observations from a local Excel workbook.
# NOTE(review): presumably a saved copy of get_inflation_data() output — confirm how the file was produced.
inf = pd.read_excel("inf.xlsx")

In [None]:
# Average the `value` column per year, then cast both columns to int
# (the cast drops the fractional part of the yearly mean).
inf['value'] = pd.to_numeric(inf['value'], errors='coerce')
average_inflation_df = (
    inf.groupby(['year'])['value']
    .mean()
    .reset_index()
    .astype({'value': int, 'year': int})
)
# Display the new dataframe
print(average_inflation_df)

    year  value
0   2004    188
1   2005    195
2   2006    201
3   2007    207
4   2008    215
5   2009    214
6   2010    218
7   2011    224
8   2012    229
9   2013    232
10  2014    236
11  2015    237
12  2016    240
13  2017    245
14  2018    251
15  2019    255
16  2020    258
17  2021    270
18  2022    292


In [None]:

# The CPI series is served by the BLS API, so this needs the BLS registration
# key. The previous hardcoded value was the key used for the Census requests.
# Read it from the BLS_API_KEY environment variable, not from the notebook.
api_key = os.environ["BLS_API_KEY"]
# get_inflation_data() ignores its second argument (the series is national), so
# pass None instead of a `state_name` left over from an earlier loop.
inflation_df_national = get_inflation_data(api_key, None)
if inflation_df_national is None:
    raise RuntimeError("BLS returned no CPI data; check BLS_API_KEY and the daily request quota")

inflation_df_national['value'] = pd.to_numeric(inflation_df_national['value'], errors='coerce')
average_inflation_df = inflation_df_national.groupby(['year'])['value'].mean().reset_index()
# The int cast drops the fractional part of the yearly mean.
average_inflation_df['value'] = average_inflation_df['value'].astype(int)
average_inflation_df['year'] = average_inflation_df['year'].astype(int)
# Display the new dataframe
print(average_inflation_df)

KeyError: 'series'

# Combine unemployment and inflation

In [None]:
# Give each `value` column its final name first, then left-join on year so
# every state-year row picks up that year's national figure.
emp_inf_df = average_employment_df.rename(columns={'value': 'employment'}).merge(
    average_inflation_df.rename(columns={'value': 'inflation'}),
    on='year',
    how='left',
)
# Display the merged dataframe
print(emp_inf_df)

     year state_name  employment  inflation
0    2013         AL           7        232
1    2013         AK           7        232
2    2013         AZ           7        232
3    2013         AR           7        232
4    2013         CA           9        232
..    ...        ...         ...        ...
471  2022         MI           4        292
472  2022         MN           2        292
473  2022         MS           3        292
474  2022         MO           2        292
475  2022         MT           2        292

[476 rows x 4 columns]


# State Demographics

In [None]:
# Empty frame that fixes the column layout; the download loop further down
# concatenates one row per state and year onto it.
state_demographics = pd.DataFrame(columns=['year', 'state_name', "total_pop", "median_age", "white_pop", "black_pop", "male", "female","high_school", "bach", "born_in_state"])

In [None]:
import requests
import pandas as pd

def get_religious_demographics(api_key, state_code, year, state_name):
    """Fetch nine ACS 1-year variables for one state and year as a one-row frame.

    Despite the name, none of the requested variables is about religion; the
    name is kept so existing calls keep working.

    All variables are requested in a single call. Previously each variable
    was fetched separately, and any failed call left the value list short,
    which raised ``IndexError`` when the row was assembled.

    Parameters
    ----------
    api_key : str
        Census API key, sent as the ``key`` query parameter.
    state_code : str
        Two-digit state code used in ``for=state:<code>``.
    year : int or str
        Dataset year, placed in the URL path.
    state_name : str
        Written unchanged into the ``state_name`` column.

    Returns
    -------
    pandas.DataFrame or None
        One row with columns ``year``, ``state_name``, ``total_pop``,
        ``median_age``, ``white_pop``, ``black_pop``, ``male``, ``female``,
        ``high_school``, ``bach``, ``born_in_state``. Each value is truncated
        to ``int`` (as before); a null in the response becomes ``None``.
        ``None`` is returned when the request fails or the body is not the
        expected header-row-plus-data-row JSON table.
    """
    base_url = "https://api.census.gov/data/"
    dataset = "acs/acs1"

    # (ACS variable code, output column name)
    variable_columns = [
        ("B01003_001E", "total_pop"),
        ("B01002_001E", "median_age"),
        ("B03002_003E", "white_pop"),
        ("B03002_004E", "black_pop"),
        ("B05003_002E", "male"),
        ("B05003_013E", "female"),
        # NOTE(review): the original labelled this variable "less_than_high_school"
        # locally but stored it as "high_school"; an older commented-out list used
        # B06009_003E. Confirm which variable is intended.
        ("B06009_002E", "high_school"),
        ("B06009_005E", "bach"),
        ("B06009_007E", "born_in_state"),
    ]

    url = f"{base_url}{year}/{dataset}"
    params = {
        "get": ",".join(code for code, _ in variable_columns),
        "for": f"state:{state_code}",
        "key": api_key,
    }

    # Without a timeout a stalled connection would hang the download loop forever.
    response = requests.get(url, params=params, timeout=30)

    if response.status_code != 200:
        print(f"Census request failed for {state_name} {year}: HTTP {response.status_code}")
        return None

    try:
        result = response.json()
    except ValueError:
        print(f"Census response for {state_name} {year} was not valid JSON")
        return None

    if not isinstance(result, list) or len(result) < 2:
        print(f"Census response for {state_name} {year} contained no data row")
        return None

    # First row holds the column names, second row the values for the state.
    record = dict(zip(result[0], result[1]))

    data = {"year": [year], "state_name": [state_name]}
    for code, column in variable_columns:
        raw = record.get(code)
        data[column] = [None if raw is None else int(float(raw))]

    return pd.DataFrame(data)


In [None]:
# Download the demographics row for every state and year.
# The Census API key is read from the CENSUS_API_KEY environment variable so
# that it is not stored in the notebook.
api_key = os.environ["CENSUS_API_KEY"]

# NOTE(review): the saved table further down stops at 2019 — confirm that the
# acs/acs1 dataset actually serves 2020 before relying on that year.
years = list(range(2014, 2021))

demographic_frames = []
for year in years:

  print(year)

  for state_name, state_code in state_codes:
    print(state_name, year)
    population_df4 = get_religious_demographics(api_key, state_code, year, state_name)
    # Skip state/years for which no row came back.
    if population_df4 is not None:
      demographic_frames.append(population_df4)

# Concatenate once at the end; re-concatenating inside the loop copies the
# growing frame on every iteration.
state_demographics = pd.concat([state_demographics, *demographic_frames], ignore_index=True)

2014
Alabama 2014
Alaska 2014
Arizona 2014
Arkansas 2014
California 2014
Colorado 2014
Connecticut 2014
Delaware 2014
Florida 2014
Georgia 2014
Hawaii 2014
Idaho 2014
Illinois 2014
Indiana 2014
Iowa 2014
Kansas 2014
Kentucky 2014
Louisiana 2014
Maine 2014
Maryland 2014
Massachusetts 2014
Michigan 2014
Minnesota 2014
Mississippi 2014
Missouri 2014
Montana 2014
Nebraska 2014
Nevada 2014
New Hampshire 2014
New Jersey 2014
New Mexico 2014
New York 2014
North Carolina 2014
North Dakota 2014
Ohio 2014
Oklahoma 2014
Oregon 2014
Pennsylvania 2014
Rhode Island 2014
South Carolina 2014
South Dakota 2014
Tennessee 2014
Texas 2014
Utah 2014
Vermont 2014
Virginia 2014
Washington 2014
West Virginia 2014
Wisconsin 2014
Wyoming 2014
2015
Alabama 2015
Alaska 2015
Arizona 2015
Arkansas 2015
California 2015
Colorado 2015
Connecticut 2015
Delaware 2015
Florida 2015
Georgia 2015
Hawaii 2015
Idaho 2015
Illinois 2015
Indiana 2015
Iowa 2015
Kansas 2015
Kentucky 2015
Louisiana 2015
Maine 2015
Maryland 2015
Mas

IndexError: list index out of range

In [None]:
# Swap full state names for two-letter abbreviations, then rename the column
# from `state_name` to `state`.
state_demographics = (
    state_demographics
    .assign(state_name=state_demographics['state_name'].map(state_abbr_map))
    .rename(columns={'state_name': 'state'})
)
state_demographics

Unnamed: 0,year,state_name,total_pop,median_age,white_pop,black_pop,male,female,high_school,bach,born_in_state
0,2014,Alabama,4849377,38,3205535,1285731,2347969,2501408,497759,478058,2143805
1,2014,Alaska,736732,33,455009,23608,385582,351150,33253,85634,123013
2,2014,Arizona,6731484,36,3777085,265671,3342946,3388538,618786,765784,1011729
3,2014,Arkansas,2966369,37,2176186,465816,1456778,1509591,288737,272411,1080675
4,2014,California,38802500,36,14849129,2155733,19264457,19538043,4593525,5120162,10235915
...,...,...,...,...,...,...,...,...,...,...,...
295,2019,Virginia,8535519,38,5212705,1625942,4201799,4333720,584712,1312800,2372752
296,2019,Washington,7614893,37,5126694,295239,3807572,3807321,440074,1204728,1884400
297,2019,West Virginia,1792147,42,1649468,64996,885861,906286,164805,161686,856957
298,2019,Wisconsin,5822434,39,4704609,366735,2892804,2929630,290862,829878,2672444


In [None]:
# Save the demographics table to an Excel workbook without the index column.
state_demographics.to_excel("state_demographics_alpha.xlsx", index=False)

# add inflation and employment to the state_demographics_df

In [None]:
pwd

'/content/github/Capstone'

In [None]:
# Load a demographics table from a local Excel workbook.
# NOTE(review): this reads state_demographics_old.xlsx, not the
# state_demographics_alpha.xlsx written above — confirm which file is intended.
state_demographics_old_df = pd.read_excel("state_demographics_old.xlsx")

In [None]:
# Replace full state names with two-letter abbreviations so the column matches
# emp_inf_df. Values missing from state_abbr_map become NaN, so running this
# cell a second time turns the abbreviations themselves into NaN.
state_demographics_old_df['state_name'] = state_demographics_old_df['state_name'].map(state_abbr_map)

In [None]:
# Left join: every demographics row is kept; rows with no matching
# (state_name, year) in emp_inf_df get NaN in the columns coming from it.
merged_df = pd.merge(state_demographics_old_df, emp_inf_df, on=['state_name', 'year'], how='left')

In [None]:
# Save the merged table to an Excel workbook without the index column.
merged_df.to_excel("state_demographics_0411_THREE.xlsx", index=False)