# Libraries

In [15]:
import geopandas as gpd
import requests
import json
import pandas as pd
import os 
from tqdm import tqdm
from fredapi import Fred


## Set Up Paths

In [8]:
# Locate the repository root by walking up from the current working directory
# until a directory named `repo_name` is reached.
repo_name = 'Fed_IT_Employment'
root = os.getcwd()
while os.path.basename(root) != repo_name:
    parent = os.path.dirname(root)
    # os.path.dirname() returns its input unchanged once the filesystem root is
    # reached, so without this check the loop would never terminate when the
    # notebook is run from outside the repository.
    if parent == root:
        raise FileNotFoundError(
            f"Could not find a parent directory named '{repo_name}' "
            f"starting from {os.getcwd()}"
        )
    root = parent

# Raw and processed data directories, both relative to the repository root
rdir = os.path.join(root, 'data', 'raw_data')
pdir = os.path.join(root, 'data', 'processed_data')

print(f"Base directory: {root}\nRaw data directory: {rdir}")

Base directory: /Users/coltonlapp/Dropbox/My Mac (Coltons-MacBook-Pro.local)/Desktop/Work/USDC/publicwork/Fed_IT_Employment
Raw data directory: /Users/coltonlapp/Dropbox/My Mac (Coltons-MacBook-Pro.local)/Desktop/Work/USDC/publicwork/Fed_IT_Employment/data/raw_data


## Get API Keys

In [9]:
# Helper that reads the API keys stored in a JSON file
def load_api_keys(filepath=os.path.join(root, 'api_keys.json')):
    """Parse the JSON file at `filepath` and return its contents.

    Args:
        filepath: Path of the JSON file to read. Defaults to `api_keys.json`
            inside the repository root directory.

    Returns:
        The object produced by `json.load` for that file.
    """
    with open(filepath) as key_file:
        return json.load(key_file)

# Read every key from the JSON file once
api_keys = load_api_keys()

# Pull out the Census key and stop immediately if the entry is absent
CENSUS_API_KEY = api_keys.get('census_api_key')
if CENSUS_API_KEY is None:
    raise ValueError('The Census API key is missing. Please provide a valid key.')

print('Census API key loaded successfully.')


Census API key loaded successfully.


## Get Shapefiles for States

In [10]:
# GeoJSON file with US state boundaries, hosted on GitHub
state_shapefile_link = 'https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json'

# Download the GeoJSON. The timeout keeps the cell from hanging indefinitely,
# and raise_for_status() surfaces HTTP errors instead of attempting to parse
# an error page as JSON.
r = requests.get(state_shapefile_link, timeout=30)
r.raise_for_status()
data = r.json()

# GeoJSON coordinates are WGS84 longitude/latitude. Declaring the CRS here
# means the shapefile written later carries projection information.
states_shapes = gpd.GeoDataFrame.from_features(data['features'], crs='EPSG:4326')

# Re-attach each feature's top-level `id`, which is not part of the feature
# properties and therefore is not a column of the frame built above.
ids = [feature['id'] for feature in data['features']]
states_shapes['id'] = ids

# Keep only geometry, state name, and the id (renamed to `fips`)
states_shapes = states_shapes.drop(columns=['density'])
states_shapes = states_shapes.rename(columns={'name':'state', 'id':'fips'})

states_shapes.head()



Unnamed: 0,geometry,state,fips
0,"POLYGON ((-87.359 35.001, -85.607 34.985, -85....",Alabama,1
1,"MULTIPOLYGON (((-131.602 55.118, -131.569 55.2...",Alaska,2
2,"POLYGON ((-109.043 37.000, -109.048 31.332, -1...",Arizona,4
3,"POLYGON ((-94.474 36.502, -90.153 36.496, -90....",Arkansas,5
4,"POLYGON ((-123.233 42.006, -122.379 42.012, -1...",California,6


## Get Population Timeseries for States from FRED

In [11]:


# FRED API key, read from the same JSON file as the Census key
FRED_API_KEY = api_keys.get('FRED_api_key')

# Initialize the FRED API client
fred = Fred(api_key=FRED_API_KEY)

# State abbreviation -> two-digit FIPS code (50 states)
states = {
    'AL': '01', 'AK': '02', 'AZ': '04', 'AR': '05', 'CA': '06', 'CO': '08', 'CT': '09',
    'DE': '10', 'FL': '12', 'GA': '13', 'HI': '15', 'ID': '16', 'IL': '17', 'IN': '18',
    'IA': '19', 'KS': '20', 'KY': '21', 'LA': '22', 'ME': '23', 'MD': '24', 'MA': '25',
    'MI': '26', 'MN': '27', 'MS': '28', 'MO': '29', 'MT': '30', 'NE': '31', 'NV': '32',
    'NH': '33', 'NJ': '34', 'NM': '35', 'NY': '36', 'NC': '37', 'ND': '38', 'OH': '39',
    'OK': '40', 'OR': '41', 'PA': '42', 'RI': '44', 'SC': '45', 'SD': '46', 'TN': '47',
    'TX': '48', 'UT': '49', 'VT': '50', 'VA': '51', 'WA': '53', 'WV': '54', 'WI': '55',
    'WY': '56'
}

# One DataFrame per successfully fetched state, plus the states that failed
dfs = []
failed_states = []

# Fetch data for each state with a progress bar
for state_abbr, fips in tqdm(states.items(), desc="Fetching population data from FRED", unit="state"):
    # Series ID used for the state's population, e.g. "ALPOP" for Alabama
    series_id = f"{state_abbr}POP"

    try:
        # Fetch the population time series from FRED for the state
        population_series = fred.get_series(series_id, observation_start='2000-01-01', observation_end='2023-12-31')

        # Convert the series to a DataFrame with explicit column names
        df = population_series.reset_index()
        df.columns = ['date', 'population']

        # NOTE: population is kept exactly as FRED returns it (no rescaling).
        # The sample output (Alabama 2000 = 4452.173) indicates the unit is
        # thousands of persons, not millions.

        # Add state abbreviation and FIPS code columns
        df['state_abbr'] = state_abbr
        df['fips'] = fips

        dfs.append(df)

    except Exception as e:
        # Best effort: report the failure and continue with the other states
        print(f"Failed to fetch data for {state_abbr} ({fips}): {e}")
        failed_states.append(state_abbr)

# Summarise failures so a partial result is not mistaken for a complete one
if failed_states:
    print(f"No data fetched for {len(failed_states)} state(s): {', '.join(failed_states)}")

# pd.concat raises an opaque "No objects to concatenate" error on an empty
# list, so fail with an actionable message when every request failed.
if not dfs:
    raise ValueError('No population data could be fetched from FRED. Check the API key and network connection.')

# Concatenate all the state DataFrames into one large DataFrame
df_combined = pd.concat(dfs, ignore_index=True)

# Show the combined DataFrame
print(df_combined.head())

# Save a copy of the combined data to a CSV file in the current working directory
df_combined.to_csv('state_population_2000_2023.csv', index=False)

Fetching population data from FRED: 100%|██████████| 50/50 [00:17<00:00,  2.86state/s]

        date  population state_abbr fips
0 2000-01-01    4452.173         AL   01
1 2001-01-01    4467.634         AL   01
2 2002-01-01    4480.089         AL   01
3 2003-01-01    4503.491         AL   01
4 2004-01-01    4530.729         AL   01





## Save to processed dir

In [17]:
# Make sure the output directories exist before writing. os.makedirs with
# exist_ok=True also creates `pdir` when it is missing and keeps this cell
# re-runnable; plain os.mkdir raises FileExistsError once the directory exists.
shapefile_dir = os.path.join(pdir, 'states_shapefile')
os.makedirs(shapefile_dir, exist_ok=True)

# Save the combined population table
df_combined.to_csv(os.path.join(pdir, 'state_population_2000_2023.csv'), index=False)

# Save the GeoDataFrame as a shapefile
states_shapes.to_file(os.path.join(shapefile_dir, 'states_shapefile.shp'))