In [6]:
import requests
import pandas as pd
from sqlalchemy import create_engine

In [7]:
import json

Need to Initialize variables for the token and url values

In [9]:
token = '70e5b01b221e44558b7ea4b6bf6ba0be'
url = 'https://api.bls.gov/publicAPI/v2/timeseries'

Creating the key per BLS specifications and initiliazing a dictionary to specify which Series we would like to gather

In [11]:
key = '?registrationkey={}'.format(token)

# Dictionary of series IDs from BLS website, with short description of what the series contains
series_dict = {
    'LNS14000025': 'National Men',
    'LNS14000026': 'National Women',
    'LNS14000003': 'National White',
    'LNS14000006': 'National Black/AA',
    'LNS14032183': 'National Asian',
    'LNS14000009': 'National Hispanic/Latino'
    }

The API call to gather the json data happens here

In [13]:
headers = {'Content-type': 'application/json'}

# Range of years to pull
dates = ('2019', '2023')

data = json.dumps({
    "seriesid": list(series_dict.keys()),
    "startyear": dates[0],
    "endyear": dates[1]})

response = requests.post(
    '{}{}'.format(f'{url}/data/', key),
    headers=headers,
    data=data).json()['Results']['series']


Defining functions to programmatically pull data out of json for use in a dataframe

In [15]:
# First function is to pull the year and month values from the json data
def parse_json_time(series):
  df = pd.DataFrame()

  # initialize blank lists for years and months
  year = list()
  month = list()

  # iterate through each item in the data and append needed values
  for i in series['data']:
    year.append(i['year'])
    month.append(i['periodName'])

  # use each least created in the for loop to create a column
  df['Year'] = year
  df['Month'] = month
  return df


# Second function is to pull only the data values from the json data
def parse_json_value(series):

  # Initialze blank list for the values
  values = list()

  # Iterate through each item and add just what is in the value key to the list
  for i in series['data']:
    values.append(i['value'])

  # return the list of values to be added to the dataframe in next step
  return values

In [16]:
# Initialized datafram with just the months and years gathered
df = parse_json_time(response[0])
df.head()

Unnamed: 0,Year,Month
0,2023,December
1,2023,November
2,2023,October
3,2023,September
4,2023,August


In [17]:
# Iterate through every series in the json data with the parse_json_value function
# This will create a list that can be added to the dataframe as a column
# The column name for each list is the value associated with the seriesID key pulled from the jason data

for i in response:
  df[series_dict[i['seriesID']]] = parse_json_value(i)

Final unemployment dataframe includes a columns for year and month, and for each series values

In [19]:
df.head()

Unnamed: 0,Year,Month,National Men,National Women,National White,National Black/AA,National Asian,National Hispanic/Latino
0,2023,December,3.5,3.3,3.5,5.2,3.1,5.0
1,2023,November,3.7,3.1,3.3,5.8,3.5,4.6
2,2023,October,3.7,3.2,3.5,5.8,3.1,4.8
3,2023,September,3.8,3.1,3.4,5.7,2.9,4.6
4,2023,August,3.7,3.2,3.4,5.3,3.2,4.9


In [20]:
df['Date'] = pd.to_datetime(df['Year'] + ' ' + df['Month'])

  df['Date'] = pd.to_datetime(df['Year'] + ' ' + df['Month'])


In [21]:
df = df.drop(['Year', 'Month'], axis=1)

In [22]:
numeric_columns = ['National Men',	'National Women',	'National White',	'National Black/AA',	'National Asian',	'National Hispanic/Latino']

In [23]:
for i in numeric_columns:
  df[i] = pd.to_numeric(df[i])

In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   National Men              60 non-null     float64       
 1   National Women            60 non-null     float64       
 2   National White            60 non-null     float64       
 3   National Black/AA         60 non-null     float64       
 4   National Asian            60 non-null     float64       
 5   National Hispanic/Latino  60 non-null     float64       
 6   Date                      60 non-null     datetime64[ns]
dtypes: datetime64[ns](1), float64(6)
memory usage: 3.4 KB


In [25]:
df.head()

Unnamed: 0,National Men,National Women,National White,National Black/AA,National Asian,National Hispanic/Latino,Date
0,3.5,3.3,3.5,5.2,3.1,5.0,2023-12-01
1,3.7,3.1,3.3,5.8,3.5,4.6,2023-11-01
2,3.7,3.2,3.5,5.8,3.1,4.8,2023-10-01
3,3.8,3.1,3.4,5.7,2.9,4.6,2023-09-01
4,3.7,3.2,3.4,5.3,3.2,4.9,2023-08-01


In [26]:
df.to_csv('demos_df.csv')

In [27]:
DATABASE_URL = 'postgresql://postgres.gouknruvfnjedjxvfpim:fufca5-jUppob-xoncek@aws-0-us-east-2.pooler.supabase.com:6543/postgres'
engine = create_engine(DATABASE_URL)
engine

Engine(postgresql://postgres.gouknruvfnjedjxvfpim:***@aws-0-us-east-2.pooler.supabase.com:6543/postgres)

In [28]:
with engine.connect() as conn:
    df.to_sql("unemployment_demographics", conn, index=False, if_exists='replace')

In [29]:
pd.read_sql('SELECT * FROM unemployment_demographics', engine).head()

Unnamed: 0,National Men,National Women,National White,National Black/AA,National Asian,National Hispanic/Latino,Date
0,3.5,3.3,3.5,5.2,3.1,5.0,2023-12-01
1,3.7,3.1,3.3,5.8,3.5,4.6,2023-11-01
2,3.7,3.2,3.5,5.8,3.1,4.8,2023-10-01
3,3.8,3.1,3.4,5.7,2.9,4.6,2023-09-01
4,3.7,3.2,3.4,5.3,3.2,4.9,2023-08-01
