## Download LAUS data from BLS 
  - state and county level labor market stats 
  - the [link](https://download.bls.gov/pub/time.series/la/la.series) to the series names
  - the format of the series id is described [here](https://www.bls.gov/help/hlpforma.htm#LA)
  - [information about the data](https://download.bls.gov/pub/time.series/la/la.txt)
  
  
  - BLS: 'Registered users may request up to 20 years per query'
  - So, to stay within that limit, each extract covers at most 20 calendar years (end year minus start year = 19, e.g. 2004-2023)

In [1]:
## Read the BLS registration key from a local text file.
## Surrounding whitespace is stripped so that a trailing newline in the
## file does not end up inside the key that is sent to the API.
with open("BLS_API_KEY.txt", "r") as file:
    API_KEY = file.read().strip()

In [2]:
## get all data series id to download remotely 

import pandas as pd

## the series listing is tab-separated (read_table's default separator);
## low_memory=False reads the file in one piece instead of in chunks
laus_series = pd.read_table(
    "../OtherData/bls/laus_series.txt",
    low_memory=False,
)

In [3]:
## choose which series to request from the API

area_type_code = 'A'  ## state
measure_code = 3  ## UE rate
seasonal = 'S'  ## seasonal adjusted

## normalise the column headers: trim surrounding blanks, inner blanks -> '_'
new_column_names = dict(
    (col, col.strip().replace(' ', '_')) for col in laus_series.columns
)
laus_series.rename(columns=new_column_names, inplace=True)

## ids of the rows matching all three codes, cleaned the same way as the headers
series_ids = [
    raw_id.strip().replace(' ', '_')
    for raw_id in laus_series.loc[
        (laus_series['area_type_code'] == area_type_code)
        & (laus_series['measure_code'] == measure_code)
        & (laus_series['seasonal'] == seasonal),
        'series_id',
    ]
]


In [4]:
import requests
import json

def download_from_bls(series_ids = series_ids,
                      start_year = 2004,
                      end_year = 2023,
                      key = API_KEY,
                      batch_size = 50,
                      timeout = 60):
    """
    Download monthly observations from the BLS public API (v2).

    Parameters
    ----------
    series_ids : list of str
        Series ids to request. They are sent in consecutive batches of at
        most `batch_size` ids, one POST request per batch.
    start_year, end_year : int
        First and last year requested (sent to the API as strings).
    key : str
        BLS registration key.
    batch_size : int
        Maximum number of series per request. The default of 50 follows the
        per-query series limit given in the BLS API v2 documentation for
        registered users.
    timeout : float
        Seconds to wait for each HTTP request.

    Returns
    -------
    list of list
        One row [series_id, year, period, value, footnotes] per observation
        whose period lies between 'M01' and 'M12'; other periods are skipped.
        `footnotes` holds the footnote texts joined by commas ('' if none).

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    requests.HTTPError
        If a request comes back with an HTTP error status.
    RuntimeError
        If the API reports a status other than 'REQUEST_SUCCEEDED'.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'
    headers = {'Content-type': 'application/json'}
    series_ids = list(series_ids)

    table_data = []
    ## one request per batch, so that no series is left out when more ids
    ## are requested than a single query may carry
    for first in range(0, len(series_ids), batch_size):
        batch = series_ids[first:first + batch_size]
        payload = json.dumps({"seriesid": batch,
                              "startyear": str(start_year),
                              "endyear": str(end_year),
                              "registrationkey": key})

        response = requests.post(url,
                                 data=payload,
                                 headers=headers,
                                 timeout=timeout)
        response.raise_for_status()
        json_data = response.json()

        ## fail loudly instead of returning a partial or empty extract
        status = json_data.get('status')
        if status != 'REQUEST_SUCCEEDED':
            raise RuntimeError('BLS API request failed with status '
                               + repr(status) + ': '
                               + repr(json_data.get('message')))

        for series in json_data['Results']['series']:
            seriesId = series['seriesID']
            for item in series['data']:
                period = item['period']
                ## keep monthly observations only
                if not ('M01' <= period <= 'M12'):
                    continue
                ## empty footnote entries are skipped
                footnotes = ','.join(footnote['text']
                                     for footnote in item['footnotes']
                                     if footnote)
                table_data.append([seriesId,
                                   item['year'],
                                   period,
                                   item['value'],
                                   footnotes])
    return table_data


In [5]:
## Set DOWNLOAD = True to query the API again; otherwise the saved pickle is reused.
DOWNLOAD = False
if not DOWNLOAD:
    df_all = pd.read_pickle('../OtherData/bls/laus_raw.pkl')
else:
    ## one (start year, end year) window per call to download_from_bls
    sample_list = [(2004,2023),
                   (1984,2003),
                   (1976,1983)]

    table_data = []
    for i, sample_year in enumerate(sample_list):
        this_extact = download_from_bls(
            series_ids=series_ids,
            start_year=sample_year[0],
            end_year=sample_year[1],
            key=API_KEY,
        )
        print(f'length of this extact is {len(this_extact)}')
        table_data.extend(this_extact)
    print(f'Total length of data: {len(table_data)}')

    ## gather every row in one DataFrame and save it for later runs
    names = ["series_id","year","period","value","footnotes"]
    df_all = pd.DataFrame(table_data, columns=names)
    df_all.to_pickle('../OtherData/bls/laus_raw.pkl')

In [6]:
# Take the two characters at positions 5-6 of the series id
# (e.g. '01' in 'LASST010000000000003') and store them as an integer state code.

df_all['state'] = df_all['series_id'].str.slice(5, 7).astype(int)

In [7]:
## number of distinct state codes in the data
## NOTE(review): the recorded output is 50 although fips_to_state lists 51
## areas -- check whether the download returned every requested series.

df_all['state'].nunique()

50

In [8]:
## preview the raw extract (series_id, year, period, value, footnotes, state)
df_all

Unnamed: 0,series_id,year,period,value,footnotes,state
0,LASST010000000000003,2023,M07,2.1,Preliminary.,1
1,LASST010000000000003,2023,M06,2.2,,1
2,LASST010000000000003,2023,M05,2.2,,1
3,LASST010000000000003,2023,M04,2.2,,1
4,LASST010000000000003,2023,M03,2.3,,1
...,...,...,...,...,...,...
28545,LASST550000000000003,1976,M05,5.5,,55
28546,LASST550000000000003,1976,M04,5.6,,55
28547,LASST550000000000003,1976,M03,5.7,,55
28548,LASST550000000000003,1976,M02,5.8,,55


In [9]:
## tidy up: build a monthly date, drop the raw id/period columns,
## rename the remaining columns and make the rate numeric

df_all['date'] = pd.to_datetime(
    df_all['year'].astype(str) + df_all['period'],
    format='%YM%m',
)
df_all = (
    df_all
    .drop(columns=['series_id', 'year', 'period', 'footnotes'])
    .rename(columns={'state': 'statecode', 'value': 'ue_rate'})
)
df_all['ue_rate'] = df_all['ue_rate'].astype(float)

In [14]:
## Lookup from the integer state FIPS code (as extracted from the series id)
## to the area name. Covers the 50 states, the District of Columbia and
## Puerto Rico, so that no state-level series is left without a name.
fips_to_state = {
    1: 'Alabama',
    2: 'Alaska',
    4: 'Arizona',
    5: 'Arkansas',
    6: 'California',
    8: 'Colorado',
    9: 'Connecticut',
    10: 'Delaware',
    11: 'District of Columbia',
    12: 'Florida',
    13: 'Georgia',
    15: 'Hawaii',
    16: 'Idaho',
    17: 'Illinois',
    18: 'Indiana',
    19: 'Iowa',
    20: 'Kansas',
    21: 'Kentucky',
    22: 'Louisiana',
    23: 'Maine',
    24: 'Maryland',
    25: 'Massachusetts',
    26: 'Michigan',
    27: 'Minnesota',
    28: 'Mississippi',
    29: 'Missouri',
    30: 'Montana',
    31: 'Nebraska',
    32: 'Nevada',
    33: 'New Hampshire',
    34: 'New Jersey',
    35: 'New Mexico',
    36: 'New York',
    37: 'North Carolina',
    38: 'North Dakota',
    39: 'Ohio',
    40: 'Oklahoma',
    41: 'Oregon',
    42: 'Pennsylvania',
    44: 'Rhode Island',
    45: 'South Carolina',
    46: 'South Dakota',
    47: 'Tennessee',
    48: 'Texas',
    49: 'Utah',
    50: 'Vermont',
    51: 'Virginia',
    53: 'Washington',
    54: 'West Virginia',
    55: 'Wisconsin',
    56: 'Wyoming',
    72: 'Puerto Rico',
}


In [15]:
## look up the area name for every state code; codes that are not a key of
## fips_to_state get NaN
df_all['state_name'] = df_all['statecode'].map(fips_to_state)

In [17]:
## preview the cleaned table (ue_rate, statecode, date, state_name)
df_all

Unnamed: 0,ue_rate,statecode,date,state_name
0,2.1,1,2023-07-01,Alabama
1,2.2,1,2023-06-01,Alabama
2,2.2,1,2023-05-01,Alabama
3,2.2,1,2023-04-01,Alabama
4,2.3,1,2023-03-01,Alabama
...,...,...,...,...
28545,5.5,55,1976-05-01,Wisconsin
28546,5.6,55,1976-04-01,Wisconsin
28547,5.7,55,1976-03-01,Wisconsin
28548,5.8,55,1976-02-01,Wisconsin
