## Download LAUS data from BLS 
  - state and county level labor market stats 
  - the [link](https://download.bls.gov/pub/time.series/la/la.series) to the data to be stored as laus.txt
  - [information about the data](https://download.bls.gov/pub/time.series/la/la.txt)

In [58]:
with open("BLS_API_KEY.txt", "r") as file:
    API_KEY = file.read()

In [44]:
## get all data series id to download remotely 

import pandas as pd

laus_series =  pd.read_csv("../OtherData/bls/laus_series.txt",
                           sep ='\t',
                           low_memory=False
                          )

In [45]:
## select data 

area_type_code = 'A' ## state 
measure_code = 3 ## UE rate 
seasonal = 'S' ## seasonal adjusted 

new_column_names = {col: col.strip().replace(' ', '_') for col in laus_series.columns}
laus_series.rename(columns=new_column_names, inplace=True)

series_ids = list((laus_series[(laus_series['area_type_code']==area_type_code) &
               (laus_series['measure_code']==measure_code) & 
               (laus_series['seasonal']==seasonal)]['series_id']))

series_ids  = [series_id.strip().replace(' ', '_') for series_id in series_ids]


In [46]:
import requests
import json

headers = {'Content-type': 'application/json'}
data = json.dumps({"seriesid": series_ids,
                   "startyear":"1976", 
                   "endyear":"2023",
                  "registrationkey":API_KEY})

p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', 
                  data=data, 
                  headers=headers)

json_data = json.loads(p.text)

table_data =  []
for series in json_data['Results']['series']:
    seriesId = series['seriesID']
    for item in series['data']:
        year = item['year']
        period = item['period']
        value = item['value']
        footnotes=""
        for footnote in item['footnotes']:
            if footnote:
                footnotes = footnotes + footnote['text'] + ','
        if 'M01' <= period <= 'M12':
            table_data.append([seriesId,
                       year,
                       period,
                       value,
                       footnotes[0:-1]])
        

In [57]:
print('Total length of data: '+str(len(table_data)))

Total length of data: 12000


In [69]:
# Create a Pandas DataFrame from the extracted data
names = ["series_id","year","period","value","footnotes"]
df_all = pd.DataFrame(table_data, columns=names)

In [70]:
# Extract data from PrettyTable

df_all['state'] = df_all['series_id'].str[5:7].astype(int)

In [71]:
## how many states have data?

len(df_all['state'].unique())

50

In [72]:
df_all

Unnamed: 0,series_id,year,period,value,footnotes,state
0,LASST010000000000003,1995,M12,5.5,,1
1,LASST010000000000003,1995,M11,5.6,,1
2,LASST010000000000003,1995,M10,5.7,,1
3,LASST010000000000003,1995,M09,5.8,,1
4,LASST010000000000003,1995,M08,5.9,,1
...,...,...,...,...,...,...
11995,LASST550000000000003,1976,M05,5.5,,55
11996,LASST550000000000003,1976,M04,5.6,,55
11997,LASST550000000000003,1976,M03,5.7,,55
11998,LASST550000000000003,1976,M02,5.8,,55


In [73]:
df_all['date'] = pd.to_datetime(df_all['year'].astype(str) + df_all['period'], format='%YM%m')

In [74]:
df_all = df_all.drop(columns=['series_id','year','period'])

In [75]:
df_all

Unnamed: 0,value,footnotes,state,date
0,5.5,,1,1995-12-01
1,5.6,,1,1995-11-01
2,5.7,,1,1995-10-01
3,5.8,,1,1995-09-01
4,5.9,,1,1995-08-01
...,...,...,...,...
11995,5.5,,55,1976-05-01
11996,5.6,,55,1976-04-01
11997,5.7,,55,1976-03-01
11998,5.8,,55,1976-02-01
