In [1]:
import pandas as pd

# Import US la.data.64.County and la.area

The Local Area Unemployment Statistics (LAUS) program produces monthly estimates for Census regions and divisions, states (plus DC and Puerto Rico), metropolitan and micropolitan statistical areas, all counties, and cities with a population of 25,000 or more. You can access these estimates using the database tools at https://www.bls.gov/lau/data.htm. 

See https://download.bls.gov/pub/time.series/la/

To attach county names, join the series_id field in the county file to the same field in the “la.series” file, then use that file's area_code field to join to the “la.area” file, which holds the area names in its area_text field. Alternatively, because the area code is embedded in the series_id, you can strip the “LAU” prefix from the front of the series id and the last two digits (03, 04, 05, or 06) from the end (giving you, for example, CN0100100000000). You can then join the resulting area code directly to the area_code field in the “la.area” file. 

Month 13 (M13) is the annual average. M01 through M12 are January-December, as you would expect.

The last two digits of the series_id are the measure code:

03 – unemployment rate

04 – unemployment (level)

05 – employment

06 – labor force


In [2]:
# Both flat files live in the same BLS LAUS time-series directory.
la_base_url = 'https://download.bls.gov/pub/time.series/la/'
url_1 = la_base_url + 'la.area'             # area code -> area name lookup
url_2 = la_base_url + 'la.data.64.County'   # county-level observations

In [3]:
# Area lookup table (area_code -> area_text). The file is tab-delimited and
# column dtypes are left for pandas to infer.
la_area = pd.read_csv(url_1, delimiter='\t')

In [4]:
# Preview the first five rows of the area lookup table.
la_area.head(n=5)

Unnamed: 0,area_type_code,area_code,area_text,display_level,selectable,sort_sequence
0,A,ST0100000000000,Alabama,0,T,1
1,A,ST0200000000000,Alaska,0,T,146
2,A,ST0400000000000,Arizona,0,T,188
3,A,ST0500000000000,Arkansas,0,T,252
4,A,ST0600000000000,California,0,T,378


In [5]:
# County-level LAUS observations. Every field is read as text, the five
# columns are given clean names, and only `year` is converted to an integer;
# `value` stays a string column.
county_columns = ['series_id', 'year', 'period', 'value', 'footnote_codes']
la_data_64_County = pd.read_csv(url_2, sep='\t', dtype=str)
la_data_64_County.columns = county_columns
la_data_64_County = la_data_64_County.astype({'year': int, 'value': str})

In [6]:
# Preview the first five county observations.
la_data_64_County.head(n=5)

Unnamed: 0,series_id,year,period,value,footnote_codes
0,LAUCN010010000000003,1990,M01,6.4,
1,LAUCN010010000000003,1990,M02,6.6,
2,LAUCN010010000000003,1990,M03,5.8,
3,LAUCN010010000000003,1990,M04,6.6,
4,LAUCN010010000000003,1990,M05,6.0,


In [7]:
# Derive helper columns from the fixed-layout series_id, e.g. LAUCN010010000000003:
#   positions 0-2    'LAU'               prefix (not kept)
#   positions 3-17   'CN0100100000000'   15-character area code, joins to la.area's area_code
#   positions 18-19  '03'                two-digit measure code
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.slice.html
series_id = la_data_64_County['series_id']
la_data_64_County['area_code'] = series_id.str[3:18]
# Parse both digits of the measure code. Slicing from position 19 would keep
# only the final digit, so a code such as 10 would be read as 0.
la_data_64_County['series'] = series_id.str[18:20].astype(int)
# period looks like 'M01'..'M13'; drop the leading letter to get the month number
# (13 is the annual average and is filtered out later).
la_data_64_County['month'] = la_data_64_County['period'].str[1:].astype(int)

In [8]:
# Flag rows whose measure code is 3 (03 = unemployment rate) and count them.
is_unemployment = la_data_64_County['series'].eq(3)
is_unemployment.value_counts()

False    3696924
True     1232308
Name: series, dtype: int64

In [9]:
# Flag true calendar months: period 13 is the annual average, not a month.
is_month = la_data_64_County['month'].ne(13)
is_month.value_counts()

True     4556004
False     373228
Name: month, dtype: int64

In [10]:
# Keep only monthly unemployment-rate observations, then summarise the
# numeric columns as a sanity check on the filter.
keep_rows = is_unemployment & is_month
la_data_64_County = la_data_64_County.loc[keep_rows]
la_data_64_County.describe()

Unnamed: 0,year,series,month
count,1139001.0,1139001.0,1139001.0
mean,2004.257,3.0,6.44913
std,8.518575,0.0,3.451675
min,1990.0,3.0,1.0
25%,1997.0,3.0,3.0
50%,2004.0,3.0,6.0
75%,2012.0,3.0,9.0
max,2019.0,3.0,12.0


In [11]:
# Attach area names from la.area to every county observation.
#
# how='left' keeps all county observations, so the indicator column can
# actually report area codes with no match in la.area as 'left_only'. With
# the default inner join those rows are discarded by the merge itself and
# the indicator can only ever read 'both', which makes the check meaningless.
# Passing a string to `indicator` names the column directly ('source').
US_counties_03 = pd.merge(
    la_data_64_County,
    la_area,
    how='left',
    on='area_code',
    indicator='source',
)

In [12]:
# Dimensions (rows, columns) of the merged frame.
US_counties_03.shape

(1139001, 14)

In [13]:
# Example series_id for reference: LAUCN010010000000003
# Tabulate the merge indicator to see how many rows matched on area_code.
# NOTE(review): this is only informative when the merge keeps unmatched rows
# (how='left' or how='outer'); an inner merge can only ever yield 'both'.
US_counties_03['source'].value_counts()

both          1139001
right_only          0
left_only           0
Name: source, dtype: int64

In [14]:
# Count rows per measure code; only code 3 should remain after the earlier filter.
US_counties_03['series'].value_counts()

3    1139001
Name: series, dtype: int64

In [15]:
# Split area_text at the first ', ' into a county part and a state part.
# The split is done once and both pieces are reused, rather than running
# the same split over the whole column twice.
area_parts = US_counties_03['area_text'].str.split(', ', n=1, expand=True)
US_counties_03['county_name'] = area_parts[0].astype(str)
# Rows whose area_text has no ', ' have no second piece; astype(str) stores
# that missing value as text, so such rows need a manual state afterwards.
US_counties_03['state'] = area_parts[1].astype(str)

In [16]:
# Trim county_name at the first ' Borough', then at the first '/',
# keeping only the text in front of each separator.
county_name = US_counties_03['county_name']
for separator in (' Borough', '/'):
    county_name = county_name.str.split(separator, n=1, expand=True)[0]
US_counties_03['county_name'] = county_name

In [17]:
# Inspect the column dtypes now that county_name and state have been added.
US_counties_03.dtypes

series_id           object
year                 int32
period              object
value               object
footnote_codes      object
area_code           object
series               int32
month                int32
area_type_code      object
area_text           object
display_level        int64
selectable          object
sort_sequence        int64
source            category
county_name         object
state               object
dtype: object

In [18]:
# Manual fixes after the generic county/state split:
# - rows named "District of Columbia" get the state code 'DC';
# - 'Dona Ana County' gets its tilde back ('Do\u00f1a Ana County').
#   The \u00f1 escape is the letter n-with-tilde; writing it as an escape keeps
#   the literal independent of the notebook's file encoding, which had turned
#   the character into mojibake.
is_dc = US_counties_03['county_name'] == "District of Columbia"
US_counties_03.loc[is_dc, 'state'] = 'DC'

is_dona_ana = US_counties_03['county_name'] == "Dona Ana County"
US_counties_03.loc[is_dona_ana, 'county_name'] = 'Do\u00f1a Ana County'

In [19]:
# Re-check the column dtypes after the manual name and state fixes.
US_counties_03.dtypes

series_id           object
year                 int32
period              object
value               object
footnote_codes      object
area_code           object
series               int32
month                int32
area_type_code      object
area_text           object
display_level        int64
selectable          object
sort_sequence        int64
source            category
county_name         object
state               object
dtype: object

In [20]:
# Remove the columns that are not carried into the exported data set.
unused_columns = [
    'period',
    'footnote_codes',
    'display_level',
    'selectable',
    'sort_sequence',
    'source',
    'area_type_code',
]
US_counties_03 = US_counties_03.drop(columns=unused_columns)

In [21]:
# Export to Stata. Rows are expected to be unique by state, county_name,
# year and month (this is not verified here).
stata_path = 'US_counties_03.dta'
US_counties_03.to_stata(stata_path)