In [44]:
# Dependencies and Setup
import pandas as pd
import numpy as np
from datetime import datetime,date

# Read the gzip-compressed pickle into a DataFrame.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
LNSDF = pd.read_pickle(
    'resources/LNS2020.pkl',
    compression='gzip',
)

# Show the first rows as a quick sanity check of the load
LNSDF.head()

Unnamed: 0,series_id,year,value,Month,lfst_code,periodicity_code,series_title,activity_code,ages_code,class_code,duration_code,education_code,entr_code,expr_code,indy_text
0,LNS11000000,2015,157030.0,1,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
1,LNS11000000,2015,156644.0,2,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
2,LNS11000000,2015,156643.0,3,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
3,LNS11000000,2015,157060.0,4,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
4,LNS11000000,2015,157651.0,5,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries


In [45]:
# Show the dtype pandas assigned to each column
LNSDF.dtypes

series_id            object
year                  int64
value               float64
Month                 int64
lfst_code             int64
periodicity_code     object
series_title         object
activity_code         int64
ages_code            object
class_code            int64
duration_code         int64
education_code       object
entr_code             int64
expr_code             int64
indy_text            object
dtype: object

In [46]:
# List the column labels to check them for stray spaces
LNSDF.columns

Index(['series_id', 'year', 'value', 'Month', 'lfst_code', 'periodicity_code',
       'series_title', 'activity_code', 'ages_code', 'class_code',
       'duration_code', 'education_code', 'entr_code', 'expr_code',
       'indy_text'],
      dtype='object')

In [47]:
# Non-null count per column: a column with a lower count than the others
# contains missing values
LNSDF.count()

series_id           2080
year                2080
value               2080
Month               2080
lfst_code           2080
periodicity_code    2080
series_title        2080
activity_code       2080
ages_code           2080
class_code          2080
duration_code       2080
education_code      2080
entr_code           2080
expr_code           2080
indy_text           2080
dtype: int64

In [48]:
# Count how many rows each series_id has
LNSDF['series_id'].value_counts()

LNS11000000    65
LNS11000002    65
LNS13023653    65
LNS13008396    65
LNS13000089    65
LNS13027662    65
LNS13023621    65
LNS13008516    65
LNS13008636    65
LNS13008876    65
LNS13000000    65
LNS13024230    65
LNS13000036    65
LNS13000003    65
LNS13027659    65
LNS13000001    65
LNS13026638    65
LNS13026637    65
LNS13000006    65
LNS13000091    65
LNS13027689    65
LNS13000009    65
LNS13032183    65
LNS13023569    65
LNS13000012    65
LNS13025699    65
LNS13000093    65
LNS13027660    65
LNS13008756    65
LNS11000001    65
LNS13000002    65
LNS13023705    65
Name: series_id, dtype: int64

In [49]:
# Series IDs to keep for the demographic unemployment data.
# This collects every distinct series_id in LNSDF (in order of first
# appearance), so no ID is excluded at this point.
series = pd.unique(LNSDF['series_id'])
print(series)

['LNS11000000' 'LNS11000001' 'LNS11000002' 'LNS13000000' 'LNS13000001'
 'LNS13000002' 'LNS13000003' 'LNS13000006' 'LNS13000009' 'LNS13000012'
 'LNS13000036' 'LNS13000089' 'LNS13000091' 'LNS13000093' 'LNS13008396'
 'LNS13008516' 'LNS13008636' 'LNS13008756' 'LNS13008876' 'LNS13023569'
 'LNS13023621' 'LNS13023653' 'LNS13023705' 'LNS13024230' 'LNS13025699'
 'LNS13026637' 'LNS13026638' 'LNS13027659' 'LNS13027660' 'LNS13027662'
 'LNS13027689' 'LNS13032183']


In [50]:
# Boolean mask: True where the row's series_id is one of the IDs in `series`
condition = LNSDF.series_id.isin(series)

# Keep only the matching rows and preview the result
LNSSeriesDF = LNSDF.loc[condition]
LNSSeriesDF.head()

Unnamed: 0,series_id,year,value,Month,lfst_code,periodicity_code,series_title,activity_code,ages_code,class_code,duration_code,education_code,entr_code,expr_code,indy_text
0,LNS11000000,2015,157030.0,1,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
1,LNS11000000,2015,156644.0,2,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
2,LNS11000000,2015,156643.0,3,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
3,LNS11000000,2015,157060.0,4,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries
4,LNS11000000,2015,157651.0,5,10,M,(Seas) Civilian Labor Force Level,0,0,0,0,0,0,0,All Industries


In [51]:
# New df with only the columns needed downstream.
# .copy() gives unemp_df its own data: a plain column selection may be a view
# of LNSSeriesDF, and assigning a new column to it raises
# SettingWithCopyWarning.
unemp_df = LNSSeriesDF[['year', 'Month', 'value', 'series_title']].copy()

# Cast year and month to strings and join them as "<year>-<month>", e.g. "2015-1".
# NOTE(review): the month is not zero-padded, so sorting these strings is not
# chronological ("2015-10" sorts before "2015-2").
unemp_df['date'] = unemp_df['year'].astype(str) + "-" + unemp_df['Month'].astype(str)
unemp_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,year,Month,value,series_title,date
0,2015,1,157030.0,(Seas) Civilian Labor Force Level,2015-1
1,2015,2,156644.0,(Seas) Civilian Labor Force Level,2015-2
2,2015,3,156643.0,(Seas) Civilian Labor Force Level,2015-3
3,2015,4,157060.0,(Seas) Civilian Labor Force Level,2015-4
4,2015,5,157651.0,(Seas) Civilian Labor Force Level,2015-5


# Wide view of the data: one row per date and one column per series_title,
# holding the mean of 'value' (pivot_table's default aggregation).
# It is bound to its own name instead of overwriting unemp_df, because the
# following cells rename and read the long-format columns of unemp_df
# ('Month', 'value', 'series_title'); rebinding unemp_df to the pivot would
# break them when the notebook is run top to bottom.
unemp_pivot_df = pd.pivot_table(
    unemp_df,
    index=["date"],
    values=["value"],
    columns=["series_title"],
).reset_index()  # turn the 'date' index back into a regular column
unemp_pivot_df.head()

In [52]:
# Rename columns to descriptive, lower-case names.
# rename() returns a new frame that is rebound to unemp_df; this avoids the
# SettingWithCopyWarning that inplace=True raises when the frame is a slice
# of another DataFrame.
unemp_df = unemp_df.rename(
    columns={
        'Month': 'month',
        'value': 'number_of_persons',
        'series_title': 'category',
    }
)
unemp_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,year,month,number_of_persons,category,date
0,2015,1,157030.0,(Seas) Civilian Labor Force Level,2015-1
1,2015,2,156644.0,(Seas) Civilian Labor Force Level,2015-2
2,2015,3,156643.0,(Seas) Civilian Labor Force Level,2015-3
3,2015,4,157060.0,(Seas) Civilian Labor Force Level,2015-4
4,2015,5,157651.0,(Seas) Civilian Labor Force Level,2015-5


In [53]:
# Print the distinct category values so they can be mapped to shorter labels
print(unemp_df['category'].unique())

['(Seas) Civilian Labor Force Level'
 '(Seas) Civilian Labor Force Level - Men'
 '(Seas) Civilian Labor Force Level - Women' '(Seas) Unemployment Level'
 '(Seas) Unemployment Level - Men' '(Seas) Unemployment Level - Women'
 '(Seas) Unemployment Level - White'
 '(Seas) Unemployment Level - Black or African American'
 '(Seas) Unemployment Level - Hispanic or Latino'
 '(Seas) Unemployment Level - 16-19 yrs.'
 '(Seas) Unemployment Level - 20-24 yrs.'
 '(Seas) Unemployment Level - 25-34 yrs.'
 '(Seas) Unemployment Level - 35-44 yrs.'
 '(Seas) Unemployment Level - 45-54 yrs.'
 '(Seas) Number Unemployed for Less than 5 Weeks'
 '(Seas) Number Unemployed for 15 Weeks & over'
 '(Seas) Number Unemployed for 27 Weeks & over'
 '(Seas) Number Unemployed for 5-14 Weeks'
 '(Seas) Number Unemployed for 15-26 Weeks'
 '(Seas) Unemployment Level - New Entrants'
 '(Seas) Unemployment Level - Job Losers'
 '(Seas) Unemployment Level - Job Losers on Layoff'
 '(Seas) Unemployment Level - Job Leavers'
 '(Seas)

In [56]:
# Map each full series title to a short display label.
# Values that do not appear as a key are left unchanged by replace().
# NOTE(review): the recorded output of the following cell shows "HS No College",
# while this mapping produces "HS no College" - confirm the intended spelling.
category_labels = {
    "(Seas) Civilian Labor Force Level": "Labor Force Total",
    "(Seas) Civilian Labor Force Level - Men": "Labor Force Men",
    "(Seas) Civilian Labor Force Level - Women": "Labor Force Women",
    "(Seas) Unemployment Level": "Unemployment Total",
    "(Seas) Unemployment Level - Men": "Men",
    "(Seas) Unemployment Level - Women": "Women",
    "(Seas) Unemployment Level - White": "Caucasian",
    "(Seas) Unemployment Level - Black or African American": "African American",
    "(Seas) Unemployment Level - Hispanic or Latino": "Hispanic",
    "(Seas) Unemployment Level - Asian": "Asian",
    "(Seas) Unemployment Level - 16-19 yrs.": "16-19",
    "(Seas) Unemployment Level - 20-24 yrs.": "20-24",
    "(Seas) Unemployment Level - 25-34 yrs.": "25-34",
    "(Seas) Unemployment Level - 35-44 yrs.": "35-44",
    "(Seas) Unemployment Level - 45-54 yrs.": "45-54",
    "(Seas) Unemployment Level - New Entrants": "New Entrants",
    "(Seas) Unemployment Level - Job Losers": "Job Losers",
    "(Seas) Unemployment Level - Job Losers on Layoff": "Job Losers on Layoff",
    "(Seas) Unemployment Level - Job Leavers": "Job Leavers",
    "(Seas) Unemployment Level - Job Losers Not on Layoff": "Job Losers not on Layoff",
    "(Seas) Unemployment Level - Persons who Completed Temporary Jobs": "Persons Completed Temp. Jobs",
    "(Seas) Unemployment Level - Permanent Job Losers": "Permanent Job Losers",
    "(Seas) Number Unemployed for Less than 5 Weeks": "Unemployed Less than 5 Weeks",
    "(Seas) Number Unemployed for 5-14 Weeks": "Unemployed 5-14 Weeks",
    "(Seas) Number Unemployed for 15 Weeks & over": "Unemployed 15 Weeks & over",
    "(Seas) Number Unemployed for 15-26 Weeks": "Unemployed 15-26 Weeks",
    "(Seas) Number Unemployed for 27 Weeks & over": "Unemployed 27 Weeks & over",
    "(Seas) Unemployment Level - 55 yrs. & over": "55 & Over",
    "(Seas) Unemployment Level - Less than a High School Diploma, 25 yrs. & over": "Less than HS Diploma",
    "(Seas) Unemployment Level - High School Graduates, No College, 25 yrs. & over": "HS no College",
    "(Seas) Unemployment Level - Some College or Associate Degree, 25 yrs. & over": "Some College or Associate Degree",
    "(Seas) Unemployment Level - Bachelor's degree and higher, 25 yrs. & over": "Bachelors degree and higher",
}

# Assign the result back to the column. Calling replace(..., inplace=True) on
# unemp_df['category'] mutates an intermediate Series, which under pandas
# Copy-on-Write does not update unemp_df.
unemp_df['category'] = unemp_df['category'].replace(category_labels)

In [57]:
# Row count per category, to confirm the updated labels
unemp_df['category'].value_counts()

Women                               65
20-24                               65
Hispanic                            65
55 & Over                           65
Unemployed 27 Weeks & over          65
Job Losers on Layoff                65
Unemployed 5-14 Weeks               65
African American                    65
25-34                               65
Some College or Associate Degree    65
HS No College                       65
Asian                               65
Labor Force Total                   65
Labor Force Men                     65
Job Losers                          65
Unemployed 15-26 Weeks              65
Persons Completed Temp. Jobs        65
Job Leavers                         65
Less than HS Diploma                65
New Entrants                        65
Unemployed Less than 5 Weeks        65
35-44                               65
Caucasian                           65
Permanent Job Losers                65
16-19                               65
Unemployed 15 Weeks & ove

In [59]:
# Export clean data to CSV for analysis (index=False leaves the row index out of the file).
# NOTE(review): this writes under "Resources/" while the input pickle is read
# from 'resources/'; on a case-sensitive filesystem these are different
# directories - confirm which one is intended.
output_file = "Resources/Demographics_Data.csv"
unemp_df.to_csv(output_file, index=False)

#mode='a'

In [37]:
# get Unemployment level
# LNS13000000 all
# LNS13000001 Men
# LNS13000002  Women
#condition = LNSDF['series_id'].isin(['LNS13000000','LNS13000001','LNS13000002'])
#allUnemploymentDF = LNSDF[condition]
#allUnemploymentDF.head()

In [30]:
#df = allUnemploymentDF[['series_id','year','Month','value']]
#df['date']=df['year'].astype(str) + "-"+ df['Month'].astype(str) 
#df.head()

In [31]:
#df = pd.pivot_table(df,index=['date'],values=['value'],columns=['series_id'])
#df.head()

In [32]:
#df.reset_index(inplace=True)
#df.columns

In [33]:
#ax = df.plot(xticks=df.index)