In [1]:
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from config import bls_api_key

# Shared reporting window (years as strings) used by the BLS API calls below
start_year, end_year = "2008", "2020"

# Demographic unemployment rate Data Pull

In [None]:
# Pull gender, race, educational attainment, and age unemployment-rate series
# from the Bureau of Labor Statistics (BLS) public API v2.

url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {"content-type": "application/json"}

# One BLS series ID per demographic group; the rename cell further down maps
# each ID to a readable column label.
target_series = ["LNS14000000",
                 "LNS14000001",
                 "LNS14000002",
                 "LNS14000003",
                 "LNS14000006",
                 "LNS14032183",
                 "LNS14000009",
                 "LNS14027659",
                 "LNS14027660",
                 "LNS14027689",
                 "LNS14027662",
                 "LNS14024887",
                 "LNS14000089",
                 "LNS14000091",
                 "LNS14000093",
                 "LNS14024230",
                 ]

# The reporting window comes from start_year / end_year set in the first cell.
# This cell previously reassigned them (2008-2018), which silently changed the
# window of every request issued after it on a top-to-bottom run.
parameters = json.dumps({"seriesid": target_series,
                         "startyear": start_year,
                         "endyear": end_year,
                         "registrationkey": bls_api_key})

# A timeout keeps the notebook from hanging if the service does not respond.
p = requests.post(url, data=parameters, headers=headers, timeout=30)
p.raise_for_status()  # surface HTTP errors here instead of as a KeyError later

json_data = p.json()

# The API reports request-level problems in the payload, so check it as well.
if json_data.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(
        f"BLS request failed: {json_data.get('status')} {json_data.get('message')}"
    )

json_data

In [None]:
# Collect the year and month name of every observation in the first series;
# these lists become the row labels of the demographic table.
first_series_points = json_data["Results"]["series"][0]["data"]

years = [point["year"] for point in first_series_points]
months = [point["periodName"] for point in first_series_points]

In [None]:
# Create DataFrame with one row per (year, month) observation and an empty
# placeholder column for each unemployment series; the next cell fills them in.

demographic_series_ids = ["LNS14000000",
                          "LNS14000001",
                          "LNS14000002",
                          "LNS14000003",
                          "LNS14000006",
                          "LNS14032183",
                          "LNS14000009",
                          "LNS14027659",
                          "LNS14027660",
                          "LNS14027689",
                          "LNS14027662",
                          "LNS14024887",
                          "LNS14000089",
                          "LNS14000091",
                          "LNS14000093",
                          "LNS14024230",
                          ]

# The previous version listed "LNS14000000" twice in the dict literal and then
# called set_index(...) on every column without keeping the result, which had
# no effect. The frame keeps its default integer index, which the fill cell
# relies on.
layoffs_df = pd.DataFrame({"years": years,
                           "months": months,
                           **{series_id: "" for series_id in demographic_series_ids}})
layoffs_df.head()

In [None]:
# Fill each series' column with its observations.
# Rows are matched on (year, month) instead of list position, so a series with
# a missing or extra observation cannot shift values onto the wrong row.

row_periods = list(zip(layoffs_df["years"], layoffs_df["months"]))

for series in json_data["Results"]["series"]:
    value_by_period = {(point["year"], point["periodName"]): point["value"]
                       for point in series["data"]}
    # The API returns values as strings; convert them so the rates can be
    # averaged and plotted. Missing or non-numeric entries become NaN.
    layoffs_df[series["seriesID"]] = pd.to_numeric(
        pd.Series([value_by_period.get(period) for period in row_periods],
                  index=layoffs_df.index),
        errors="coerce")

layoffs_df.head()

In [None]:
# Replace the BLS series IDs with readable column labels.
# Every label shares the same prefix, so only the group description is listed.
group_labels = {
    "LNS14000000": "16yo and over",
    "LNS14000001": "Men 16yo and over",
    "LNS14000002": "Women 16yo and over",
    "LNS14000003": "White people 16yo and over",
    "LNS14000006": "African Americans 16yo and over",
    "LNS14032183": "Asian people 16yo and over",
    "LNS14000009": "Hispanic people 16yo and over",
    "LNS14027659": "25yo and older with less than High School Diploma",
    "LNS14027660": "25yo and older with High School Degree",
    "LNS14027689": "25yo and older with some college",
    "LNS14027662": "25yo and older with Bachelor's degree or higher",
    "LNS14024887": "16-24yo",
    "LNS14000089": "25-34yo",
    "LNS14000091": "35-44yo",
    "LNS14000093": "45-54yo",
    "LNS14024230": "55yo and over",
}

layoffs_df.rename(
    columns={series_id: f"Unemployment Rate for {label}"
             for series_id, label in group_labels.items()},
    inplace=True,
)
layoffs_df.head(24)

In [2]:
# Industry data pull: employment-level series per industry from the BLS
# public API v2.
url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {"content-type": "application/json"}

# One BLS series ID per industry; the rename cell further down maps each ID to
# a readable column label.
target_series = ["CES0000000001",
                 "CES0500000001",
                 "CES0600000001",
                 "CES0700000001",
                 "CES0800000001",
                 "CES1000000001",
                 "CES2000000001",
                 "CES3000000001",
                 "CES3100000001",
                 "CES3200000001",
                 "CES4000000001",
                 "CES4142000001",
                 "CES4200000001",
                 "CES4300000001",
                 "CES4422000001",
                 "CES5000000001",
                 "CES5500000001",
                 "CES6000000001",
                 "CES6500000001",
                 "CES7000000001",
                 "CES8000000001",
                 "CES9000000001"
                 ]

# The reporting window is the shared start_year / end_year from the first cell.
parameters = json.dumps({"seriesid": target_series,
                         "startyear": start_year,
                         "endyear": end_year,
                         "registrationkey": bls_api_key})

# A timeout keeps the notebook from hanging if the service does not respond.
p = requests.post(url, data=parameters, headers=headers, timeout=30)
p.raise_for_status()  # surface HTTP errors here instead of as a KeyError later

json_data4 = p.json()

# The API reports request-level problems in the payload, so check it as well.
if json_data4.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(
        f"BLS request failed: {json_data4.get('status')} {json_data4.get('message')}"
    )

json_data4

{'status': 'REQUEST_SUCCEEDED',
 'responseTime': 1589,
 'message': [],
 'Results': {'series': [{'seriesID': 'CES0000000001',
    'data': [{'year': '2020',
      'period': 'M03',
      'periodName': 'March',
      'latest': 'true',
      'value': '151786',
      'footnotes': [{'code': 'P', 'text': 'preliminary'}]},
     {'year': '2020',
      'period': 'M02',
      'periodName': 'February',
      'value': '152487',
      'footnotes': [{'code': 'P', 'text': 'preliminary'}]},
     {'year': '2020',
      'period': 'M01',
      'periodName': 'January',
      'value': '152212',
      'footnotes': [{}]},
     {'year': '2019',
      'period': 'M12',
      'periodName': 'December',
      'value': '151998',
      'footnotes': [{}]},
     {'year': '2019',
      'period': 'M11',
      'periodName': 'November',
      'value': '151814',
      'footnotes': [{}]},
     {'year': '2019',
      'period': 'M10',
      'periodName': 'October',
      'value': '151553',
      'footnotes': [{}]},
     {'year'

In [3]:
# Collect the year and month name of every observation in the first industry
# series; these lists become the row labels of the industry table.
first_industry_points = json_data4["Results"]["series"][0]["data"]

years = [point["year"] for point in first_industry_points]
months = [point["periodName"] for point in first_industry_points]

In [4]:
# Create DataFrame with one row per (year, month) observation and an empty
# placeholder column for each industry employment series; a later cell fills
# the placeholders.

industry_series_ids = ["CES0000000001",
                       "CES0500000001",
                       "CES0600000001",
                       "CES0700000001",
                       "CES0800000001",
                       "CES1000000001",
                       "CES2000000001",
                       "CES3000000001",
                       "CES3100000001",
                       "CES3200000001",
                       "CES4000000001",
                       "CES4142000001",
                       "CES4200000001",
                       "CES4300000001",
                       "CES4422000001",
                       "CES5000000001",
                       "CES5500000001",
                       "CES6000000001",
                       "CES6500000001",
                       "CES7000000001",
                       "CES8000000001",
                       "CES9000000001",
                       ]

# The previous version called set_index(...) on every column without keeping
# the result, which had no effect. The frame keeps its default integer index,
# which the fill cell relies on.
industry_df = pd.DataFrame({"Years": years,
                            "Months": months,
                            **{series_id: "" for series_id in industry_series_ids}})
industry_df.head()

Unnamed: 0,Years,Months,CES0000000001,CES0500000001,CES0600000001,CES0700000001,CES0800000001,CES1000000001,CES2000000001,CES3000000001,...,CES4200000001,CES4300000001,CES4422000001,CES5000000001,CES5500000001,CES6000000001,CES6500000001,CES7000000001,CES8000000001,CES9000000001
0,2020,March,,,,,,,,,...,,,,,,,,,,
1,2020,February,,,,,,,,,...,,,,,,,,,,
2,2020,January,,,,,,,,,...,,,,,,,,,,
3,2019,December,,,,,,,,,...,,,,,,,,,,
4,2019,November,,,,,,,,,...,,,,,,,,,,


In [5]:
# Fill each industry series' column with its observations.
# Rows are matched on (year, month) instead of list position, so a series with
# a missing or extra observation cannot shift values onto the wrong row.
# Values are kept as the strings the API returns; a period that a series does
# not report keeps the "" placeholder.

row_periods = list(zip(industry_df["Years"], industry_df["Months"]))

for series4 in json_data4["Results"]["series"]:
    value_by_period = {(point["year"], point["periodName"]): point["value"]
                       for point in series4["data"]}
    industry_df[series4["seriesID"]] = [value_by_period.get(period, "")
                                        for period in row_periods]

industry_df.dtypes

Years            object
Months           object
CES0000000001    object
CES0500000001    object
CES0600000001    object
CES0700000001    object
CES0800000001    object
CES1000000001    object
CES2000000001    object
CES3000000001    object
CES3100000001    object
CES3200000001    object
CES4000000001    object
CES4142000001    object
CES4200000001    object
CES4300000001    object
CES4422000001    object
CES5000000001    object
CES5500000001    object
CES6000000001    object
CES6500000001    object
CES7000000001    object
CES8000000001    object
CES9000000001    object
dtype: object

In [6]:
# Rename the BLS series-ID columns to readable industry names.
# NOTE(review): CES1000000001 is presumably "Mining and Logging"; the existing
# label is kept because other cells may reference it - confirm and fix together.
industry_names = {"CES0000000001": "Total Non-Farm",
                  "CES0500000001": "Total Private",
                  "CES0600000001": "Goods Production",
                  "CES0700000001": "Service Production",
                  "CES0800000001": "Private Service Production",
                  "CES1000000001": "Mining and Lodging",
                  "CES2000000001": "Construction",
                  "CES3000000001": "Manufacturing",
                  "CES3100000001": "Durable Goods",
                  "CES3200000001": "NonDurable Good",
                  "CES4000000001": "Trade, Transportation and Utilities",
                  "CES4142000001": "WholeSale Trade",
                  "CES4200000001": "Retail Trade",
                  "CES4300000001": "Transportation and Warehousing",
                  "CES4422000001": "Utilities",
                  "CES5000000001": "Information",
                  "CES5500000001": "Financial Activities",
                  "CES6000000001": "Professional and Business Services",
                  "CES6500000001": "Education and Health Services",
                  "CES7000000001": "Leisure and Hospitality",
                  "CES8000000001": "Other Services",
                  "CES9000000001": "Government"}

industry_df = industry_df.rename(columns=industry_names)

# The API returns values as strings; convert every industry column to numbers
# so they can be summed and averaged. One loop replaces 22 copy-pasted lines.
for industry_name in industry_names.values():
    industry_df[industry_name] = pd.to_numeric(industry_df[industry_name])

# Consolidate categories for manageability
# NOTE(review): 'Total Private' looks like a component of 'Total Non-Farm', so
# this sum may double count - confirm what NewValue is meant to represent.
industry_df['NewValue'] = industry_df['Total Non-Farm'] + industry_df['Total Private']

industry_df.head(10)



Unnamed: 0,Years,Months,Total Non-Farm,Total Private,Goods Production,Service Production,Private Service Production,Mining and Lodging,Construction,Manufacturing,...,Transportation and Warehousing,Utilities,Information,Financial Activities,Professinoal and Business Services,Education and Health Services,Leisure and Hospitality,Other Services,Government,NewValue
0,2020,March,151786,129027,21152,130634,107875,708,7605,12839,...,5672.4,547.9,2899,8853,21507,24523,16393,5919,22759,280813
1,2020,February,152487,129740,21206,131281,108534,715,7634,12857,...,5677.3,547.1,2897,8854,21559,24599,16852,5943,22747,282227
2,2020,January,152212,129498,21149,131063,108349,712,7593,12844,...,5678.5,546.9,2894,8823,21523,24534,16808,5935,22714,281710
3,2019,December,151998,129319,21136,130862,108183,715,7555,12866,...,5655.9,547.9,2883,8814,21503,24465,16784,5925,22679,281317
4,2019,November,151814,129155,21131,130683,108024,724,7539,12868,...,5658.0,547.2,2874,8804,21481,24436,16744,5923,22659,280969
5,2019,October,151553,128908,21086,130467,107822,735,7541,12810,...,5635.4,546.2,2865,8792,21444,24363,16701,5907,22645,280461
6,2019,September,151368,128718,21106,130262,107612,731,7524,12851,...,5628.6,547.5,2866,8771,21402,24323,16631,5907,22650,280086
7,2019,August,151160,128523,21087,130073,107436,731,7508,12848,...,5618.1,548.9,2861,8768,21377,24262,16570,5910,22637,279683
8,2019,July,150953,128366,21085,129868,107281,736,7504,12845,...,5622.2,549.6,2862,8753,21337,24204,16528,5905,22587,279319
9,2019,June,150759,128206,21076,129683,107130,741,7497,12838,...,5622.7,550.5,2865,8732,21294,24131,16526,5896,22553,278965


In [16]:
# Average each industry's monthly values within every year.
# numeric_only=True skips the text 'Months' column; without it, pandas 2.x
# raises a TypeError when asked to average strings. 'Years' becomes the index
# of the result.
grouped_industry = industry_df.groupby('Years').mean(numeric_only=True)
grouped_industry.head(13)

Unnamed: 0_level_0,Total Non-Farm,Total Private,Goods Production,Service Production,Private Service Production,Mining and Lodging,Construction,Manufacturing,Durable Goods,NonDurable Good,...,Transportation and Warehousing,Utilities,Information,Financial Activities,Professinoal and Business Services,Education and Health Services,Leisure and Hospitality,Other Services,Government,NewValue
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008,137224.583333,114721.666667,21331.5,115893.083333,93390.166667,765.75,7162.333333,13403.416667,8462.75,4940.666667,...,4495.858333,558.783333,2983.166667,8204.916667,17798.0,19228.416667,13440.666667,5514.916667,22502.916667,251946.25
2009,131288.833333,108736.083333,18559.166667,112729.666667,90176.916667,694.0,6017.333333,11847.833333,7284.416667,4563.416667,...,4225.208333,559.866667,2803.5,7838.166667,16633.416667,19627.666667,13073.583333,5366.0,22552.75,240024.916667
2010,130336.75,107847.083333,17751.75,112585.0,90095.333333,704.75,5518.333333,11528.666667,7064.166667,4464.5,...,4179.95,552.783333,2707.25,7695.166667,16777.75,19972.583333,13041.916667,5330.75,22489.666667,238183.833333
2011,131921.666667,109828.833333,18045.25,113876.416667,91783.583333,788.25,5529.916667,11727.083333,7273.583333,4453.5,...,4291.633333,552.541667,2673.25,7696.25,17386.333333,20321.833333,13351.833333,5360.666667,22092.833333,241750.5
2012,134156.583333,112237.0,18419.916667,115736.666667,93817.083333,847.416667,5645.5,11927.0,7469.333333,4457.666667,...,4403.516667,552.733333,2674.916667,7783.416667,17992.083333,20769.166667,13770.416667,5430.166667,21919.583333,246393.583333
2013,136355.666667,114508.416667,18738.833333,117616.833333,95769.583333,862.583333,5857.083333,12019.166667,7546.583333,4472.583333,...,4485.75,551.85,2704.916667,7885.916667,18579.416667,21084.5,14255.333333,5483.0,21847.25,250864.083333
2014,138922.25,117047.166667,19225.833333,119696.416667,97821.333333,890.416667,6150.833333,12184.583333,7673.666667,4510.916667,...,4646.458333,551.225,2726.166667,7976.0,19122.75,21436.0,14690.583333,5566.583333,21875.083333,255969.416667
2015,141804.25,119777.25,19607.583333,122196.666667,100169.666667,812.666667,6460.0,12334.916667,7765.083333,4569.833333,...,4855.825,555.908333,2750.0,8123.416667,19689.666667,22025.0,15153.833333,5621.416667,22027.0,261581.5
2016,144329.416667,122099.833333,19747.75,124581.666667,102352.083333,668.166667,6726.916667,12352.666667,7713.166667,4639.5,...,5001.325,556.241667,2793.083333,8284.833333,20106.583333,22640.083333,15656.583333,5690.166667,22229.583333,266429.25
2017,146589.166667,124240.916667,20079.666667,126509.5,104161.25,675.416667,6965.333333,12438.916667,7739.75,4699.166667,...,5176.775,555.375,2812.083333,8449.416667,20504.833333,23189.416667,16052.75,5768.833333,22348.25,270830.083333


In [None]:
# Build a "<Year>-<Month #>" label for every row, to be used for indexing
date = [f"{row['Year']}-{row['Month #']}" for _, row in education_df.iterrows()]
education_df['Date'] = date

# Keep only the columns used below, in display order
education_columns = ['Year', 'Month #', 'Month', 'Date',
                     'No High School Diploma', 'High School Graduate',
                     'Some College', "Bachelor's Degree or Higher"]
education_df = education_df[education_columns]

# Reverse the row order
reversed_index = education_df.index[::-1]
education_df = education_df.reindex(index=reversed_index)
education_df.head()

In [1]:
# Line chart of the average annual employment level of each major sector.
# Each sector is paired with its line colour once, so the legend is generated
# from the plotted lines and cannot drift out of sync with them. The previous
# hand-built legend used mismatched colours, an unimported `mpatches`, and a
# misspelled handle name.
sector_colors = {
    "Construction": "blue",
    "Manufacturing": "red",
    "Trade, Transportation and Utilities": "green",
    "Information": "purple",
    "Financial Activities": "yellow",
    "Professional and Business Services": "orange",
    "Education and Health Services": "black",
    "Leisure and Hospitality": "cyan",
    "Other Services": "grey",
    "Government": "brown",
}

fig, ax = plt.subplots(figsize=(15, 10))

# 'Years' is the index of grouped_industry (it was the groupby key), not a
# column, so it is read from .index.
for sector, color in sector_colors.items():
    ax.plot(grouped_industry.index, grouped_industry[sector],
            color=color, label=sector)

ax.legend()
ax.set_title('Annual - Industry Employment levels by Major Sectors')
ax.set_ylabel('Average Annual Number of Person Employed in Sector')
ax.set_xlabel('Year')
ax.grid()
plt.show()


NameError: name 'plt' is not defined

In [None]:
# Average unemployment rate per year for each education level.
education_levels = ['No High School Diploma', 'High School Graduate',
                    'Some College', "Bachelor's Degree or Higher"]

yearinfo = education_df[['Year'] + education_levels]

# Select the columns with a list: indexing a GroupBy with a bare tuple of
# column names was deprecated and no longer works in pandas 2.x.
year_groupby = yearinfo.groupby('Year')[education_levels].mean()

year_groupby

In [None]:
# Grouped bar chart: one cluster per year, one bar per education level
ax = year_groupby.plot.bar(rot=0, figsize=(15, 10))
ax.set_xlabel("Year")
ax.set_ylabel("Unemployment Rate (% Average)")
ax.set_title("Unemployment Impact on Different Levels of Education")

In [14]:
# Transposed view: industries as rows, years as columns
grouped_industry_T = grouped_industry.transpose()
grouped_industry_T.head()


Years,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
Total Non-Farm,137224.583333,131288.833333,130336.75,131921.666667,134156.583333,136355.666667,138922.25,141804.25,144329.416667,146589.166667,148890.75,150935.416667,152161.666667
Total Private,114721.666667,108736.083333,107847.083333,109828.833333,112237.0,114508.416667,117047.166667,119777.25,122099.833333,124240.916667,126442.75,128347.0,129421.666667
Goods Production,21331.5,18559.166667,17751.75,18045.25,18419.916667,18738.833333,19225.833333,19607.583333,19747.75,20079.666667,20700.916667,21068.666667,21169.0
Service Production,115893.083333,112729.666667,112585.0,113876.416667,115736.666667,117616.833333,119696.416667,122196.666667,124581.666667,126509.5,128189.833333,129866.75,130992.666667
Private Service Production,93390.166667,90176.916667,90095.333333,91783.583333,93817.083333,95769.583333,97821.333333,100169.666667,102352.083333,104161.25,105741.833333,107278.333333,108252.666667


In [None]:
# Rows for the pie chart: the major sectors that have a 2018 average.
# The previous filter compared the numeric "2018" column with the string
# "Billy", which never matches, so piechart_df was always empty.
# NOTE(review): the sector list mirrors the line chart - confirm this is the
# intended selection for the pie chart.
pie_sectors = ["Construction",
               "Manufacturing",
               "Trade, Transportation and Utilities",
               "Information",
               "Financial Activities",
               "Professional and Business Services",
               "Education and Health Services",
               "Leisure and Hospitality",
               "Other Services",
               "Government"]
piechart_df = grouped_industry_T.loc[
    grouped_industry_T.index.isin(pie_sectors) & grouped_industry_T["2018"].notna(), :]