In [108]:
import requests
import json
import pandas as pd
import numpy as np
import datetime
import calendar
from config import bls_api_key


## Construction spending

In [67]:
# Inclusive year range, kept as strings: used below both to slice the
# construction-spending sheet and as the BLS API request window.
start_year, end_year = "2008", "2011"

In [166]:
# Load the raw construction-spending sheet exactly as exported.
cs_df = pd.read_csv("Resources/totsatime.csv")

# Promote the values of row 2 to column labels.
cs_df.columns = cs_df.iloc[2,:]
# Keep rows 3..329 and the columns of interest. The row bounds are hard-coded;
# NOTE(review): confirm they still match if Resources/totsatime.csv is refreshed.
cs_df = cs_df.loc[3:329,["Date", "Total\n\rConstruction1", "Total\n\rResidential", "Total\n\rNonresidential",
                         "Total\n\rPrivate Construction2", "Total\n\rPublic Construction3"]]
# Discard the first six selected columns. NOTE(review): this only leaves data
# when the label selection above yields more than six columns (e.g. the sheet
# repeats these labels) -- verify against the raw file.
cs_df = cs_df.iloc[:,6:]

# Replace the sheet's labels (embedded line breaks, footnote digits) with clean names.
cs_df = cs_df.rename(columns={"Total\n\rConstruction1" : "Total Construction",
                              "Total\n\rResidential" : "Total Residential construction",
                              "Total\n\rNonresidential" : "Total Nonresidential construction",
                              "Total\n\rPrivate Construction2" : "Total Private construction",
                              "Total\n\rPublic Construction3" : "Total Public construction"})

# Renumber rows from 0 so the positional slice below can use index labels.
cs_df.reset_index(drop=True, inplace=True)

# Slice from the "Dec-<end_year>" row through the "Jan-<start_year>" row (inclusive).
# This assumes the Dec row comes before the Jan row, i.e. newest month first -- TODO confirm.
cs_df = cs_df.iloc[cs_df.index[cs_df["Date"] == f"Dec-{end_year[2:]}"][0]: (cs_df.index[cs_df["Date"] == f"Jan-{start_year[2:]}"][0])+1,:]

# Split "Mon-YY" into a "20YY-" prefix (21st century is hard-coded) and the month abbreviation.
cs_df["Year"] = "20" + cs_df["Date"].str[-2:] + "-"
cs_df["month"] = cs_df["Date"].str[:3]

# Frame-wide replace: any value equal to a month abbreviation becomes its two-digit number.
cs_df.replace(to_replace=["Jan","Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
              value=["01","02","03", "04", "05", "06", "07", "08", "09", "10", "11", "12"],
              inplace=True)

# Assemble the sortable "YYYY-MM" key.
cs_df["date2"] = cs_df["Year"] + cs_df["month"]

# Swap the original "Mon-YY" column and the helpers for the new key, named "Date" again.
cs_df = cs_df.drop(["Date", "Year", "month"], axis=1)

cs_df.rename(columns={"date2": "Date"}, inplace=True)

# Put "Date" first, followed by the spending columns.
cs_df = cs_df.reindex(columns=["Date", "Total Construction", "Total Residential construction", "Total Nonresidential construction",
                       "Total Private construction", "Total Public construction"])

# Chronological order (string sort works because the key is zero-padded "YYYY-MM").
cs_df.sort_values(by="Date", ascending=True, inplace=True)

cs_df.reset_index(drop=True, inplace=True)

# NOTE(review): unlike the BLS exports in this notebook, this call writes the
# index as an extra unnamed column (no index=False) -- confirm that is intended.
cs_df.to_csv("construction.csv")

cs_df

2,Date,Total Construction,Total Residential construction,Total Nonresidential construction,Total Private construction,Total Public construction
0,2008-01,1111687,409760,701927,817436,294251
1,2008-02,1097891,394436,703456,798568,299323
2,2008-03,1103580,397603,705977,797041,306539
3,2008-04,1101416,392965,708451,796904,304512
4,2008-05,1101187,388975,712212,795867,305320
5,2008-06,1089511,381412,708098,781953,307558
6,2008-07,1086874,372678,714196,776265,310609
7,2008-08,1069183,359490,709693,755499,313684
8,2008-09,1059898,348526,711372,751340,308559
9,2008-10,1056331,337197,719135,742000,314332


## Demographic unemployment rate

In [69]:
# Pull gender, race, educational attainment, and age data from Bureau of Labor Statistics

url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {"content-type": "application/json"}

# Series IDs to request; each is given a readable column label in a later cell.
target_series = ["LNS14000000", "LNS14000001", "LNS14000002", "LNS14000003", "LNS14000006", "LNS14032183", "LNS14000009",
                 "LNS14027659", "LNS14027660", "LNS14027689", "LNS14027662", "LNS14024887", "LNS14000089", "LNS14000091",
                 "LNS14000093", "LNS14024230"]

# JSON request body: the series list, the year window and the API key.
parameters = json.dumps({"seriesid": target_series,
                         "startyear": start_year,
                         "endyear": end_year,
                         "registrationkey": bls_api_key})

# Bound the wait and surface HTTP errors here, rather than as a KeyError on
# "Results" in a later cell.
p = requests.post(url, data=parameters, headers=headers, timeout=30)
p.raise_for_status()

json_data = p.json()

# The API can report a rejected request (e.g. bad key, quota) inside the JSON
# body, so check its status field as well as the HTTP status.
if json_data.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(f"BLS request failed: {json_data.get('status')} {json_data.get('message')}")

In [70]:
# Row labels come from the first series in the response: its year, its month
# name, and a "<year>-<period without first character>" key per observation.
first_series_data = json_data["Results"]["series"][0]["data"]

years = [observation["year"] for observation in first_series_data]
months = [observation["periodName"] for observation in first_series_data]
date = [f"{observation['year']}-{observation['period'][1:]}" for observation in first_series_data]

In [71]:
# Create DataFrame

# One column per series ID, initialised to "" as a placeholder; a later cell
# fills them from the API response.
demographic_series_ids = ["LNS14000000", "LNS14000001", "LNS14000002", "LNS14000003",
                          "LNS14000006", "LNS14032183", "LNS14000009", "LNS14027659",
                          "LNS14027660", "LNS14027689", "LNS14027662", "LNS14024887",
                          "LNS14000089", "LNS14000091", "LNS14000093", "LNS14024230"]

unemployment_df = pd.DataFrame({"Date": date,
                                "Year": years,
                                "Month": months,
                                **dict.fromkeys(demographic_series_ids, "")})
unemployment_df.head()

Unnamed: 0,Date,Year,Month,LNS14000000,LNS14000001,LNS14000002,LNS14000003,LNS14000006,LNS14032183,LNS14000009,LNS14027659,LNS14027660,LNS14027689,LNS14027662,LNS14024887,LNS14000089,LNS14000091,LNS14000093,LNS14024230
0,2011-12,2011,December,,,,,,,,,,,,,,,,
1,2011-11,2011,November,,,,,,,,,,,,,,,,
2,2011-10,2011,October,,,,,,,,,,,,,,,,
3,2011-09,2011,September,,,,,,,,,,,,,,,,
4,2011-08,2011,August,,,,,,,,,,,,,,,,


In [72]:
# Fills in values

# Match each observation to its row through the "Date" key instead of its list
# position. The result then no longer depends on the frame's current row order
# (e.g. re-running after the sort cell) or on every series having the same months.
for series_entry in json_data["Results"]["series"]:
    value_by_date = {f"{obs['year']}-{obs['period'][1:]}": obs["value"]
                     for obs in series_entry["data"]}
    # Dates absent from this series keep the "" placeholder.
    unemployment_df[series_entry["seriesID"]] = unemployment_df["Date"].map(value_by_date).fillna("")

In [73]:
# Rename column labels

# Readable label for each BLS series ID.
demographic_labels = {
    "LNS14000000": "Unemployment Rate for 16yo and over",
    "LNS14000001": "Unemployment Rate for Men 16yo and over",
    "LNS14000002": "Unemployment Rate for Women 16yo and over",
    "LNS14000003": "Unemployment Rate for White people 16yo and over",
    "LNS14000006": "Unemployment Rate for African Americans 16yo and over",
    "LNS14032183": "Unemployment Rate for Asian people 16yo and over",
    "LNS14000009": "Unemployment Rate for Hispanic people 16yo and over",
    "LNS14027659": "Unemployment Rate for 25yo and older with less than High School Diploma",
    "LNS14027660": "Unemployment Rate for 25yo and older with High School Degree",
    "LNS14027689": "Unemployment Rate for 25yo and older with some college",
    "LNS14027662": "Unemployment Rate for 25yo and older with Bachelor's degree or higher",
    "LNS14024887": "Unemployment Rate for 16-24yo",
    "LNS14000089": "Unemployment Rate for 25-34yo",
    "LNS14000091": "Unemployment Rate for 35-44yo",
    "LNS14000093": "Unemployment Rate for 45-54yo",
    "LNS14024230": "Unemployment Rate for 55yo and over",
}

# Relabel, order by the "Date" key (oldest first) and renumber the rows.
unemployment_df = (
    unemployment_df
    .rename(columns=demographic_labels)
    .sort_values(by="Date", ascending=True)
    .reset_index(drop=True)
)

unemployment_df.to_csv("Unemployment_Rates.csv", index=False)

unemployment_df

Unnamed: 0,Date,Year,Month,Unemployment Rate for 16yo and over,Unemployment Rate for Men 16yo and over,Unemployment Rate for Women 16yo and over,Unemployment Rate for White people 16yo and over,Unemployment Rate for African Americans 16yo and over,Unemployment Rate for Asian people 16yo and over,Unemployment Rate for Hispanic people 16yo and over,Unemployment Rate for 25yo and older with less than High School Diploma,Unemployment Rate for 25yo and older with High School Degree,Unemployment Rate for 25yo and older with some college,Unemployment Rate for 25yo and older with Bachelor's degree or higher,Unemployment Rate for 16-24yo,Unemployment Rate for 25-34yo,Unemployment Rate for 35-44yo,Unemployment Rate for 45-54yo,Unemployment Rate for 55yo and over
0,2008-01,2008,January,5.0,5.2,4.8,4.4,9.1,3.1,6.3,7.7,4.7,3.7,2.1,11.7,5.1,3.6,3.4,3.3
1,2008-02,2008,February,4.9,5.0,4.7,4.4,8.4,2.9,6.2,7.4,4.7,3.8,2.0,11.4,4.9,3.6,3.4,3.3
2,2008-03,2008,March,5.1,5.2,5.0,4.5,9.2,3.6,6.9,8.4,5.1,3.9,2.1,11.4,5.3,3.8,3.5,3.4
3,2008-04,2008,April,5.0,5.2,4.8,4.4,8.6,3.5,7.1,7.7,5.0,4.0,2.1,11.0,5.1,3.9,3.7,3.0
4,2008-05,2008,May,5.4,5.6,5.2,4.8,9.6,4.1,6.9,8.1,5.0,4.3,2.2,13.0,5.2,4.2,3.8,3.2
5,2008-06,2008,June,5.6,5.8,5.3,5.0,9.4,4.2,7.6,8.7,5.1,4.3,2.4,12.9,5.4,4.4,3.8,3.3
6,2008-07,2008,July,5.8,6.2,5.3,5.2,10.0,3.7,7.5,8.6,5.4,4.6,2.5,13.5,5.7,4.7,3.8,3.7
7,2008-08,2008,August,6.1,6.3,5.9,5.4,10.6,4.5,8.0,9.7,5.8,4.9,2.8,13.1,6.2,5.1,4.3,4.2
8,2008-09,2008,September,6.1,6.7,5.5,5.4,11.3,4.0,8.0,9.8,6.2,5.0,2.6,13.5,6.2,5.1,4.3,4.2
9,2008-10,2008,October,6.5,7.1,5.9,5.9,11.4,3.9,8.8,10.3,6.4,5.2,3.1,13.6,6.7,5.4,4.6,4.5


## Statewide unemployment rate

In [74]:
# Pull state data from Bureau of Labor Statistics

url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {"content-type": "application/json"}

# First 50 series; the remaining two are requested separately in the next cell
# (presumably to stay within a per-request series limit -- TODO confirm).
target_series = ["LASST010000000000003", "LASST020000000000003", "LASST040000000000003", "LASST050000000000003",
                 "LASST060000000000003", "LASST080000000000003", "LASST090000000000003", "LASST100000000000003",
                 "LASST110000000000003", "LASST120000000000003", "LASST130000000000003", "LASST150000000000003",
                 "LASST160000000000003", "LASST170000000000003", "LASST180000000000003", "LASST190000000000003",
                 "LASST200000000000003", "LASST210000000000003", "LASST220000000000003", "LASST230000000000003",
                 "LASST240000000000003", "LASST250000000000003", "LASST260000000000003", "LASST270000000000003",
                 "LASST280000000000003", "LASST290000000000003", "LASST300000000000003", "LASST310000000000003",
                 "LASST320000000000003", "LASST330000000000003", "LASST340000000000003", "LASST350000000000003",
                 "LASST360000000000003", "LASST370000000000003", "LASST380000000000003", "LASST390000000000003",
                 "LASST400000000000003", "LASST410000000000003", "LASST420000000000003", "LASST720000000000003",
                 "LASST440000000000003", "LASST450000000000003", "LASST460000000000003", "LASST470000000000003",
                 "LASST480000000000003", "LASST490000000000003", "LASST500000000000003", "LASST510000000000003",
                 "LASST530000000000003", "LASST540000000000003"]

# JSON request body: the series list, the year window and the API key.
parameters = json.dumps({"seriesid": target_series,
                         "startyear": start_year,
                         "endyear": end_year,
                         "registrationkey": bls_api_key})

# Bound the wait and surface HTTP errors here, rather than as a KeyError on
# "Results" in a later cell.
p = requests.post(url, data=parameters, headers=headers, timeout=30)
p.raise_for_status()

json_data2 = p.json()

# The API can report a rejected request (e.g. bad key, quota) inside the JSON
# body, so check its status field as well as the HTTP status.
if json_data2.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(f"BLS request failed: {json_data2.get('status')} {json_data2.get('message')}")

In [75]:
# Pull the remaining two state series from Bureau of Labor Statistics

url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {"content-type": "application/json"}

target_series = ["LASST550000000000003", "LASST560000000000003"]

# JSON request body: the series list, the year window and the API key.
parameters = json.dumps({"seriesid": target_series,
                         "startyear": start_year,
                         "endyear": end_year,
                         "registrationkey": bls_api_key})

# Bound the wait and surface HTTP errors here, rather than as a KeyError on
# "Results" in a later cell.
p = requests.post(url, data=parameters, headers=headers, timeout=30)
p.raise_for_status()

json_data3 = p.json()

# The API can report a rejected request (e.g. bad key, quota) inside the JSON
# body, so check its status field as well as the HTTP status.
if json_data3.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(f"BLS request failed: {json_data3.get('status')} {json_data3.get('message')}")

In [76]:
years = []
months = []
date = []

# Build all three row labels from the state response (json_data2). The "Date"
# key was previously read from the unrelated demographic response, which only
# worked while both responses happened to cover identical months in identical order.
for observation in json_data2["Results"]["series"][0]["data"]:
    years.append(observation["year"])
    months.append(observation["periodName"])
    # Key format: "<year>-<period code without its first character>".
    date.append(f"{observation['year']}-{observation['period'][1:]}")

In [77]:
# Create DataFrame

# One column per state-level series ID, initialised to "" as a placeholder;
# a later cell fills them from the API responses.
state_series_ids = [
    "LASST010000000000003", "LASST020000000000003", "LASST040000000000003",
    "LASST050000000000003", "LASST060000000000003", "LASST080000000000003",
    "LASST090000000000003", "LASST100000000000003", "LASST110000000000003",
    "LASST120000000000003", "LASST130000000000003", "LASST150000000000003",
    "LASST160000000000003", "LASST170000000000003", "LASST180000000000003",
    "LASST190000000000003", "LASST200000000000003", "LASST210000000000003",
    "LASST220000000000003", "LASST230000000000003", "LASST240000000000003",
    "LASST250000000000003", "LASST260000000000003", "LASST270000000000003",
    "LASST280000000000003", "LASST290000000000003", "LASST300000000000003",
    "LASST310000000000003", "LASST320000000000003", "LASST330000000000003",
    "LASST340000000000003", "LASST350000000000003", "LASST360000000000003",
    "LASST370000000000003", "LASST380000000000003", "LASST390000000000003",
    "LASST400000000000003", "LASST410000000000003", "LASST420000000000003",
    "LASST720000000000003", "LASST440000000000003", "LASST450000000000003",
    "LASST460000000000003", "LASST470000000000003", "LASST480000000000003",
    "LASST490000000000003", "LASST500000000000003", "LASST510000000000003",
    "LASST530000000000003", "LASST540000000000003", "LASST550000000000003",
    "LASST560000000000003",
]

state_unemployment_df = pd.DataFrame({"Date": date,
                                      "Year": years,
                                      "Month": months,
                                      **dict.fromkeys(state_series_ids, "")})
state_unemployment_df.head()

Unnamed: 0,Date,Year,Month,LASST010000000000003,LASST020000000000003,LASST040000000000003,LASST050000000000003,LASST060000000000003,LASST080000000000003,LASST090000000000003,...,LASST460000000000003,LASST470000000000003,LASST480000000000003,LASST490000000000003,LASST500000000000003,LASST510000000000003,LASST530000000000003,LASST540000000000003,LASST550000000000003,LASST560000000000003
0,2011-12,2011,December,,,,,,,,...,,,,,,,,,,
1,2011-11,2011,November,,,,,,,,...,,,,,,,,,,
2,2011-10,2011,October,,,,,,,,...,,,,,,,,,,
3,2011-09,2011,September,,,,,,,,...,,,,,,,,,,
4,2011-08,2011,August,,,,,,,,...,,,,,,,,,,


In [78]:
# Fills in values

# Both state responses are handled the same way. Each observation is matched to
# its row through the "Date" key instead of its list position, so the result
# does not depend on the frame's current row order or on every series having
# the same months.
for state_response in (json_data2, json_data3):
    for series_entry in state_response["Results"]["series"]:
        value_by_date = {f"{obs['year']}-{obs['period'][1:]}": obs["value"]
                         for obs in series_entry["data"]}
        # Dates absent from this series keep the "" placeholder.
        state_unemployment_df[series_entry["seriesID"]] = (
            state_unemployment_df["Date"].map(value_by_date).fillna("")
        )

In [79]:
# Rename Column labels

# Series ID -> state (or territory) name used as the column label.
state_labels = {
    "LASST010000000000003": "Alabama",
    "LASST020000000000003": "Alaska",
    "LASST040000000000003": "Arizona",
    "LASST050000000000003": "Arkansas",
    "LASST060000000000003": "California",
    "LASST080000000000003": "Colorado",
    "LASST090000000000003": "Connecticut",
    "LASST100000000000003": "Delaware",
    "LASST110000000000003": "District Of Columbia",
    "LASST120000000000003": "Florida",
    "LASST130000000000003": "Georgia",
    "LASST150000000000003": "Hawaii",
    "LASST160000000000003": "Idaho",
    "LASST170000000000003": "Illinois",
    "LASST180000000000003": "Indiana",
    "LASST190000000000003": "Iowa",
    "LASST200000000000003": "Kansas",
    "LASST210000000000003": "Kentucky",
    "LASST220000000000003": "Louisiana",
    "LASST230000000000003": "Maine",
    "LASST240000000000003": "Maryland",
    "LASST250000000000003": "Massachusetts",
    "LASST260000000000003": "Michigan",
    "LASST270000000000003": "Minnesota",
    "LASST280000000000003": "Mississippi",
    "LASST290000000000003": "Missouri",
    "LASST300000000000003": "Montana",
    "LASST310000000000003": "Nebraska",
    "LASST320000000000003": "Nevada",
    "LASST330000000000003": "New Hampshire",
    "LASST340000000000003": "New Jersey",
    "LASST350000000000003": "New Mexico",
    "LASST360000000000003": "New York",
    "LASST370000000000003": "North Carolina",
    "LASST380000000000003": "North Dakota",
    "LASST390000000000003": "Ohio",
    "LASST400000000000003": "Oklahoma",
    "LASST410000000000003": "Oregon",
    "LASST420000000000003": "Pennsylvania",
    "LASST720000000000003": "Puerto Rico",
    "LASST440000000000003": "Rhode Island",
    "LASST450000000000003": "South Carolina",
    "LASST460000000000003": "South Dakota",
    "LASST470000000000003": "Tennessee",
    "LASST480000000000003": "Texas",
    "LASST490000000000003": "Utah",
    "LASST500000000000003": "Vermont",
    "LASST510000000000003": "Virginia",
    "LASST530000000000003": "Washington",
    "LASST540000000000003": "West Virginia",
    "LASST550000000000003": "Wisconsin",
    "LASST560000000000003": "Wyoming",
}

# Relabel, order by the "Date" key (oldest first) and renumber the rows.
state_unemployment_df = (
    state_unemployment_df
    .rename(columns=state_labels)
    .sort_values(by="Date", ascending=True)
    .reset_index(drop=True)
)

state_unemployment_df.to_csv("State_Unemployment_Rates.csv", index=False)

state_unemployment_df

Unnamed: 0,Date,Year,Month,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,2008-01,2008,January,4.4,6.5,4.5,5.0,6.0,4.1,4.9,...,2.6,5.4,4.3,3.0,4.3,3.4,4.7,4.5,4.5,2.8
1,2008-02,2008,February,4.5,6.5,4.7,5.0,6.1,4.2,5.0,...,2.6,5.5,4.3,3.0,4.3,3.4,4.7,4.4,4.4,2.8
2,2008-03,2008,March,4.6,6.6,5.0,4.9,6.2,4.3,5.1,...,2.6,5.6,4.3,3.1,4.3,3.4,4.8,4.3,4.3,2.8
3,2008-04,2008,April,4.8,6.6,5.3,5.0,6.5,4.4,5.2,...,2.7,5.8,4.4,3.2,4.4,3.5,4.9,4.2,4.3,2.8
4,2008-05,2008,May,5.1,6.7,5.6,5.0,6.7,4.5,5.3,...,2.8,6.1,4.5,3.2,4.4,3.7,5.0,4.1,4.4,2.8
5,2008-06,2008,June,5.3,6.7,5.9,5.2,7.0,4.7,5.5,...,2.9,6.4,4.6,3.3,4.5,3.8,5.2,4.1,4.5,2.9
6,2008-07,2008,July,5.6,6.7,6.3,5.3,7.3,4.9,5.7,...,3.1,6.7,4.8,3.4,4.6,3.9,5.4,4.0,4.7,2.9
7,2008-08,2008,August,5.9,6.7,6.6,5.5,7.6,5.0,5.9,...,3.2,6.9,4.9,3.6,4.7,4.0,5.5,4.1,4.9,3.1
8,2008-09,2008,September,6.2,6.8,6.9,5.8,7.9,5.2,6.1,...,3.4,7.2,5.1,3.7,4.8,4.2,5.7,4.2,5.2,3.2
9,2008-10,2008,October,6.7,6.8,7.3,6.1,8.3,5.4,6.3,...,3.5,7.5,5.3,3.9,5.1,4.4,6.0,4.4,5.5,3.3


## Unemployment level

In [80]:
# Pull unemployment level data from Bureau of Labor Statistics

url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {"content-type": "application/json"}

# Series IDs to request; each is given a readable column label in a later cell.
target_series = ["LNS13000000", "LNS13023621", "LNS13023653", "LNS13025699", "LNS13023705", "LNS13023557", "LNS13023569"]

# JSON request body: the series list, the year window and the API key.
parameters = json.dumps({"seriesid": target_series,
                         "startyear": start_year,
                         "endyear": end_year,
                         "registrationkey": bls_api_key})

# Bound the wait and surface HTTP errors here, rather than as a KeyError on
# "Results" in a later cell.
p = requests.post(url, data=parameters, headers=headers, timeout=30)
p.raise_for_status()

json_data4 = p.json()

# The API can report a rejected request (e.g. bad key, quota) inside the JSON
# body, so check its status field as well as the HTTP status.
if json_data4.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(f"BLS request failed: {json_data4.get('status')} {json_data4.get('message')}")

In [81]:
years = []
months = []
date = []

# Build all three row labels from the unemployment-level response (json_data4).
# The "Date" key was previously read from the unrelated demographic response,
# which only worked while both responses covered identical months in identical order.
for observation in json_data4["Results"]["series"][0]["data"]:
    years.append(observation["year"])
    months.append(observation["periodName"])
    # Key format: "<year>-<period code without its first character>".
    date.append(f"{observation['year']}-{observation['period'][1:]}")

In [82]:
# Create DataFrame

# One column per series ID, initialised to "" as a placeholder; a later cell
# fills them from the API response.
level_series_ids = ["LNS13000000", "LNS13023621", "LNS13023653", "LNS13025699",
                    "LNS13023705", "LNS13023557", "LNS13023569"]

layoffs_df = pd.DataFrame({"Date": date,
                           "years": years,
                           "months": months,
                           **dict.fromkeys(level_series_ids, "")})
layoffs_df.head()

Unnamed: 0,Date,years,months,LNS13000000,LNS13023621,LNS13023653,LNS13025699,LNS13023705,LNS13023557,LNS13023569
0,2011-12,2011,December,,,,,,,
1,2011-11,2011,November,,,,,,,
2,2011-10,2011,October,,,,,,,
3,2011-09,2011,September,,,,,,,
4,2011-08,2011,August,,,,,,,


In [83]:
# Fills in values

# Match each observation to its row through the "Date" key instead of its list
# position. The result then no longer depends on the frame's current row order
# (e.g. re-running after the sort cell) or on every series having the same months.
for series_entry in json_data4["Results"]["series"]:
    value_by_date = {f"{obs['year']}-{obs['period'][1:]}": obs["value"]
                     for obs in series_entry["data"]}
    # Dates absent from this series keep the "" placeholder.
    layoffs_df[series_entry["seriesID"]] = layoffs_df["Date"].map(value_by_date).fillna("")


In [84]:
# Rename column labels

# Readable label for each BLS series ID.
level_labels = {
    "LNS13000000": "Unemployment level, 16yo and over, thousands",
    "LNS13023621": "Job losers, 16yo and over, thousands",
    "LNS13023653": "Job losers on layoff, 16yo and over, thousands",
    "LNS13025699": "Job losers not on layoff, 16yo and over, thousands",
    "LNS13023705": "Job leavers (quit), 16yo and over, thousands",
    "LNS13023557": "Reentrants to labor force, 16yo and over, thousands",
    "LNS13023569": "New entrants to labor force, 16yo and over, thousands",
}

# Relabel, order by the "Date" key (oldest first) and renumber the rows.
layoffs_df = (
    layoffs_df
    .rename(columns=level_labels)
    .sort_values(by="Date", ascending=True)
    .reset_index(drop=True)
)

layoffs_df.to_csv("Unemployment_Levels.csv", index=False)

layoffs_df

Unnamed: 0,Date,years,months,"Unemployment level, 16yo and over, thousands","Job losers, 16yo and over, thousands","Job losers on layoff, 16yo and over, thousands","Job losers not on layoff, 16yo and over, thousands","Job leavers (quit), 16yo and over, thousands","Reentrants to labor force, 16yo and over, thousands","New entrants to labor force, 16yo and over, thousands"
0,2008-01,2008,January,7685,3851,1066,2785,823,2203,677
1,2008-02,2008,February,7497,3894,993,2901,796,2129,656
2,2008-03,2008,March,7822,4119,1074,3045,796,2121,704
3,2008-04,2008,April,7637,4067,1109,2958,847,2119,625
4,2008-05,2008,May,8395,4252,1071,3181,868,2493,797
5,2008-06,2008,June,8575,4390,1092,3298,859,2533,786
6,2008-07,2008,July,8937,4539,1074,3465,880,2662,835
7,2008-08,2008,August,9438,4942,1246,3696,1019,2655,821
8,2008-09,2008,September,9494,5257,1345,3912,995,2599,815
9,2008-10,2008,October,10074,5706,1325,4381,931,2623,819


## COVID cases

In [85]:
url = "https://covidtracking.com/api/us/daily"

# Bound the wait and fail on an HTTP error status, so an error page is never
# handed to the JSON parser.
covid_reply = requests.get(url, timeout=30)
covid_reply.raise_for_status()

# Later cells iterate over this and read per-record keys such as "date" and "positive".
response = covid_reply.json()

In [86]:
# Five independent, empty accumulators for the daily COVID figures.
dates, cum_positives, cum_deaths, inc_positives, inc_deaths = ([] for _ in range(5))

In [87]:
# Rebuild every list from the response instead of appending to it, so that
# re-running this cell does not duplicate the records.
dates = [record["date"] for record in response]
cum_positives = [record["positive"] for record in response]
cum_deaths = [record["death"] for record in response]
inc_positives = [record["positiveIncrease"] for record in response]
inc_deaths = [record["deathIncrease"] for record in response]

In [88]:
# Assemble the daily COVID table from the collected lists.
sick_df = pd.DataFrame({
    "Date": dates,
    "Total Cases": cum_positives,
    "Total Deaths": cum_deaths,
    "Daily increase in Cases": inc_positives,
    "Daily increase in Deaths": inc_deaths,
})

# For the last row of the table, set the daily increases equal to that row's
# cumulative totals (presumably the first reported day, which has no prior day
# to difference against -- TODO confirm).
last_row = len(sick_df) - 1
sick_df.loc[last_row, "Daily increase in Deaths"] = sick_df.loc[last_row, "Total Deaths"]
sick_df.loc[last_row, "Daily increase in Cases"] = sick_df.loc[last_row, "Total Cases"]

# Dates are turned into strings so they can be sliced and reformatted later.
sick_df["Date"] = sick_df["Date"].astype(str)

In [89]:
# Insert dashes after the 4th and 6th characters of every date string in one
# vectorised step, replacing a per-row .loc loop that was also tied to the
# length of the separate `dates` list.
compact_dates = sick_df["Date"]
sick_df["Date"] = compact_dates.str[0:4] + "-" + compact_dates.str[4:6] + "-" + compact_dates.str[6:]

# Oldest day first, with a fresh 0..n-1 index.
sick_df.sort_values(by="Date", ascending=True, inplace=True)

sick_df.reset_index(drop=True, inplace=True)

In [90]:
# Save the COVID table without the index column, then display it.
sick_df.to_csv("COVID_cases.csv", index=False)
sick_df

Unnamed: 0,Date,Total Cases,Total Deaths,Daily increase in Cases,Daily increase in Deaths
0,2020-02-28,9,4,9.0,4.0
1,2020-02-29,18,5,9.0,1.0
2,2020-03-01,31,8,13.0,3.0
3,2020-03-02,35,11,4.0,3.0
4,2020-03-03,60,14,25.0,3.0
5,2020-03-04,157,16,97.0,2.0
6,2020-03-05,204,20,47.0,4.0
7,2020-03-06,296,26,92.0,6.0
8,2020-03-07,422,27,126.0,1.0
9,2020-03-08,563,31,141.0,4.0


## Economic Indicators

In [91]:
# csv files are downloaded from Yahoo Finance

file_path1 = "Resources/2020_GSPC.csv"
file_path2 = "Resources/2020_DJI.csv"
file_path3 = "Resources/2020_IXIC.csv"
file_path4 = "Resources/2020_RUT.csv"

# Read the four files in order; each frame is named after the index it is
# later labelled with in the merged table.
SP500_df, Dow_df, NASDAQ_df, Russell2000_df = (
    pd.read_csv(csv_path)
    for csv_path in (file_path1, file_path2, file_path3, file_path4)
)

In [92]:
# Keep only the three columns used downstream, in this order.
price_columns = ["Date", "Close", "Volume"]

SP500_df = SP500_df.loc[:, price_columns]
Dow_df = Dow_df.loc[:, price_columns]
NASDAQ_df = NASDAQ_df.loc[:, price_columns]
Russell2000_df = Russell2000_df.loc[:, price_columns]

In [93]:
# Outer-join the four index tables on "Date" in two pairwise steps; the
# suffixes distinguish each index's Close and Volume columns.
eco_ind_df = SP500_df.merge(Dow_df, on="Date", how="outer",
                            suffixes=["_SP500", "_Dow_Jones_Industrial_Average"])
eco_ind_2_df = NASDAQ_df.merge(Russell2000_df, on="Date", how="outer",
                               suffixes=["_NASDAQ", "_Russell2000"])

# Combine both halves, then order by date (the index is left as produced by the sort).
eco_ind_df = (
    eco_ind_df
    .merge(eco_ind_2_df, on="Date", how="outer")
    .sort_values(by="Date", ascending=True)
)

eco_ind_df

Unnamed: 0,Date,Close_SP500,Volume_SP500,Close_Dow_Jones_Industrial_Average,Volume_Dow_Jones_Industrial_Average,Close_NASDAQ,Volume_NASDAQ,Close_Russell2000,Volume_Russell2000
0,2019-10-14,2966.149902,2557020000,26787.359375,178620000,8048.649902,1419730000,1505.430054,25570200
1,2019-10-15,2995.679932,3340740000,27024.800781,245510000,8148.709961,1836650000,1523.300049,33407400
2,2019-10-16,2989.689941,3222570000,27001.980469,214660000,8124.180176,1886720000,1525.060059,32225700
3,2019-10-17,2997.949951,3115960000,27025.880859,222540000,8156.850098,1861570000,1541.839966,31159600
4,2019-10-18,2986.199951,3264290000,26770.199219,288970000,8089.540039,2012930000,1535.479980,32642900
...,...,...,...,...,...,...,...,...,...
119,2020-04-03,2488.649902,6087190000,21052.529297,450010000,7373.080078,3279100000,1052.050049,60871900
120,2020-04-06,2663.679932,6391860000,22679.990234,610760000,7913.240234,3849100000,1138.780029,63918600
121,2020-04-07,2659.409912,7040720000,22653.859375,594660000,7887.259766,4069410000,1139.170044,70407200
122,2020-04-08,2749.979980,5856370000,23433.570313,472740000,8090.899902,3470730000,1191.660034,58563700


In [94]:
# Save the merged market-index table without the index column.
eco_ind_df.to_csv("Eco_Indicators.csv", index=False)

In [95]:
# Inner join keeps only the dates present in both the COVID table and the
# market-index table; the daily-increase columns are then dropped.
COVID_econ_df = (
    sick_df
    .merge(eco_ind_df, on="Date", how="inner")
    .drop(columns=["Daily increase in Cases", "Daily increase in Deaths"])
)
# Written with the default settings, so the row index is included in the file.
COVID_econ_df.to_csv("COVID_econ.csv")
COVID_econ_df

Unnamed: 0,Date,Total Cases,Total Deaths,Close_SP500,Volume_SP500,Close_Dow_Jones_Industrial_Average,Volume_Dow_Jones_Industrial_Average,Close_NASDAQ,Volume_NASDAQ,Close_Russell2000,Volume_Russell2000
0,2020-02-28,9,4,2954.219971,8563850000,25409.359375,915990000,8567.370117,5301170000,1476.430054,85638500
1,2020-03-02,35,11,3090.22998,6376400000,26703.320313,637200000,8952.169922,4232760000,1518.48999,63764000
2,2020-03-03,60,14,3003.370117,6355940000,25917.410156,647080000,8684.089844,4336700000,1486.079956,63559400
3,2020-03-04,157,16,3130.120117,5035480000,27090.859375,457590000,9018.089844,3602870000,1531.199951,50354800
4,2020-03-05,204,20,3023.939941,5575550000,26121.279297,477370000,8738.589844,3748090000,1478.819946,55755500
5,2020-03-06,296,26,2972.370117,6552140000,25864.779297,599780000,8575.620117,4279850000,1449.219971,65521400
6,2020-03-09,794,35,2746.560059,8423050000,23851.019531,750430000,7950.680176,4530350000,1313.439941,84230500
7,2020-03-10,1007,37,2882.22998,7635960000,25018.160156,654860000,8344.25,4431930000,1350.900024,76359600
8,2020-03-11,1262,43,2741.379883,7374110000,23553.220703,663960000,7952.049805,4273890000,1264.300049,73741100
9,2020-03-12,1521,51,2480.639893,8829380000,21200.619141,908260000,7201.799805,5066530000,1122.930054,88293800
