In [1]:
import pandas as pd
import requests, json
from config import fred_api_key, quandl_api_key, eia_api_key
import datetime as dt
import pprint
import os
import matplotlib as plt

In [2]:
# FRED access and criteria
consumer_address = f"https://api.stlouisfed.org/fred/series/observations?series_id=UMCSENT&frequency=q&api_key={fred_api_key}&file_type=json"
deficit_address = f"https://api.stlouisfed.org/fred/series/observations?series_id=FYFSD&frequency=a&api_key={fred_api_key}&file_type=json"
velocity_address = f"https://api.stlouisfed.org/fred/series/observations?series_id=M2V&frequency=q&api_key={fred_api_key}&file_type=json"
recession_address = f"https://api.stlouisfed.org/fred/series/observations?series_id=JHDUSRGDPBR&frequency=q&api_key={fred_api_key}&file_type=json"

# Quandl access and criteria
copper_address = f"https://www.quandl.com/api/v3/datasets/CHRIS/CME_HG1?api_key={quandl_api_key}"

#EIA access and criteria
# category_id = '293798'
oil_inv_address = f"http://api.eia.gov/series/?series_id=PET.RCRR01NUS_1.A&api_key={eia_api_key}&out=json"


In [3]:
# FRED jsons
consumer_json = requests.get(consumer_address).json()
deficit_json = requests.get(deficit_address).json()
velocity_json = requests.get(velocity_address).json()
recession_json = requests.get(recession_address).json()

# Quandl json
copper_json = requests.get(copper_address).json()

# EIA json
oil_inv_json = requests.get(oil_inv_address).json()

In [4]:
# Quandl cleaning headers for dataframe
copper_headers = copper_json["dataset"]["column_names"]
clean_copper_headers = []
for header in copper_headers:
    header = header.lower().replace(" ", "_")
    clean_copper_headers.append(header)

In [5]:
# FRED data frame clean and create
consumer_df = pd.DataFrame(consumer_json["observations"])
consumer_df = consumer_df[consumer_df.value != '.']
consumer_df = consumer_df.reset_index()
consumer_df = consumer_df.drop(columns = ["index","realtime_end","realtime_start"], axis=1)

deficit_df = pd.DataFrame(deficit_json["observations"])
deficit_df = deficit_df.drop(["realtime_end", "realtime_start"], axis=1)
deficit_df = deficit_df.rename(index=str, columns={"value" : "deficit"})

velocity_df = pd.DataFrame(velocity_json["observations"])
velocity_df = velocity_df.drop(["realtime_end", "realtime_start"], axis=1)
velocity_df = velocity_df.rename(index=str, columns={"value" : "velocity"})

recession_df = pd.DataFrame(recession_json["observations"])
recession_df = recession_df.drop(["realtime_end", "realtime_start"], axis=1)
recession_df = recession_df.rename(index=str, columns={"value" : "recession"})

# Quandl data frame
copper_df = pd.DataFrame(copper_json["dataset"]["data"], columns = clean_copper_headers)

# EIA data frame
oil_inv_df = pd.DataFrame(oil_inv_json['series'][0]['data'], columns = ['date', 'oil_inv'])

# Adding Regression Data
regression_data = pd.read_csv('../data/RegressionData.csv')
regression_data = regression_data.drop("Unnamed: 0", axis=1)
regression_data = regression_data.rename(columns = {'Date' : 'date'})


In [24]:
# FRED integer and datetime changer
for index, row in consumer_df.iterrows():
    row.date = dt.date.fromisoformat(row.date)
    row.value = float(row.value)
    
for index, row in deficit_df.iterrows():
    row.date = dt.date.fromisoformat(row.date)
    row.deficit = float(row.deficit)
    
for index, row in velocity_df.iterrows():
    row.date = dt.date.fromisoformat(row.date)
    row.velocity = float(row.velocity)

for index, row in recession_df.iterrows():
    row.date = dt.date.fromisoformat(row.date)
    row.recession = float(row.recession)
    
# Regression Data to datetime
for index, row in regression_data.iterrows():
    regression_data['date'][index] = dt.date.fromisoformat(row.date)

TypeError: fromisoformat: argument must be str

In [7]:
# Quandl integer and datetime changer
for index, row in copper_df.iterrows():
    row.date = dt.date.fromisoformat(row.date)
    mon = row.date.month
    if mon >= 1 and mon <= 3:
        mon = 1
    elif mon >= 4 and mon <= 6:
        mon = 4
    elif mon >= 7 and mon <= 9:
        mon = 7
    elif mon >= 10 and mon <= 12:
        mon = 10
    copper_df.date[index] = row.date.replace(month = mon , day = 1)
    
copper_df = copper_df.groupby(["date"]).mean()
copper_df = copper_df.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


In [8]:
#EIA integer and datetime changer
for index, row in oil_inv_df.iterrows():
    date_list = list(str(row.date))
    date_list.extend(list('-01-01'))
    new_date = ''.join(date_list)
    row.oil_inv = float(row.oil_inv)
    oil_inv_df.date[index] = dt.date.fromisoformat(new_date)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [9]:
# Creating new df with quarterly data
quarter_deficit_list = []
for index, row in deficit_df.iterrows():
    date = row.date
    q1 = {"date" : date.replace(month = 1, day = 1), "deficit": row.deficit}
    q2 = {"date" : date.replace(month = 4, day = 1), "deficit": row.deficit}
    q3 = {"date" : date.replace(month = 7, day = 1), "deficit": row.deficit}
    q4 = {"date" : date.replace(month = 10, day = 1), "deficit": row.deficit}
    quarter_deficit_list.append(q1)
    quarter_deficit_list.append(q2)
    quarter_deficit_list.append(q3)
    quarter_deficit_list.append(q4)
quarter_deficit_df = pd.DataFrame(quarter_deficit_list)

In [10]:
# Creating new df with quarterly data
quarter_oil_inv_list = []
for index, row in oil_inv_df.iterrows():
    date = row.date
    q1 = {"date" : date.replace(month = 1, day = 1), "oil_inv": row.oil_inv}
    q2 = {"date" : date.replace(month = 4, day = 1), "oil_inv": row.oil_inv}
    q3 = {"date" : date.replace(month = 7, day = 1), "oil_inv": row.oil_inv}
    q4 = {"date" : date.replace(month = 10, day = 1), "oil_inv": row.oil_inv}
    quarter_oil_inv_list.append(q1)
    quarter_oil_inv_list.append(q2)
    quarter_oil_inv_list.append(q3)
    quarter_oil_inv_list.append(q4)
quarter_oil_inv_df = pd.DataFrame(quarter_oil_inv_list)

In [25]:
# Regression Data to datetime
for index, row in regression_data.iterrows():
    regression_data['date'][index] = dt.date.fromisoformat(row.date)
regression_data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,date,Contraction,Unemployment,One Year Bond,Ten Year Bond,GDP,Discount Rate
0,1967-10-01,0.0,3.9,5.56,5.64,4615.853,4.23
1,1968-01-01,0.0,3.7,5.47,5.61,4709.993,4.55
2,1968-04-01,0.0,3.6,5.94,5.74,4788.688,5.4
3,1968-07-01,0.0,3.5,5.51,5.46,4825.799,5.41
4,1968-10-01,0.0,3.4,5.84,5.77,4844.779,5.29


In [26]:
type(regression_data["date"][0])

datetime.date

In [27]:
combined_df = consumer_df.merge(copper_df, on="date")
combined_df = combined_df.drop(['open', 'high', 'low', 'last', 'change', 'volume', "previous_day_open_interest"], axis=1)
combined_df = combined_df.rename(index=str, columns={"value": "consumer_sentiment", "settle": "copper_price"})
combined_df = combined_df.merge(quarter_deficit_df, on="date")
combined_df = combined_df.merge(velocity_df, on="date")
combined_df = combined_df.merge(quarter_oil_inv_df, on="date")
# combined_df = combined_df.merge(recession_df, on="date")
combined_df = combined_df.merge(regression_data, on='date')
combined_df.head()

Unnamed: 0,date,consumer_sentiment,copper_price,deficit,velocity,oil_inv,Contraction,Unemployment,One Year Bond,Ten Year Bond,GDP,Discount Rate
0,1960-04-01,93.3,0.311057,301.0,1.797,31613,1.0,5.2,3.87,4.26,3258.088,3.88
1,1960-07-01,97.2,0.303747,301.0,1.78,31613,1.0,5.5,3.07,3.83,3274.029,3.23
2,1960-10-01,90.1,0.282263,301.0,1.737,31613,1.0,6.3,2.99,3.89,3232.009,3.0
3,1961-01-01,91.6,0.281681,-3335.0,1.723,31759,0.0,6.8,2.87,3.79,3253.826,3.0
4,1961-04-01,92.5,0.306175,-3335.0,1.724,31759,0.0,7.0,2.94,3.79,3309.059,3.0


In [28]:
combined_df.to_csv(path_or_buf='../data/combined_df.csv',index=False)

In [15]:
# combined_df.plot(x="date",y="consumer_sentiment", kind="line", figsize=(30,10))

In [16]:
# combined_df.plot(x="date", y="copper_price", kind="line", figsize=(30,10))

In [17]:
# combined_df.plot(x="date", y="deficit", kind="line", figsize=(30,10))

In [18]:
# combined_df.plot(x="date", y="velocity", kind="line", figsize=(30,10))

In [19]:
# combined_df.plot(x="date", y="oil_inv", kind="line", figsize=(30,10))