# ETL Project - Benjamin Aubry, Gary Fisher, and Bruce Mark

## Instructions for Importing Quandl Data Function:
#### Install Code: pip install quandl
#### Dependencies: import quandl
##### Reference: https://docs.quandl.com/docs/python-installation

## Instructions for Importing Fed Data Function:
#### Install Code: pip install datapungi_fed
#### Dependencies: import datapungi_fed as dpf
##### Reference: https://github.com/jjotterson/datapungi_fed

In [1]:
#Import Dependencies
import pandas as pd
import pymongo
import numpy as np
import requests
import sys
import os.path
import datetime as dt
import pprint

import quandl
import datapungi_fed as dpf

from quandl_apitoken import api_key
from api_keys import fred_api_key
from bs4 import BeautifulSoup as bs
from pymongo import MongoClient

## Extract and Transform

### Quandl

In [2]:
# Quandl API call: download the copper (CHRIS/CME_HG2) and gold (CHRIS/CME_GC2)
# datasets and save each one as a CSV that later cells read back.
# Best-effort: any failure is printed rather than raised.
try:
    copper_download = quandl.get("CHRIS/CME_HG2", authtoken=api_key)
    gold_download = quandl.get("CHRIS/CME_GC2", authtoken=api_key)
    # to_csv does not create missing directories, so make sure the export
    # folder exists before writing into it.
    os.makedirs("data_export_files", exist_ok=True)
    copper_download.to_csv("data_export_files/copper_2nd.csv")
    gold_download.to_csv("data_export_files/gold_2nd.csv")

except Exception as e:
    print(e)

In [3]:
# Download the silver (CHRIS/CME_SI2) and crude oil (CHRIS/CME_CL2) datasets
# from Quandl and save each one as a CSV that later cells read back.
# Best-effort: any failure is printed rather than raised.
try:
    silver_download = quandl.get("CHRIS/CME_SI2", authtoken=api_key)
    crude_download = quandl.get("CHRIS/CME_CL2", authtoken=api_key)
    # to_csv does not create missing directories, so make sure the export
    # folder exists before writing into it.
    os.makedirs("data_export_files", exist_ok=True)
    silver_download.to_csv("data_export_files/silver_2nd.csv")
    crude_download.to_csv("data_export_files/crude_2nd.csv")

except Exception as e:
    print(e)

In [4]:
# Download the natural gas (CHRIS/CME_NG2) and corn (CHRIS/CME_C2) datasets
# from Quandl and save each one as a CSV that later cells read back.
# Best-effort: any failure is printed rather than raised.
try:
    natgas_download = quandl.get("CHRIS/CME_NG2", authtoken=api_key)
    corn_download = quandl.get("CHRIS/CME_C2", authtoken=api_key)
    # to_csv does not create missing directories, so make sure the export
    # folder exists before writing into it.
    os.makedirs("data_export_files", exist_ok=True)
    natgas_download.to_csv("data_export_files/natgas_2nd.csv")
    corn_download.to_csv("data_export_files/corn_2nd.csv")

except Exception as e:
    print(e)

In [5]:
# Download the live cattle (CHRIS/CME_LC2) and wheat (CHRIS/CME_KW2) datasets
# from Quandl and save each one as a CSV that later cells read back.
# Best-effort: any failure is printed rather than raised.
try:
    livecatt_download = quandl.get("CHRIS/CME_LC2", authtoken=api_key)
    wheat_download = quandl.get("CHRIS/CME_KW2", authtoken=api_key)
    # to_csv does not create missing directories, so make sure the export
    # folder exists before writing into it.
    os.makedirs("data_export_files", exist_ok=True)
    livecatt_download.to_csv("data_export_files/livecatt_2nd.csv")
    wheat_download.to_csv("data_export_files/K_Wheat_2nd.csv")

except Exception as e:
    print(e)

In [6]:
# Read the exported copper CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/copper_2nd.csv"
copper_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
copper_df = copper_raw_df.loc[:, price_columns].copy()
copper_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1959-10-29,0.324,0.324,0.319,0.324,79.0
1,1959-10-30,0.3215,0.3278,0.321,0.3215,100.0
2,1959-11-02,0.323,0.323,0.318,0.323,88.0
3,1959-11-04,0.33,0.3307,0.3259,0.33,135.0
4,1959-11-05,0.331,0.334,0.331,0.331,161.0


In [7]:
# Read the exported gold CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/gold_2nd.csv"
gold_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
gold_df = gold_raw_df.loc[:, price_columns].copy()
gold_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1974-12-31,195.4,195.4,185.7,185.7,879.0
1,1975-01-02,186.0,187.5,176.0,178.2,868.0
2,1975-01-03,175.5,178.5,172.5,177.5,633.0
3,1975-01-06,175.0,177.2,169.0,176.8,1026.0
4,1975-01-07,173.0,176.5,171.5,175.8,1065.0


In [8]:
# Read the exported silver CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/silver_2nd.csv"
silver_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
silver_df = silver_raw_df.loc[:, price_columns].copy()
silver_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1964-09-30,1.318,1.318,1.318,1.318,1.0
1,1964-10-01,1.319,1.319,1.319,1.319,0.0
2,1964-10-02,1.316,1.316,1.316,1.316,0.0
3,1964-10-05,1.316,1.316,1.316,1.316,5.0
4,1964-10-06,1.319,1.319,1.319,1.319,14.0


In [9]:
# Read the exported crude oil CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/crude_2nd.csv"
crude_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
crude_df = crude_raw_df.loc[:, price_columns].copy()
crude_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1983-03-30,29.1,29.4,29.1,29.35,471.0
1,1983-03-31,29.3,29.3,29.12,29.24,399.0
2,1983-04-04,29.23,29.4,29.23,29.25,185.0
3,1983-04-05,29.4,29.7,29.4,29.54,164.0
4,1983-04-06,29.7,29.85,29.5,29.68,257.0


In [10]:
# Read the exported natural gas CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/natgas_2nd.csv"
natgas_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
natgas_df = natgas_raw_df.loc[:, price_columns].copy()
natgas_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1990-04-03,1.59,1.68,1.59,1.65,75.0
1,1990-04-04,1.63,1.65,1.595,1.635,141.0
2,1990-04-05,1.625,1.65,1.625,1.635,69.0
3,1990-04-06,1.64,1.645,1.625,1.635,28.0
4,1990-04-09,1.62,1.645,1.615,1.645,35.0


In [11]:
# Read the exported corn CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/corn_2nd.csv"
corn_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
corn_df = corn_raw_df.loc[:, price_columns].copy()
corn_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1959-07-01,114.6,114.6,114.2,114.3,2528.0
1,1959-07-02,114.2,114.4,113.7,114.4,2428.0
2,1959-07-06,114.1,114.2,113.4,113.6,2842.0
3,1959-07-07,113.5,114.6,113.5,114.3,2871.0
4,1959-07-08,114.3,114.3,113.4,113.4,2189.0


In [12]:
# Read the exported live cattle CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/livecatt_2nd.csv"
livecatt_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
livecatt_df = livecatt_raw_df.loc[:, price_columns].copy()
livecatt_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1964-11-30,24.5,24.6,23.75,24.0,38.0
1,1964-12-01,23.8,23.9,23.6,23.8,23.0
2,1964-12-02,23.75,23.8,23.7,23.7,6.0
3,1964-12-03,23.7,23.75,23.5,23.6,13.0
4,1964-12-04,23.7,23.75,23.6,23.65,14.0


In [13]:
# Read the exported wheat CSV back in and keep only the price/volume columns.
csv_file = "data_export_files/K_Wheat_2nd.csv"
k_wheat_raw_df = pd.read_csv(csv_file)

price_columns = ["Date", "Open", "High", "Low", "Settle", "Volume"]
k_wheat_df = k_wheat_raw_df.loc[:, price_columns].copy()
k_wheat_df.head()

Unnamed: 0,Date,Open,High,Low,Settle,Volume
0,1976-04-09,378.0,381.0,378.0,379.25,530.0
1,1976-04-12,375.0,376.5,374.0,376.0,790.0
2,1976-04-13,374.0,374.0,372.5,372.75,475.0
3,1976-04-14,372.0,375.75,371.5,373.5,610.0
4,1976-04-15,375.5,385.0,375.5,381.5,755.0


### St. Louis Federal Reserve

In [14]:
# Build the datapungi_fed client from the FRED API key; the next cell calls
# data.series(...) on it.
# NOTE: the name `data` is rebound to the Forexfactory page text further down,
# so re-run this cell before re-running the FRED cells.
data = dpf.data(fred_api_key)

In [15]:
# FRED series ids joined onto the "AAA" series, in output-column order.
keys = ["DBAA", "BAMLH0A3HYCEY", "USD3MTD156N", "DGS10", "T10Y2Y", "gdp", "gnp"]

# Seed the combined frame with the "AAA" series, then outer-join every other
# series on "date" so that a date reported by any one series is kept.
df0 = data.series("AAA")
for series_id in keys:
    df1 = data.series(series_id)
    df0 = df0.merge(df1, on="date", how="outer")

# Move the index into a regular column and preview the result.
df0 = df0.reset_index()
df0.head()

Unnamed: 0,date,AAA,DBAA,BAMLH0A3HYCEY,USD3MTD156N,DGS10,T10Y2Y,gdp,gnp
0,1919-01-01,5.35,,,,,,,
1,1919-02-01,5.35,,,,,,,
2,1919-03-01,5.39,,,,,,,
3,1919-04-01,5.44,,,,,,,
4,1919-05-01,5.39,,,,,,,


In [16]:
# Map each df0 column to the readable column name used in master_df.
column_labels = {
    "date": "Date",
    "AAA": "AAA Corp Yield",
    "DBAA": "BBB Corp Yield",
    "BAMLH0A3HYCEY": "CCC Corp High Yield",
    "USD3MTD156N": "3-Month LIBOR",
    "DGS10": "10-Yr Treas Yld",
    "T10Y2Y": "10Yr - 2Yr Treas Yld",
    "gdp": "US GDP",
    "gnp": "US GNP",
}

# Pick those columns in mapping order, relabel them, and order rows by date.
master_df = df0[list(column_labels)].rename(columns=column_labels)
master_df = master_df.sort_values("Date")
master_df.head(5)

Unnamed: 0,Date,AAA Corp Yield,BBB Corp Yield,CCC Corp High Yield,3-Month LIBOR,10-Yr Treas Yld,10Yr - 2Yr Treas Yld,US GDP,US GNP
0,1919-01-01,5.35,,,,,,,
1,1919-02-01,5.35,,,,,,,
2,1919-03-01,5.39,,,,,,,
3,1919-04-01,5.44,,,,,,,
4,1919-05-01,5.39,,,,,,,


In [17]:
# Discard rows dated before 2019-04-01, then index the table by date.
too_old = master_df["Date"] < "2019-04-01"
master_df = master_df.drop(index=master_df.index[too_old]).set_index("Date")
master_df.head()

Unnamed: 0_level_0,AAA Corp Yield,BBB Corp Yield,CCC Corp High Yield,3-Month LIBOR,10-Yr Treas Yld,10Yr - 2Yr Treas Yld,US GDP,US GNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-04-01,3.69,4.73,11.51,2.5955,2.49,0.16,21340.267,21641.264
2019-04-02,,4.72,11.5,2.60238,2.48,0.18,,
2019-04-03,,4.75,11.41,2.59775,2.52,0.19,,
2019-04-04,,4.73,11.37,2.58863,2.51,0.18,,
2019-04-05,,4.72,11.3,2.59213,2.5,0.15,,


### Forexfactory

In [18]:
# Forexfactory economic calendar page to scrape. `month=last` presumably
# selects the previous calendar month -- confirm against the site.
url = 'https://www.forexfactory.com/calendar?month=last'

In [19]:
# Fetch the calendar page. The timeout keeps the cell from hanging forever on
# an unresponsive server, and raise_for_status() surfaces an HTTP error here
# rather than as a missing table further down.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [20]:
# Parse the downloaded page with BeautifulSoup using the lxml parser.
# NOTE(review): this rebinds `data`, which earlier held the datapungi_fed
# client, so the FRED cells cannot be re-run after this one without
# recreating that client first.
data = response.text
soup = bs(data, 'lxml')

In [21]:
# Locate the calendar table in the parsed page and print its markup.
table = soup.find('table', class_='calendar__table')
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the prettify() call below.
    raise RuntimeError(
        "No <table class='calendar__table'> found in the Forexfactory response"
    )
print(table.prettify())

<table class="calendar__table">
 <thead>
  <tr class="calendar__header--desktop subhead">
   <th class="calendar__date">
    Date
   </th>
   <th class="calendar__time">
    <a href="timezone.php" title="Time Options">
     2:45pm
    </a>
   </th>
   <th class="calendar__currency">
    Currency
   </th>
   <th class="calendar__impact">
    Impact
   </th>
   <th class="calendar__event">
   </th>
   <th class="calendar__detail">
    Detail
   </th>
   <th class="calendar__actual">
    Actual
   </th>
   <th class="calendar__forecast">
    Forecast
   </th>
   <th class="calendar__previous">
    Previous
   </th>
   <th class="calendar__graph">
    Graph
   </th>
  </tr>
  <tr class="calendar__header--mobile subhead">
   <th colspan="4">
    <a class="calendar__header-time" href="timezone.php" title="Time Options">
     2:45pm
    </a>
   </th>
   <th>
    Actual
   </th>
  </tr>
 </thead>
 <tr class="calendar__borderfix borderfix">
  <td>
  </td>
 </tr>
 <tr class="calendar__row calend

In [22]:
# Class strings of the <td> cells kept from each event row, passed to
# find_all as the class_ filter (date, currency, event, actual, forecast,
# previous).
wanted_cell_classes = [
    'calendar__cell calendar__date date',
    'calendar__cell calendar__currency currency', 
    'calendar__cell calendar__event event', 
    'calendar__cell calendar__actual actual', 
    'calendar__cell calendar__forecast forecast', 
    'calendar__cell calendar__previous previous',
]

# One inner list of cell texts per <tr> that has a data-eventid attribute.
list_of_rows = [
    [cell.text for cell in event_row.find_all('td', class_=wanted_cell_classes)]
    for event_row in table.find_all('tr', {'data-eventid': True})
]


In [23]:
# Tabulate the scraped rows under fixed column names.
calendar_columns = ['Date', 'Country', 'Event', 'Actual', 'Forecast', 'Previous']
df_calendar = pd.DataFrame(list_of_rows, columns=calendar_columns)

# The currency cell text spans several lines; keep the piece after the first
# newline.
df_calendar['Country'] = df_calendar['Country'].str.split('\n').str[1]

# Index the table by its 'Date' column and display it.
df_calendar = df_calendar.set_index('Date')
df_calendar

Unnamed: 0_level_0,Country,Event,Actual,Forecast,Previous
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WedApr 1,AUD,Commodity Prices y/y,-10.2%,,-6.0%
,,,,,
,EUR,German Retail Sales m/m,1.2%,0.1%,1.0%
,,,,,
,EUR,Spanish Manufacturing PMI,45.7,44.0,50.4
...,...,...,...,...,...
,,,,,
,JPY,Final Manufacturing PMI,41.9,43.7,43.7
,,,,,
,AUD,PPI q/q,0.2%,,0.3%


In [24]:
# Display the calendar frame again; it has not changed since the previous cell.
df_calendar

Unnamed: 0_level_0,Country,Event,Actual,Forecast,Previous
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WedApr 1,AUD,Commodity Prices y/y,-10.2%,,-6.0%
,,,,,
,EUR,German Retail Sales m/m,1.2%,0.1%,1.0%
,,,,,
,EUR,Spanish Manufacturing PMI,45.7,44.0,50.4
...,...,...,...,...,...
,,,,,
,JPY,Final Manufacturing PMI,41.9,43.7,43.7
,,,,,
,AUD,PPI q/q,0.2%,,0.3%


In [25]:
# Bind a second name to the same DataFrame object (no copy is made here).
new_calendar = df_calendar

In [26]:
# Drop the rows in which every column is missing, then display the result.
new_calendar = new_calendar.dropna(how='all')
new_calendar

Unnamed: 0_level_0,Country,Event,Actual,Forecast,Previous
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WedApr 1,AUD,Commodity Prices y/y,-10.2%,,-6.0%
,EUR,German Retail Sales m/m,1.2%,0.1%,1.0%
,EUR,Spanish Manufacturing PMI,45.7,44.0,50.4
,CHF,Manufacturing PMI,43.7,42.3,49.5
,EUR,Italian Manufacturing PMI,40.3,41.1,48.7
...,...,...,...,...,...
,JPY,Tokyo Core CPI y/y,-0.1%,0.1%,0.4%
,JPY,Monetary Policy Meeting Minutes,,,
,CNY,Bank Holiday,,,
,JPY,Final Manufacturing PMI,41.9,43.7,43.7


In [27]:
# Move the 'Date' index back into an ordinary column, leaving a default
# integer index.
new_calendar = new_calendar.reset_index()

In [28]:
# Drop the first four characters of every date label (the weekday prefix),
# leaving e.g. 'Apr 1 '. The values stay strings; they are parsed into dates
# in a later cell.
new_calendar['Date'] = new_calendar['Date'].map(lambda label: label[4:])

In [29]:
# List the distinct date labels to sanity-check the slicing done in the
# previous cell.
new_calendar['Date'].unique()

array(['Apr 1 ', '', 'Apr 2 ', 'Apr 3 ', 'Apr 4 ', 'Apr 5 ', 'Apr 6 ',
       'Apr 7 ', 'Apr 8 ', 'Apr 9 ', 'Apr 10 ', 'Apr 11 ', 'Apr 12 ',
       'Apr 13 ', 'Apr 14 ', 'Apr 15 ', 'Apr 16 ', 'Apr 17 ', 'Apr 18 ',
       'Apr 19 ', 'Apr 20 ', 'Apr 21 ', 'Apr 22 ', 'Apr 23 ', 'Apr 24 ',
       'Apr 25 ', 'Apr 26 ', 'Apr 27 ', 'Apr 28 ', 'Apr 29 ', 'Apr 30 '],
      dtype=object)

In [30]:
# Display the frame; rows after the first event of a day have a blank 'Date'.
new_calendar

Unnamed: 0,Date,Country,Event,Actual,Forecast,Previous
0,Apr 1,AUD,Commodity Prices y/y,-10.2%,,-6.0%
1,,EUR,German Retail Sales m/m,1.2%,0.1%,1.0%
2,,EUR,Spanish Manufacturing PMI,45.7,44.0,50.4
3,,CHF,Manufacturing PMI,43.7,42.3,49.5
4,,EUR,Italian Manufacturing PMI,40.3,41.1,48.7
...,...,...,...,...,...,...
393,,JPY,Tokyo Core CPI y/y,-0.1%,0.1%,0.4%
394,,JPY,Monetary Policy Meeting Minutes,,,
395,,CNY,Bank Holiday,,,
396,,JPY,Final Manufacturing PMI,41.9,43.7,43.7


In [31]:
# Only the first event of each day carries a date label. Turn the blank labels
# into NaN and forward-fill so that every row gets its day.
# Series.ffill() replaces fillna(method='ffill'), which recent pandas
# versions deprecate.
new_calendar['Date'] = new_calendar['Date'].replace('', np.nan).ffill()

In [32]:
# The scraped labels carry no year. The URL requests last month's calendar, so
# append the two-digit year of the previous calendar month instead of a
# hard-coded '20'; this also covers a January run, where last month falls in
# the prior year. The labels are parsed later with format '%b %d %y'.
last_month_end = dt.date.today().replace(day=1) - dt.timedelta(days=1)
new_calendar['Date'] = new_calendar['Date'] + last_month_end.strftime('%y')

In [33]:
# Parse the '<month abbrev> <day> <two-digit year>' labels into datetimes.
new_calendar['Date']=  pd.to_datetime(new_calendar['Date'], format='%b %d %y')

In [34]:
# Display the frame now that 'Date' holds parsed datetimes.
new_calendar

Unnamed: 0,Date,Country,Event,Actual,Forecast,Previous
0,2020-04-01,AUD,Commodity Prices y/y,-10.2%,,-6.0%
1,2020-04-01,EUR,German Retail Sales m/m,1.2%,0.1%,1.0%
2,2020-04-01,EUR,Spanish Manufacturing PMI,45.7,44.0,50.4
3,2020-04-01,CHF,Manufacturing PMI,43.7,42.3,49.5
4,2020-04-01,EUR,Italian Manufacturing PMI,40.3,41.1,48.7
...,...,...,...,...,...,...
393,2020-04-30,JPY,Tokyo Core CPI y/y,-0.1%,0.1%,0.4%
394,2020-04-30,JPY,Monetary Policy Meeting Minutes,,,
395,2020-04-30,CNY,Bank Holiday,,,
396,2020-04-30,JPY,Final Manufacturing PMI,41.9,43.7,43.7


In [35]:
# Group the events by date, country and event name.
# NOTE(review): the grouped object is not used by any later visible cell, and
# the only line that inspected it is commented out.
new_calendar_group = new_calendar.groupby(["Date", "Country", "Event"])
#new_calendar_group.first().head(50)

In [36]:
# Setting up the index
#new_calendar.set_index('Date')

## More Transforming and Load to MongoDB

### Create MongoDB

In [37]:
# Open a client against the MongoDB instance at localhost:27017.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Handle to the database that holds every collection loaded below.
db = client['economics_db']

In [38]:
# Collection handles: one per commodity, plus the Fed series and the
# calendar events.
collection_commod_copper = db['commodities_copper']
collection_commod_gold = db['commodities_gold']
collection_commod_silver = db['commodities_silver']
collection_commod_crude = db['commodities_crude']
collection_commod_natgas = db['commodities_natgas']
collection_commod_corn = db['commodities_corn']
collection_commod_livecatt = db['commodities_livecatt']
collection_commod_wheat = db['commodities_wheat']
collection_fed = db['fed_data']
collection_events = db['event_data']

### Quandl

In [39]:
# Re-index every commodity frame by its "Date" column so the dates become the
# keys of the dictionaries built in the next cell.
(
    copper_df,
    gold_df,
    silver_df,
    crude_df,
    natgas_df,
    corn_df,
    livecatt_df,
    k_wheat_df,
) = (
    frame.set_index("Date")
    for frame in (
        copper_df,
        gold_df,
        silver_df,
        crude_df,
        natgas_df,
        corn_df,
        livecatt_df,
        k_wheat_df,
    )
)

In [40]:
# Convert each commodity frame into a dictionary for the Mongo load.
# orient="index" yields {index label: {column name: value}}, i.e. one entry
# per date.
(
    copper_dict,
    gold_dict,
    silver_dict,
    crude_dict,
    natgas_dict,
    corn_dict,
    livecatt_dict,
    k_wheat_dict,
) = (
    frame.to_dict(orient="index")
    for frame in (
        copper_df,
        gold_df,
        silver_df,
        crude_df,
        natgas_df,
        corn_df,
        livecatt_df,
        k_wheat_df,
    )
)

In [41]:
# Load copper into Mongo: one document per date, shaped {date string: {column: value}}.
copper_docs = [{str(trade_date): fields} for trade_date, fields in copper_dict.items()]
collection_commod_copper.insert_many(copper_docs)

<pymongo.results.InsertManyResult at 0x2913322efc8>

In [42]:
# Load gold into Mongo: one document per date, shaped {date string: {column: value}}.
gold_docs = [{str(trade_date): fields} for trade_date, fields in gold_dict.items()]
collection_commod_gold.insert_many(gold_docs)

<pymongo.results.InsertManyResult at 0x291333b36c8>

In [43]:
# Load silver into Mongo: one document per date, shaped {date string: {column: value}}.
silver_docs = [{str(trade_date): fields} for trade_date, fields in silver_dict.items()]
collection_commod_silver.insert_many(silver_docs)

<pymongo.results.InsertManyResult at 0x29132f55fc8>

In [44]:
# Load crude oil into Mongo: one document per date, shaped {date string: {column: value}}.
crude_docs = [{str(trade_date): fields} for trade_date, fields in crude_dict.items()]
collection_commod_crude.insert_many(crude_docs)

<pymongo.results.InsertManyResult at 0x291334ae348>

In [45]:
# Load natural gas into Mongo: one document per date, shaped {date string: {column: value}}.
natgas_docs = [{str(trade_date): fields} for trade_date, fields in natgas_dict.items()]
collection_commod_natgas.insert_many(natgas_docs)

<pymongo.results.InsertManyResult at 0x291334d5388>

In [46]:
# Load corn into Mongo: one document per date, shaped {date string: {column: value}}.
corn_docs = [{str(trade_date): fields} for trade_date, fields in corn_dict.items()]
collection_commod_corn.insert_many(corn_docs)

<pymongo.results.InsertManyResult at 0x291338aaa48>

In [47]:
# Load live cattle into Mongo: one document per date, shaped {date string: {column: value}}.
livecatt_docs = [{str(trade_date): fields} for trade_date, fields in livecatt_dict.items()]
collection_commod_livecatt.insert_many(livecatt_docs)

<pymongo.results.InsertManyResult at 0x2913398b608>

In [48]:
# Load wheat into Mongo: one document per date, shaped {date string: {column: value}}.
wheat_docs = [{str(trade_date): fields} for trade_date, fields in k_wheat_dict.items()]
collection_commod_wheat.insert_many(wheat_docs)

<pymongo.results.InsertManyResult at 0x291336693c8>

### St. Louis Federal Reserve

In [49]:
# Convert the Fed table into {date: {series name: value}} for the Mongo load.
master_df_dict = master_df.to_dict(orient="index")

In [50]:
# Display the dictionary to inspect its nesting before loading it into Mongo.
master_df_dict

{Timestamp('2019-04-01 00:00:00'): {'AAA Corp Yield': 3.69,
  'BBB Corp Yield': 4.73,
  'CCC Corp High Yield': 11.51,
  '3-Month LIBOR': 2.5955,
  '10-Yr Treas Yld': 2.49,
  '10Yr - 2Yr Treas Yld': 0.16,
  'US GDP': 21340.267,
  'US GNP': 21641.264},
 Timestamp('2019-04-02 00:00:00'): {'AAA Corp Yield': nan,
  'BBB Corp Yield': 4.72,
  'CCC Corp High Yield': 11.5,
  '3-Month LIBOR': 2.60238,
  '10-Yr Treas Yld': 2.48,
  '10Yr - 2Yr Treas Yld': 0.18,
  'US GDP': nan,
  'US GNP': nan},
 Timestamp('2019-04-03 00:00:00'): {'AAA Corp Yield': nan,
  'BBB Corp Yield': 4.75,
  'CCC Corp High Yield': 11.41,
  '3-Month LIBOR': 2.59775,
  '10-Yr Treas Yld': 2.52,
  '10Yr - 2Yr Treas Yld': 0.19,
  'US GDP': nan,
  'US GNP': nan},
 Timestamp('2019-04-04 00:00:00'): {'AAA Corp Yield': nan,
  'BBB Corp Yield': 4.73,
  'CCC Corp High Yield': 11.37,
  '3-Month LIBOR': 2.58863,
  '10-Yr Treas Yld': 2.51,
  '10Yr - 2Yr Treas Yld': 0.18,
  'US GDP': nan,
  'US GNP': nan},
 Timestamp('2019-04-05 00:00:00')

In [51]:
# Load the Fed data into Mongo: one document per date, shaped {timestamp string: {series name: value}}.
fed_docs = [{str(stamp): fields} for stamp, fields in master_df_dict.items()]
collection_fed.insert_many(fed_docs)

<pymongo.results.InsertManyResult at 0x2912bb43688>

In [52]:
# Print every document currently stored in the fed_data collection.
for fed_doc in collection_fed.find():
    print(fed_doc)

{'_id': ObjectId('5eb5a8ce399a3b1bf0c080f8'), '2019-04-01 00:00:00': {'AAA Corp Yield': 3.69, 'BBB Corp Yield': 4.73, 'CCC Corp High Yield': 11.51, '3-Month LIBOR': 2.5955, '10-Yr Treas Yld': 2.49, '10Yr - 2Yr Treas Yld': 0.16, 'US GDP': 21340.267, 'US GNP': 21641.264}}
{'_id': ObjectId('5eb5a8ce399a3b1bf0c080f9'), '2019-04-02 00:00:00': {'AAA Corp Yield': nan, 'BBB Corp Yield': 4.72, 'CCC Corp High Yield': 11.5, '3-Month LIBOR': 2.60238, '10-Yr Treas Yld': 2.48, '10Yr - 2Yr Treas Yld': 0.18, 'US GDP': nan, 'US GNP': nan}}
{'_id': ObjectId('5eb5a8ce399a3b1bf0c080fa'), '2019-04-03 00:00:00': {'AAA Corp Yield': nan, 'BBB Corp Yield': 4.75, 'CCC Corp High Yield': 11.41, '3-Month LIBOR': 2.59775, '10-Yr Treas Yld': 2.52, '10Yr - 2Yr Treas Yld': 0.19, 'US GDP': nan, 'US GNP': nan}}
{'_id': ObjectId('5eb5a8ce399a3b1bf0c080fb'), '2019-04-04 00:00:00': {'AAA Corp Yield': nan, 'BBB Corp Yield': 4.73, 'CCC Corp High Yield': 11.37, '3-Month LIBOR': 2.58863, '10-Yr Treas Yld': 2.51, '10Yr - 2Yr Tr

### Forexfactory

In [53]:
# Convert the calendar frame into a dictionary keyed by row number:
# {row index: {column name: value}}.
event_data_dict = new_calendar.to_dict(orient="index")

In [54]:
# Display the dictionary to inspect its nesting before loading it into Mongo.
event_data_dict

{0: {'Date': Timestamp('2020-04-01 00:00:00'),
  'Country': 'AUD',
  'Event': '  Commodity Prices y/y  ',
  'Actual': '-10.2%',
  'Forecast': '',
  'Previous': '-6.0%'},
 1: {'Date': Timestamp('2020-04-01 00:00:00'),
  'Country': 'EUR',
  'Event': '  German Retail Sales m/m  ',
  'Actual': '1.2%',
  'Forecast': '0.1%',
  'Previous': '1.0%'},
 2: {'Date': Timestamp('2020-04-01 00:00:00'),
  'Country': 'EUR',
  'Event': '  Spanish Manufacturing PMI  ',
  'Actual': '45.7',
  'Forecast': '44.0',
  'Previous': '50.4'},
 3: {'Date': Timestamp('2020-04-01 00:00:00'),
  'Country': 'CHF',
  'Event': '  Manufacturing PMI  ',
  'Actual': '43.7',
  'Forecast': '42.3',
  'Previous': '49.5'},
 4: {'Date': Timestamp('2020-04-01 00:00:00'),
  'Country': 'EUR',
  'Event': '  Italian Manufacturing PMI  ',
  'Actual': '40.3',
  'Forecast': '41.1',
  'Previous': '48.7'},
 5: {'Date': Timestamp('2020-04-01 00:00:00'),
  'Country': 'EUR',
  'Event': '  French Final Manufacturing PMI  ',
  'Actual': '43.2',


In [55]:
# Load the calendar events into Mongo: one document per row, shaped {row number string: {column: value}}.
event_docs = [{str(row_number): fields} for row_number, fields in event_data_dict.items()]
collection_events.insert_many(event_docs)

<pymongo.results.InsertManyResult at 0x2912d8e0988>

In [56]:
# Print every document currently stored in the event_data collection.
for event_doc in collection_events.find():
    print(event_doc)

{'_id': ObjectId('5eb5a8cf399a3b1bf0c0821f'), '0': {'Date': datetime.datetime(2020, 4, 1, 0, 0), 'Country': 'AUD', 'Event': '  Commodity Prices y/y  ', 'Actual': '-10.2%', 'Forecast': '', 'Previous': '-6.0%'}}
{'_id': ObjectId('5eb5a8cf399a3b1bf0c08220'), '1': {'Date': datetime.datetime(2020, 4, 1, 0, 0), 'Country': 'EUR', 'Event': '  German Retail Sales m/m  ', 'Actual': '1.2%', 'Forecast': '0.1%', 'Previous': '1.0%'}}
{'_id': ObjectId('5eb5a8cf399a3b1bf0c08221'), '2': {'Date': datetime.datetime(2020, 4, 1, 0, 0), 'Country': 'EUR', 'Event': '  Spanish Manufacturing PMI  ', 'Actual': '45.7', 'Forecast': '44.0', 'Previous': '50.4'}}
{'_id': ObjectId('5eb5a8cf399a3b1bf0c08222'), '3': {'Date': datetime.datetime(2020, 4, 1, 0, 0), 'Country': 'CHF', 'Event': '  Manufacturing PMI  ', 'Actual': '43.7', 'Forecast': '42.3', 'Previous': '49.5'}}
{'_id': ObjectId('5eb5a8cf399a3b1bf0c08223'), '4': {'Date': datetime.datetime(2020, 4, 1, 0, 0), 'Country': 'EUR', 'Event': '  Italian Manufacturing PMI