Idea: learn how macroeconomic data is published and then revised across successive releases.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import datetime

from data_apps_aws.sql import get_db_engine, get_db_data

from data_apps_aws.src_data_pipes.fred_config import get_nowcast_ticker_list

In [None]:
# Record when this notebook run happened so the outputs below can be dated.
execution_time = datetime.datetime.now()
print(f'The time at execution is: {execution_time}')

In [None]:
# Default figure size (width, height) for every plot in this notebook.
plt.rcParams.update({'figure.figsize': (14, 6)})
# Apply seaborn's default plot styling.
sns.set()

## Get data from database

In [None]:
# Database engine/connection handle that the data query in this notebook runs against.
db_con = get_db_engine("econ_data_read")

Get all release data for multiple series

In [None]:
# Tickers to inspect, as supplied by the project's FRED configuration helper.
ticker_list = get_nowcast_ticker_list()
# Tuple copy of the same tickers; this is what gets interpolated into the SQL filter.
tuple_ticker_list = (*ticker_list,)

In [None]:
# Build the SQL literal list for the ticker filter explicitly instead of relying on
# Python's tuple repr. The repr breaks the statement in three cases:
#   * one ticker   -> "('X',)" (trailing comma is a SQL syntax error)
#   * no tickers   -> "()"     (empty IN list is rejected by most databases)
#   * a ticker containing a single quote -> repr switches to double quotes,
#     which SQL treats as an identifier rather than a string.
# Single quotes inside a ticker are escaped by doubling them; an empty list falls
# back to NULL so the query is valid and simply returns no rows.
ticker_literals = ', '.join(
    "'{}'".format(str(ticker).replace("'", "''")) for ticker in tuple_ticker_list
) or 'NULL'

# Select every stored row (all releases) for the requested tickers.
query = f"""
SELECT *
FROM archival_data
WHERE ticker in ({ticker_literals})
"""

In [None]:
# Run the query against the database and keep the result for the analysis below.
all_ticker_release_data = get_db_data(
    query,
    db_con,
)

In [None]:
# Peek at the last five rows of the loaded data.
all_ticker_release_data.tail(5)

Min / max observation dates per ticker:

In [None]:
# Earliest and latest observation date ('date' column) for each ticker.
obs_dates_by_ticker = all_ticker_release_data.groupby('ticker')['date']
min_max_obs_dates = obs_dates_by_ticker.agg(['min', 'max'])
min_max_obs_dates

Min / max release dates per ticker

In [None]:
# Earliest and latest release date ('realtime_start' column) for each ticker.
release_dates_by_ticker = all_ticker_release_data.groupby('ticker')['realtime_start']
min_max_release_dates = release_dates_by_ticker.agg(['min', 'max'])
min_max_release_dates

Number of releases per ticker

In [None]:
# Count the distinct release dates ('realtime_start') seen for each ticker.
releases_by_ticker = all_ticker_release_data.groupby('ticker')['realtime_start']
n_releases = releases_by_ticker.nunique()
n_releases

Number of observations per release

In [None]:
# For every (ticker, release date) pair, count the observation dates in that release.
obs_counts = all_ticker_release_data.groupby(['ticker', 'realtime_start'])['date'].count()
# Flatten to a plain table with columns ticker, realtime_start, n_obs.
n_obs_per_release = obs_counts.to_frame(name='n_obs').reset_index()
n_obs_per_release

In [None]:
# One line per ticker: number of observations in each release over release dates.
# Release dates on which a ticker had no release are shown as 0.
obs_by_release = n_obs_per_release.pivot(index='realtime_start', columns='ticker', values='n_obs')
ax = obs_by_release.fillna(0).plot()
ax.set_title('Observations per release')
plt.show()

### Get the data that was valid in real time on a given historical date

In [None]:
this_real_time_date = '2016-07-19'

# Keep only the rows whose release date is on or before the chosen date.
releases_known_by_then = all_ticker_release_data.query('realtime_start <= @this_real_time_date')

# For each observation date and ticker, find the latest of those release dates.
max_release_per_obs_date = (
    releases_known_by_then
    .groupby(['date', 'ticker'])['realtime_start']
    .max()
    .to_frame('realtime_start')
    .reset_index()
)

# Join back to the full data to pick up the remaining columns of the selected rows.
# The keys listed are exactly the columns the two frames share, i.e. the same keys
# the default merge would use.
current_data_view = max_release_per_obs_date.merge(
    all_ticker_release_data,
    on=['date', 'ticker', 'realtime_start'],
)
current_data_view

In [None]:
# Reshape the point-in-time view to one row per observation date and one column
# per ticker.
df_wide = (
    current_data_view
    .pivot(index='date', columns='ticker', values='value')
    # Drop the 'ticker' label from the column index so columns are plain names.
    .rename_axis(columns=None)
    # Order the columns like ticker_list. reindex (rather than .loc) does not raise
    # a KeyError when a ticker has no row in current_data_view (e.g. no release on
    # or before the chosen date); such a ticker becomes an all-NaN column.
    .reindex(columns=ticker_list)
)
df_wide.tail()

In [None]:
# Carry each ticker's last available value forward through time, i.e. down the
# date index (axis=0). Filling along axis=1 instead would copy the value of the
# neighbouring ticker column into a missing cell, mixing unrelated series.
df_wide = df_wide.ffill(axis=0)

In [None]:
# Line chart of every ticker column against the observation date index.
df_wide.plot(kind='line')