# Data Import

## Imports

In [1]:
# Enable IPython's autoreload extension (mode 2) so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import scipy as sp
import pandas as pd

import warnings
import tqdm

#import pandas_datareader.data as web
import datetime as dt

import statsmodels.api as sm
import linearmodels as lm
import arch as am

import matplotlib.pyplot as plt
import seaborn as sns

  data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)
  from pandas import DataFrame, Panel, Series, MultiIndex, get_dummies, Categorical


In [3]:
# Extend the module search path so the local `src` and `kungfu` packages can be imported.
# NOTE(review): both paths are relative to the current working directory, so this
# presumably requires the kernel to be started from the notebook's own folder -- confirm.
import sys
sys.path.append('../')
import src

sys.path.append('../../kungfu/')
import kungfu as kf

  from pandas.util.testing import assert_frame_equal


In [4]:
# Render floats with four decimal places in pandas output.
pd.set_option('display.float_format', '{:.4f}'.format)

In [5]:
# Apply seaborn's default theme and widen the default figure size in one call.
# A separate bare `sns.set()` beforehand is unnecessary: `sns.set(rc=...)` applies
# the same defaults and then layers the rc overrides on top.
sns.set(rc={'figure.figsize': (17, 6)})

In [6]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Data

Data is imported from [Kenneth French's data library](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html) and [FRED](https://fred.stlouisfed.org/series/). Currently, this notebook uses the 49 industry portfolios and the Fama-French three factors (FF3), complemented by NBER recession indicators.

In [7]:
# Sample window used to filter the downloaded series.
# Alternative bounds kept for reference:
#   start: 1962-01-01 or 1945-01-01
#   end:   2019-12-31
sample_start = dt.datetime(year=1900, month=1, day=1)
sample_end = dt.datetime.now()  # current local time, so the window extends to the run date

### Market Index

In [8]:
# Download the factor data at daily ('D') and monthly ('M') frequency.
# The division by 100 rescales the raw values (presumably percent -> decimal; confirm in kungfu).
factors_daily = kf.dataloader.download_factor_data('D') / 100
factors_monthly = kf.dataloader.download_factor_data('M') / 100

# Keep only observations inside [sample_start, sample_end] (both bounds inclusive).
daily_in_sample = (factors_daily.index >= sample_start) & (factors_daily.index <= sample_end)
monthly_in_sample = (factors_monthly.index >= sample_start) & (factors_monthly.index <= sample_end)
factors_daily = factors_daily.loc[daily_in_sample]
factors_monthly = factors_monthly.loc[monthly_in_sample]

# Extract the 'Mkt-RF' column at each frequency and mark it as a return series.
market_daily = factors_daily['Mkt-RF'].set_obstype('return')
market_monthly = factors_monthly['Mkt-RF'].set_obstype('return')

# Persist both series to the external data folder.
market_daily.to_pickle(path='../data/external/market_daily.pkl')
market_monthly.to_pickle(path='../data/external/market_monthly.pkl')

# Log when the download finished.
print(f'downloaded FF Market data: {dt.datetime.now()}')

downloaded FF Market data: 2020-04-10 16:08:44.202370


### Aggregation

In [9]:
# Build a frame of per-month statistics from the daily market series,
# indexed by (year, month).
fdf_market = kf.FinancialDataFrame([])
month_groups = market_daily.groupby([market_daily.index.year, market_daily.index.month])
fdf_market['obs_count'] = month_groups.count()

# Fill in realised volatility and total return for each (year, month) group.
# NOTE(review): 252 is presumably the annualisation factor (trading days per year) -- confirm in kungfu.
for year_month, daily_returns in month_groups:
    fdf_market.at[year_month, 'real_volatility'] = daily_returns.calculate_realised_volatility(252)
    fdf_market.at[year_month, 'tot_return'] = daily_returns.calculate_total_return()

In [11]:
# Re-index a copy of the monthly market series by (year, month) so that it
# aligns with the index of fdf_market.
market_returns = market_monthly.copy()
market_returns.name = 'monthly_return'
market_returns.index = pd.MultiIndex.from_arrays([market_returns.index.year, market_returns.index.month])

# Drop any 'monthly_return' column left over from a previous run of this cell
# before joining; without this, re-running the cell fails because the column
# already exists in fdf_market and join() refuses overlapping column names.
fdf_market = (
    fdf_market
    .drop(columns='monthly_return', errors='ignore')
    .join(market_returns, how='left')
)

In [12]:
# Show the combined monthly frame: observation count, realised volatility,
# total return from daily data, and the monthly return series.
fdf_market

Unnamed: 0_level_0,Unnamed: 1_level_0,obs_count,real_volatility,tot_return,monthly_return
Date,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1926,7,25,0.0727,0.0288,0.0296
1926,8,26,0.0937,0.0267,0.0264
1926,9,24,0.0771,0.0037,0.0036
1926,10,25,0.1321,-0.0330,-0.0324
1926,11,24,0.0617,0.0258,0.0253
...,...,...,...,...,...
2019,10,23,0.1320,0.0206,0.0206
2019,11,20,0.0676,0.0388,0.0387
2019,12,21,0.0778,0.0278,0.0277
2020,1,21,0.1160,-0.0010,-0.0011
