In [223]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
import pickle
from IPython.display import Image

import requests
from lxml import etree as et

In [357]:
## Import 2Y, 10Y, 30Y Treasury yield-curve rates from treasury.gov (as XML)
##
## Builds `tbill_df`: one row per date in the feed, indexed by `Date`, with the
## BC_2YEAR / BC_10YEAR / BC_30YEAR fields converted to numeric columns.

start_year = 2013
end_year = 2018

# Human-readable page for the same data (reference only, not fetched here):
# https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldYear&year=<year>

# Fields to pull out of the feed; each lives under the feed's `d:` namespace prefix.
fields = ['NEW_DATE', 'BC_2YEAR', 'BC_10YEAR', 'BC_30YEAR']
rate_columns = fields[1:]

# One DataFrame per year, concatenated once after the loop. Growing a frame
# with DataFrame.append inside the loop re-copies it every iteration, and
# DataFrame.append no longer exists in pandas >= 2.0.
yearly_frames = []

for year in range(start_year, end_year+1):
    # NOTE(review): confirm this feed endpoint is still served by treasury.gov.
    xmlurl = "http://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData?$filter=year(NEW_DATE)%20eq%20{0}".format(year)

    response = requests.get(xmlurl, timeout=60)
    response.raise_for_status()  # fail loudly rather than parse an HTTP error page as XML
    feed_root = et.fromstring(response.content) # bytes -> lxml.etree._Element

    # Debugging aid: print the path of every element in the tree.
    #     tree = et.ElementTree(feed_root)
    #     for element in feed_root.iter():
    #         print(tree.getpath(element))

    namespaces = feed_root.nsmap

    # Column-wise extraction: every field must occur the same number of times,
    # otherwise the DataFrame constructor raises instead of misaligning rows.
    yearly_frames.append(pd.DataFrame({
        field: [node.text for node in feed_root.findall('.//d:' + field, namespaces=namespaces)]
        for field in fields
    }))

tbill_df = pd.concat(yearly_frames, ignore_index=True)
tbill_df = tbill_df.rename(columns={'NEW_DATE':"Date"})
tbill_df['Date'] = pd.to_datetime(tbill_df['Date'])
tbill_df = tbill_df.set_index('Date')

# Element text is always str (or None for an empty element); convert so the
# rates are floats and missing values become NaN instead of staying as objects.
tbill_df[rate_columns] = tbill_df[rate_columns].apply(pd.to_numeric)

print(tbill_df.head())
print(tbill_df.tail())

           BC_2YEAR BC_10YEAR BC_30YEAR
Date                                   
2013-01-02     0.27      1.86      3.04
2013-01-03     0.27      1.92      3.12
2013-01-04     0.27      1.93       3.1
2013-01-07     0.27      1.92       3.1
2013-01-08     0.25      1.89      3.06
           BC_2YEAR BC_10YEAR BC_30YEAR
Date                                   
2018-11-27     2.83      3.06      3.32
2018-11-28     2.81      3.06      3.34
2018-11-29     2.81      3.03      3.33
2018-11-30      2.8      3.01       3.3
2018-12-03     2.83      2.98      3.27


In [358]:
## Load the Fama-French 5-factor daily data (CSV downloaded from Kenneth R. French's Data Library)
## http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
##
## Builds `ff5_df`: indexed by `Date`, restricted to rows from `start_year` onwards.

ff5_df = (
    pd.read_csv('FF5_daily_20181031.csv', skiprows=3)  # skip the 3 lines above the header row
    # The date column has no header in the file, so pandas names it "Unnamed: 0".
    .rename(columns={"Unnamed: 0": "Date"})
    .assign(Date=lambda frame: pd.to_datetime(frame.Date, format="%Y%m%d"))
    .set_index('Date')
    # Keep only rows whose year is start_year or later.
    .loc[lambda frame: frame.index.year >= start_year]
)

for preview in (ff5_df.head(), ff5_df.tail()):
    print(preview)

            Mkt-RF   SMB   HML   RMW   CMA   RF
Date                                           
2013-01-02    2.62  0.12  0.35 -0.62 -0.04  0.0
2013-01-03   -0.14  0.14  0.05  0.17  0.22  0.0
2013-01-04    0.55  0.19  0.43 -0.38  0.27  0.0
2013-01-07   -0.31 -0.08 -0.36 -0.10 -0.12  0.0
2013-01-08   -0.27  0.03 -0.06 -0.16  0.09  0.0
            Mkt-RF   SMB   HML   RMW   CMA     RF
Date                                             
2018-10-25    1.93  0.33 -0.72 -0.16 -1.09  0.008
2018-10-26   -1.65  0.57  0.37 -0.37  0.52  0.008
2018-10-29   -0.76 -0.07  1.63 -0.04  0.99  0.008
2018-10-30    1.66  0.43  0.11  0.36  0.38  0.008
2018-10-31    1.21 -0.68 -0.69 -0.39 -1.00  0.008
