In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import datetime, time
import requests
import json
import calendar
import re

from utils import headers, url_fred_str

In [3]:
# Notebook-wide matplotlib defaults: figure size plus image rendering options.
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})
# The ggplot style sheet is applied after the rcParams tweaks, same order as before.
plt.style.use('ggplot')

In [4]:
# Local calendar date at execution time, formatted as 'YYYY-MM-DD'.
today = datetime.date.today().isoformat()
# One shared HTTP session for the requests-based downloads in this notebook.
Sess = requests.Session()

### ISM & NMI

因在线的数据是从08年1月开始,故数据分为两个部分,每次更新时从线上获取(数据来源:东财),而08年之前的数据从东财choice拿到, 虽然所有的数据都可以从东财choice拿到, 但是更新不方便

获取最新的数据

In [5]:
# Eastmoney chart endpoints used for the online part of the data.
# The two URLs differ only in `stat` (0 for url_ism, 1 for url_nmi) and in `r`.
# NOTE(review): `r` looks like a random cache-busting value — confirm it is not required.
url_ism = 'http://data.eastmoney.com/DataCenter_V3/Chart/cjsj/foreign.ashx?mkt=0&stat=0&r=0.2845910438219901&isxml=false'
url_nmi = 'http://data.eastmoney.com/DataCenter_V3/Chart/cjsj/foreign.ashx?mkt=0&stat=1&r=0.6911199353114659&isxml=false'

In [6]:
def _month_labels_to_month_end(labels):
    """Turn 'YY年M月' / 'YYYY年M月' labels into a month-end DatetimeIndex."""
    cleaned = [label.replace('年', '-').replace('月', '') for label in labels]
    # '%y' only accepts two-digit years; use '%Y' when every label carries
    # four digits, so parsing does not depend on the deprecated
    # infer_datetime_format fallback.
    four_digit_year = all(re.match(r'\d{4}-', item) for item in cleaned)
    fmt = '%Y-%m' if four_digit_year else '%y-%m'
    return pd.to_datetime(cleaned, format=fmt) + pd.offsets.MonthEnd(0)


def _parse_number_list(text):
    """Parse a comma-separated string of numbers into a list of floats."""
    tokens = text.split(',')
    # The previous eval()-based list literal tolerated one trailing comma;
    # keep accepting that shape.
    if tokens and tokens[-1].strip() == '':
        tokens = tokens[:-1]
    return [float(token) for token in tokens]


def retrieve_data(url, headers, data_name, session=None):
    """Download one chart series and return it as a month-end indexed Series.

    The response body is expected to be JSON with an ``'X'`` entry holding
    comma-separated month labels and a ``'Y'`` list whose element ``[1]`` is a
    comma-separated string of numeric values.

    Parameters
    ----------
    url : str
        Endpoint to request.
    headers : dict
        HTTP headers forwarded to the request.
    data_name : str
        Name given to the returned Series.
    session : object, optional
        Anything with a ``get(url, headers=...)`` method returning a response
        that has ``text`` and ``raise_for_status()``. Defaults to the
        notebook-level ``Sess``.

    Returns
    -------
    pandas.Series
        float64 values indexed by month-end dates.

    Raises
    ------
    ValueError
        If the number of values does not match the number of dates, or a
        value/label cannot be parsed.
    """
    if session is None:
        session = Sess

    response = session.get(url, headers=headers)
    # Fail with the HTTP error itself instead of a confusing JSON decode error.
    response.raise_for_status()
    payload = json.loads(response.text)

    # The last label is dropped, as before: the label list is one entry longer
    # than the value list.
    dates = _month_labels_to_month_end(payload['X'].split(','))[:-1]
    # Values are parsed explicitly; the remote text is never passed to eval().
    values = _parse_number_list(payload['Y'][1])

    if len(values) != len(dates):
        raise ValueError(
            'Length mismatch: got {} values for {} dates'.format(len(values), len(dates))
        )

    return pd.Series(values, index=dates, name=data_name, dtype='float64')

In [7]:
# Fetch both online series and place them side by side, one column per series.
data_ism = retrieve_data(url=url_ism, headers=headers, data_name='ISM')
data_nmi = retrieve_data(url=url_nmi, headers=headers, data_name='NMI')
data_ism_nmi = pd.concat(objs=[data_ism, data_nmi], axis='columns')
data_ism_nmi.index.name = 'date'

# data_ism_nmi.head()
# data_ism_nmi.tail()

获取历史数据

In [8]:
data_ism_name = 'data/ISM采购经理指数(PMI).xlsx'
data_ism_historical = pd.read_excel(data_ism_name, header=1, index_col=0)

# Normalise the '…年…月' row labels with plain string operations instead of
# round-tripping the whole index through str() and eval().
date_ism_historical = [
    re.sub(r'月', '', re.sub(r'年', '-', str(label)))
    for label in data_ism_historical.index
]
# '%y' only matches two-digit years. Four-digit labels used to parse only via
# the deprecated infer_datetime_format fallback, so pick the format explicitly.
date_ism_historical = pd.to_datetime(
    date_ism_historical,
    format=(
        '%Y-%m'
        if all(re.match(r'\d{4}-', label) for label in date_ism_historical)
        else '%y-%m'
    ),
)
# Stamp every observation on the last day of its month.
date_ism_historical = date_ism_historical + pd.tseries.offsets.MonthEnd(0)
data_ism_historical.index = date_ism_historical
# data_ism_historical.head()
# data_ism_historical.tail()

# Keep rows up to the end of 2007 and the two composite-index columns.
# The explicit copy keeps the renames below off the source frame.
data_ism_nmi_historical = data_ism_historical.loc[:'2007', ['综合指数', '综合指数2']].copy()
data_ism_nmi_historical.index.name = 'date'
data_ism_nmi_historical.columns = ['ISM', 'NMI']

合并数据 & 写入到excel表中

In [9]:
# Stack the historical rows on top of the online rows, turn the '——'
# placeholder into NaN, then add first differences next to each level column.
data_ism_nmi_index = (
    pd.concat([data_ism_nmi_historical, data_ism_nmi])
    .replace(['——'], [np.nan])
    .assign(
        ISM_diff=lambda frame: frame['ISM'].diff(),
        NMI_diff=lambda frame: frame['NMI'].diff(),
    )
    .loc[:, ['ISM', 'ISM_diff', 'NMI', 'NMI_diff']]
)

# data_ism_nmi_index.head()
# data_ism_nmi_index.tail()

In [10]:
# # 写入到excel中
# writer = pd.ExcelWriter('data/data_ism_nmi_index.xlsx')
# data_ism_nmi_index.to_excel(writer)
# writer.save()

### UMCSI

In [11]:
# NOTE(review): this landing-page response is not used by the code below; the
# request is kept so the cell's behaviour is unchanged — confirm it is needed.
html_umcsi = Sess.get('http://www.sca.isr.umich.edu', headers=headers)
url_umcsi = 'http://www.sca.isr.umich.edu/files/tbmics.xls'

# Skip the first row after the header and take an explicit copy, so the column
# edits below act on an independent frame rather than on a slice
# (this avoids SettingWithCopyWarning).
data_umcsi = pd.read_excel(url_umcsi, header=3).iloc[1:].copy()
data_umcsi.columns = ['Month', 'Year', 'UMCSI']

# calendar.month_name[0] is '', so list.index() maps real month names to 1..12.
month_name_list = list(calendar.month_name)
data_umcsi['Month'] = data_umcsi['Month'].map(month_name_list.index)

data_umcsi = data_umcsi.assign(
    # Build the date from numeric year/month parts instead of concatenating
    # unpadded strings, then move it to the last day of the month.
    date=pd.to_datetime(pd.DataFrame({
        'year': data_umcsi['Year'].astype(int),
        'month': data_umcsi['Month'],
        'day': 1,
    })) + pd.offsets.MonthEnd(0),
    UMCSI_diff=data_umcsi['UMCSI'].diff(),
)
data_umcsi = data_umcsi[['date', 'UMCSI', 'UMCSI_diff']].set_index('date')

# data_umcsi.tail(15)

### Building Permits(PermitsSA)

In [12]:
# Read columns B and C of the third sheet (sheet_name=2); column labels come
# from row index 7. Rows without a 'Total' value are discarded.
url_bp = 'https://www.census.gov/construction/nrc/xls/permits_cust.xls'
data_bp = (
    pd.read_excel(url_bp, sheet_name=2, header=7, index_col=0, usecols="B,C")
    .dropna(subset=['Total'])
)
# Stamp each observation on the last calendar day of its month.
data_bp.index = pd.to_datetime(data_bp.index) + pd.offsets.MonthEnd(0)


### FRED data retrieve

In [13]:
def retrieve_fred_data(ids, fq, url=url_fred_str, start='1900-01-01', end=today):
    """Download one series as an Excel sheet and return it with its first difference.

    Parameters
    ----------
    ids : str
        Series identifier substituted into ``url``.
    fq : str
        Frequency string substituted into ``url``; callers pass it already
        URL-encoded.
    url : str
        Template formatted with the keywords ``ids``, ``fq``, ``start`` and ``end``.
    start, end : str
        Date bounds substituted into ``url``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'date'`` (datetime), holding the value column read from
        the sheet and a ``'<column>_diff'`` column with its row-to-row change.

    Notes
    -----
    Dates are kept exactly as they appear in the sheet; no month-end shift is
    applied (an earlier, commented-out variant did that for monthly data).
    """
    request_url = url.format(ids=ids, fq=fq, start=start, end=end)
    # Column labels are taken from row index 10; only columns A and B are read.
    frame = pd.read_excel(request_url, header=10, index_col=0, usecols="A,B")

    value_column = frame.columns[0]
    frame[value_column + '_diff'] = frame.iloc[:, 0].diff()

    frame.index = pd.to_datetime(frame.index)
    frame.index.name = 'date'
    return frame

#### M2

In [15]:
# fq is passed already URL-encoded: '%2C' is ',' and '%20' is a space,
# i.e. 'Weekly, Ending Monday'.
data_m2 = retrieve_fred_data(ids='M2', fq='Weekly%2C%20Ending%20Monday')

In [16]:
data_m2.head()

Unnamed: 0_level_0,M2,M2_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-11-03,1591.4,
1980-11-10,1592.9,1.5
1980-11-17,1596.3,3.4
1980-11-24,1597.2,0.9
1980-12-01,1596.1,-1.1


In [17]:
# Select the July 2018 rows. Row selection by date string must go through
# .loc: DataFrame[...] with a date string was deprecated and later removed.
data_m2.loc['2018-07']

Unnamed: 0_level_0,M2,M2_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-07-02,14135.8,16.2
2018-07-09,14131.0,-4.8
2018-07-16,14148.1,17.1
2018-07-23,14158.9,10.8
2018-07-30,14159.7,0.8


#### IR%

In [18]:
# Monthly FEDFUNDS series (the notebook's "IR%" section).
data_ir = retrieve_fred_data(ids='FEDFUNDS', fq='Monthly')

In [19]:
data_ir.head()

Unnamed: 0_level_0,FEDFUNDS,FEDFUNDS_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1954-07-01,0.8,
1954-08-01,1.22,0.42
1954-09-01,1.06,-0.16
1954-10-01,0.85,-0.21
1954-11-01,0.83,-0.02


#### CPIAUCSL

In [20]:
# Monthly CPIAUCSL series with its month-to-month change.
data_cpiaucsl = retrieve_fred_data(ids='CPIAUCSL', fq='Monthly')

In [21]:
data_cpiaucsl.head()

Unnamed: 0_level_0,CPIAUCSL,CPIAUCSL_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1947-01-01,21.48,
1947-02-01,21.62,0.14
1947-03-01,22.0,0.38
1947-04-01,22.0,0.0
1947-05-01,21.95,-0.05


In [22]:
data_cpiaucsl.tail(15)

Unnamed: 0_level_0,CPIAUCSL,CPIAUCSL_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01,248.884,0.983
2018-02-01,249.369,0.485
2018-03-01,249.498,0.129
2018-04-01,249.956,0.458
2018-05-01,250.646,0.69
2018-06-01,251.134,0.488
2018-07-01,251.597,0.463
2018-08-01,251.879,0.282
2018-09-01,252.01,0.131
2018-10-01,252.794,0.784


#### CPILFESL

In [23]:
# Monthly CPILFESL series with its month-to-month change.
data_cpilfesl = retrieve_fred_data(ids='CPILFESL', fq='Monthly')

In [24]:
data_cpilfesl.head()

Unnamed: 0_level_0,CPILFESL,CPILFESL_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1957-01-01,28.5,
1957-02-01,28.6,0.1
1957-03-01,28.7,0.1
1957-04-01,28.8,0.1
1957-05-01,28.8,0.0


In [25]:
data_cpilfesl.tail(15)

Unnamed: 0_level_0,CPILFESL,CPILFESL_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01,255.218,0.773
2018-02-01,255.662,0.444
2018-03-01,256.144,0.482
2018-04-01,256.42,0.276
2018-05-01,256.906,0.486
2018-06-01,257.327,0.421
2018-07-01,257.876,0.549
2018-08-01,258.087,0.211
2018-09-01,258.496,0.409
2018-10-01,259.002,0.506


#### PPIFGS (DISCONTINUED)

In [26]:
# Monthly PPIFGS series; this notebook marks it as discontinued, and the
# recorded output ends at 2015-07.
data_ppifgs = retrieve_fred_data(ids='PPIFGS', fq='Monthly')

In [27]:
data_ppifgs.head()

Unnamed: 0_level_0,PPIFGS,PPIFGS_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1947-04-01,26.0,
1947-05-01,26.1,0.1
1947-06-01,26.2,0.1
1947-07-01,26.2,0.0
1947-08-01,26.3,0.1


In [28]:
data_ppifgs.tail(15)

Unnamed: 0_level_0,PPIFGS,PPIFGS_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-10-01,200.8,-0.5
2014-11-01,199.2,-1.6
2014-12-01,196.7,-2.5
2015-01-01,192.9,-3.8
2015-02-01,193.0,0.1
2015-03-01,193.3,0.3
2015-04-01,192.2,-1.1
2015-05-01,195.4,3.2
2015-06-01,196.8,1.4
2015-07-01,196.4,-0.4


#### PPILFE (DISCONTINUED)

In [29]:
# Monthly PPILFE series; this notebook marks it as discontinued, and the
# recorded output ends at 2015-07.
data_ppilfe = retrieve_fred_data(ids='PPILFE', fq='Monthly')

In [30]:
data_ppilfe.head()

Unnamed: 0_level_0,PPILFE,PPILFE_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1974-01-01,49.7,
1974-02-01,50.0,0.3
1974-03-01,50.5,0.5
1974-04-01,51.1,0.6
1974-05-01,52.2,1.1


In [31]:
data_ppilfe.tail(15)

Unnamed: 0_level_0,PPILFE,PPILFE_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-10-01,189.5,0.2
2014-11-01,189.6,0.1
2014-12-01,189.8,0.2
2015-01-01,190.7,0.9
2015-02-01,191.3,0.6
2015-03-01,191.6,0.3
2015-04-01,191.6,0.0
2015-05-01,191.9,0.3
2015-06-01,192.9,1.0
2015-07-01,193.1,0.2


#### PPIACO

Producer Price Index for All Commodities

In [32]:
# Monthly PPIACO series (Producer Price Index for All Commodities).
data_ppiaco = retrieve_fred_data(ids='PPIACO', fq='Monthly')

In [33]:
data_ppiaco.head()

Unnamed: 0_level_0,PPIACO,PPIACO_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1913-01-01,12.1,
1913-02-01,12.0,-0.1
1913-03-01,12.0,0.0
1913-04-01,12.0,0.0
1913-05-01,11.9,-0.1


In [34]:
data_ppiaco.tail()

Unnamed: 0_level_0,PPIACO,PPIACO_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-11-01,202.3,-2.3
2018-12-01,202.1,-0.2
2019-01-01,198.6,-3.5
2019-02-01,198.7,0.1
2019-03-01,201.1,2.4


#### PCUOMFGOMFG

Producer Price Index by Industry: Total Manufacturing Industries

In [35]:
# Monthly PCUOMFGOMFG series (Producer Price Index by Industry: Total
# Manufacturing Industries).
# NOTE(review): the recorded head() shows 0.0 for the 1985 rows right after
# 100.0 in 1984-12, which makes the first diff -100 — these zeros look like
# missing observations; confirm before using the early part of the series.
data_ppitmi = retrieve_fred_data(ids='PCUOMFGOMFG', fq='Monthly')

In [36]:
data_ppitmi.head()

Unnamed: 0_level_0,PCUOMFGOMFG,PCUOMFGOMFG_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1984-12-01,100.0,
1985-01-01,0.0,-100.0
1985-02-01,0.0,0.0
1985-03-01,0.0,0.0
1985-04-01,0.0,0.0


In [37]:
data_ppitmi.tail(15)

Unnamed: 0_level_0,PCUOMFGOMFG,PCUOMFGOMFG_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01,192.6,1.3
2018-02-01,193.5,0.9
2018-03-01,194.0,0.5
2018-04-01,195.3,1.3
2018-05-01,197.7,2.4
2018-06-01,198.4,0.7
2018-07-01,198.5,0.1
2018-08-01,198.6,0.1
2018-09-01,198.9,0.3
2018-10-01,200.4,1.5


#### Govt

GFDEBTN total debt

FYFR Federal Receipts

FYONET Federal Outlays

FYOINT Federal Outlays: Interest -- interest bill

In [38]:
def _fred_with_labels(series_id, frequency, label):
    """Fetch one series and rename its two columns to ``label`` / ``label_diff``."""
    frame = retrieve_fred_data(series_id, frequency)
    frame.columns = [label, label + '_diff']
    return frame


# Total debt is quarterly; the three budget series are annual by fiscal year
# (the frequency strings are passed already URL-encoded).
data_debt = _fred_with_labels('GFDEBTN', 'Quarterly', 'debt')
data_receipts = _fred_with_labels('FYFR', 'Annual%2C%20Fiscal%20Year', 'receipts')
data_outlays = _fred_with_labels('FYONET', 'Annual%2C%20Fiscal%20Year', 'outlays')
data_ib = _fred_with_labels('FYOINT', 'Annual%2C%20Fiscal%20Year', 'interest_bill')

#### US 10 Year Treasury Benchmark

In [39]:
# WGS10YR, weekly ("Weekly, Ending Friday", passed URL-encoded).
data_tb = retrieve_fred_data(ids='WGS10YR', fq='Weekly%2C%20Ending%20Friday')
# Positional rename of the value column and its diff column.
data_tb.columns = ['Treasury_Benchmark', 'TB_diff']

#### CB Balance Sheet as % of GDP 

In [40]:
# WALCL, weekly ("Weekly, As of Wednesday", passed URL-encoded).
data_assets = retrieve_fred_data(ids='WALCL', fq='Weekly%2C%20As%20of%20Wednesday')
# Positional rename of the value column and its diff column.
data_assets.columns = ['Total_Assets', 'TA_diff']

In [41]:
data_assets.head(10)

Unnamed: 0_level_0,Total_Assets,TA_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2002-12-18,719542,
2002-12-25,732059,12517.0
2003-01-01,730994,-1065.0
2003-01-08,723762,-7232.0
2003-01-15,720074,-3688.0
2003-01-22,735953,15879.0
2003-01-29,712809,-23144.0
2003-02-05,719643,6834.0
2003-02-12,713281,-6362.0
2003-02-19,730400,17119.0


将数据输出到excel

In [42]:
# Target workbook and the writer that the export cell below fills sheet by sheet.
# NOTE(review): 'marco' in the file name looks like a typo for 'macro'; it is
# left unchanged because other code may read this exact path.
data_name = 'data/marco_data.xlsx'
writer = pd.ExcelWriter(data_name)

In [43]:
# (sheet name, frame) pairs, in the order the sheets are written to the workbook.
sheets_to_write = [
    ('ISM', data_ism_nmi_index[['ISM', 'ISM_diff']]),
    ('NMI', data_ism_nmi_index[['NMI', 'NMI_diff']]),
    ('UMCSI', data_umcsi),
    ('PermitsSA', data_bp),
    ('M2', data_m2),
    ('IR%', data_ir),
    ('CPIAUCSL', data_cpiaucsl),
    ('CPILFESL', data_cpilfesl),
    ('PPIFGS', data_ppifgs),
    ('PPILFE', data_ppilfe),
    ('PPIACO', data_ppiaco),
    ('PPITMI', data_ppitmi),
    ('DEBT', data_debt),
    ('RECEIPTS', data_receipts),
    ('OUTLAYS', data_outlays),
    ('INTEREST_BILL', data_ib),
    ('Treasury_Benchmark', data_tb),
    ('Total_Assets', data_assets),
]

for sheet_label, sheet_frame in sheets_to_write:
    # sheet_name is passed by keyword; passing it positionally is deprecated.
    sheet_frame.to_excel(writer, sheet_name=sheet_label)

# close() flushes the workbook to disk and releases the file handle;
# ExcelWriter.save() no longer exists in current pandas.
writer.close()