In [2]:
import pandas as pd
# NOTE(review): presumably a compatibility shim for pandas_datareader (imported on the
# next line), which may still look up pd.core.common.is_list_like - confirm it is
# still required with the installed versions.
pd.core.common.is_list_like = pd.api.types.is_list_like
import pandas_datareader.data as pdr
import datetime
import requests
import quandl
import config   ## File with API key
# Register the Quandl API key read from the local config module
quandl.ApiConfig.api_key = config.key

from lxml import html
import re
import os
import shutil

# Every relative path used below (data/..., unemp.xls, JPNDash.tex) resolves against this directory
os.chdir('C:/Working/Python/Japan/')

def write_txt(filename, filetext):
    """Write a chart label to a text file.

    Args:
        filename: Path of the file to create; an existing file is truncated.
        filetext: String written to the file exactly as given (no newline added).
    """
    with open(filename, mode='w') as label_file:
        label_file.write(filetext)

In [3]:
# To get the latest data, need to find the correct link on the cabinet office page
base = 'http://www.esri.cao.go.jp/en/sna/'


def fetch_links(page_url):
    """Return the href of every <a> element on the page at ``page_url``.

    Raises requests.HTTPError on an HTTP error status, so an error page is
    never scanned for links as if it were the real listing, and gives up
    after 30 seconds instead of hanging the notebook.
    """
    response = requests.get(page_url, timeout=30)
    response.raise_for_status()
    return html.fromstring(response.content).xpath('//a/@href')


def first_link_containing(hrefs, marker, page_url):
    """Return the first href containing ``marker``.

    Raises LookupError naming the page when no href matches, instead of the
    message-less StopIteration a bare next() would produce.
    """
    for href in hrefs:
        if marker in href:
            return href
    raise LookupError('No link containing {!r} found on {}'.format(marker, page_url))


# Step 1: the landing page; the first link containing 'gde' is followed
url = base + 'sokuhou/sokuhou_top.html'
links = fetch_links(url)
link = first_link_containing(links, 'gde', url)

# Step 2: collect the links of that page for the next cell.
# link[3:] drops the first three characters of the href
# (presumably a leading '../' - confirm against the page source).
url = base + link[3:]
links = fetch_links(url)

In [4]:
# Pick the first collected link that points at the 'nkiyo-jk' file; fail with a
# readable message (rather than a bare StopIteration) if the page layout changed.
link = next((href for href in links if 'nkiyo-jk' in href), None)
if link is None:
    raise LookupError("No link containing 'nkiyo-jk' found on {}".format(url))

# Keep the href from 'data/' onward; find() returns -1 when the marker is absent,
# which would otherwise silently slice off everything but the last character.
data_start = link.find('data/')
if data_start == -1:
    raise LookupError("Link {!r} does not contain 'data/'".format(link))

# The CSV URL is built on the '/jp/' variant of base. Replacing the whole path
# segment (not every 'en' substring) keeps this safe if base ever changes.
url = base.replace('/en/', '/jp/') + link[data_start:]

# Sibling files of the same release, derived by swapping the file-name prefix:
urlY = url.replace('nkiyo', 'kgaku')      # read below for the real GDP level and consumption
urlI = url.replace('nkiyo', 'gaku')       # read below for investment by source
urlX = url.replace('nkiyo-j', 'gaku-m')   # read below for nominal GDP

# header=5 takes the sixth line of the file as column names;
# .loc[49:] keeps the rows labelled 49 onward.
df = pd.read_csv(url, header=5, encoding='iso-8859-1').loc[49:]

In [5]:
# Columns to keep from the CSV, mapped to the short names used downstream
gdpkeep = {
    'Unnamed: 0': 'date',
    'GDP(Expenditure Approach)': 'gdp',
    'PrivateConsumption': 'cons',
    'PrivateResidentialInvestment': 'inv1',
    'Private Non-Resi.Investment': 'inv2',
    'Changein PrivateInventories': 'inv3',
    'GovernmentConsumption': 'gov1',
    'PublicInvestment': 'gov2',
    'Changein PublicInventories': 'gov3',
    'Goods & Services': 'nx'
}
df = df[list(gdpkeep.keys())].dropna().rename(columns=gdpkeep)

# Adjust the date column to make each value a consistent format.
# Each label is cut at the first '-' and split on '/ ': a two-part result is
# [year, month]; a one-part result carries only the month and inherits the
# year from the closest labelled row above it.
dts = df['date'].str.split('-').str[0].str.split('/ ')
has_year = dts.str.len() > 1
df['year'] = dts.str[0].where(has_year).ffill()
df['month'] = dts.str[1].where(has_year, dts.str[0]).str.zfill(2)
df['date2'] = df['year'].str.cat(df['month'], sep='-')
df['date'] = pd.to_datetime(df['date2'], format='%Y-%m')

# Sum up various types of investment and government spending
df['inv'] = df['inv1'] + df['inv2'] + df['inv3']
df['gov'] = df['gov1'] + df['gov2'] + df['gov3']
df = df.set_index('date')[['gdp', 'cons', 'inv', 'gov', 'nx']]
df.to_csv('data/gdp.csv', header=True)  # csv file created

# Declare variables for chart label
q = df.index[-1].quarter
y = df.index[-1].strftime('%Y')
# Last non-missing value of the second column of the urlY file, kept as read
# (no thousands= parsing here, so it is inserted into the label verbatim)
Y = pd.read_csv(urlY, header=5, encoding='iso-8859-1').iloc[49:, 1:2].dropna().iloc[-1].values[0]
# .iloc[-1] is the explicit positional form; plain [-1] on a date-indexed
# Series relies on a deprecated positional fallback.
ch = df['gdp'].iloc[-1]
# '\\%' emits a literal backslash + percent sign, without relying on the
# invalid escape sequence '\%'.
# NOTE(review): '{}billion' has no space before 'billion', unlike the other labels - confirm intended.
text = '{} Q{}: Real GDP: {}billion Yen; Growth: {}\\%'.format(y, q, Y, ch)
# Write label to txt file
write_txt('data/gdp.txt', text)

In [6]:
# Household consumption expenditures
# Column 0 holds the period label; columns 5-8 are the four components named in `series`.
df = pd.read_csv(urlY, header=5, thousands=',',
                 encoding='iso-8859-1').iloc[49:, [0,5,6,7,8]].dropna().set_index('Unnamed: 0')
df['tot'] = df.sum(axis=1)

# Only the values are used: columns are renamed by position, so the order here
# must match the column order selected above.
series = {'DurableGoods': 'durable',
          'Semi-DurableGoods': 'semidur',
          'Non-DurableGoods': 'nondur',
          'Services': 'services',
          'tot': 'total'}

df.columns = list(series.values())
# Convert to share of total change: each component's quarter-on-quarter change
# relative to the previous quarter's total, multiplied by 400
for name in series.values():
    df[name+'_ch'] = ((df[name] - df[name].shift(1))/df['total'].shift(1) * 400).round(2)

# The first row has no previous quarter, so its *_ch values are NaN and it is dropped
df = df.dropna()
# Text before the first '-' of the label, after any 'YYYY/ ' prefix
df['quarter'] = df.index.str.split('-').str[0].str.split('/ ').str[-1]
# Labels that start with a four-character year, each repeated four times and
# trimmed to the frame length.
# NOTE(review): this positional fill gives rows 0-3 the first labelled year, so it
# is only right when the frame starts on that year's first quarter - confirm
# against the file layout.
labelled_years = [lbl for lbl in df.index.str.split('/ ').str[0].values if len(lbl) == 4]
df['year'] = [yr for yr in labelled_years for _ in range(4)][:len(df)]
df['date'] = pd.to_datetime(df['year'].str.cat(df['quarter'], sep='-'))

# axis=1 is passed by keyword: the positional form is rejected by pandas 2.x
df = df.reset_index().set_index('date').drop(['quarter', 'year', 'Unnamed: 0'], axis=1).dropna()
df.to_csv('data/cons.csv', header=True)  # csv file created

# Declare variables for chart label
q = df.index[-1].quarter
y = df.index[-1].strftime('%Y')
C = df['total'].iloc[-1]
ch = df['total_ch'].iloc[-1]
# Escapes are spelled out in full: the label still contains two backslashes
# followed by a space, and a backslash before the percent sign.
text = 'Total household consumption expenditure: \\\\ {} Q{}: {:,} billion Yen; Growth: {}\\%'.format(y, q, C, ch)
# Write label to txt file
write_txt('data/cons.txt', text)

In [7]:
# Investment by source
# Column 0 holds the period label; columns 5, 6 and 9 are the components named in `series2`.
df = pd.read_csv(urlI, header=5, thousands=',',
                 encoding='iso-8859-1').iloc[49:, [0,5,6,9]].dropna().set_index('Unnamed: 0')
df['tot'] = df.sum(axis=1)

# Only the values are used: columns are renamed by position, so the order here
# must match the column order selected above.
series2 = {'PrivateResidentialInvestment': 'res',
           'Private Non-Resi.Investment': 'bus',
           'PublicInvestment': 'pub',
           'tot': 'total'}

df.columns = list(series2.values())
# Convert to share of total change: each component's quarter-on-quarter change
# relative to the previous quarter's total, multiplied by 400
for name in series2.values():
    df[name+'_ch'] = ((df[name] - df[name].shift(1))/df['total'].shift(1) * 400).round(2)

# Text before the first '-' of the label, after any 'YYYY/ ' prefix
df['quarter'] = df.index.str.split('-').str[0].str.split('/ ').str[-1]
# Labels that start with a four-character year, each repeated four times and
# trimmed to the frame length.
# NOTE(review): this positional fill gives rows 0-3 the first labelled year, so it
# is only right when the frame starts on that year's first quarter - confirm
# against the file layout.
labelled_years = [lbl for lbl in df.index.str.split('/ ').str[0].values if len(lbl) == 4]
df['year'] = [yr for yr in labelled_years for _ in range(4)][:len(df)]
df['date'] = pd.to_datetime(df['year'].str.cat(df['quarter'], sep='-'))

# axis=1 is passed by keyword: the positional form is rejected by pandas 2.x.
# The trailing dropna() removes the first row, whose *_ch values are NaN.
df = df.reset_index().set_index('date').drop(['quarter', 'year', 'Unnamed: 0'], axis=1).dropna()
df.to_csv('data/inv.csv', header=True)  # csv file created

# Declare variables for chart label
q = df.index[-1].quarter
y = df.index[-1].strftime('%Y')
I = df['total'].iloc[-1]
ch = df['total_ch'].iloc[-1]
# Escapes are spelled out in full: the label still contains two backslashes
# directly before the year, and a backslash before the percent sign.
text = 'Total investment expenditure: \\\\{} Q{}: {:,} billion Yen; Growth: {}\\%'.format(y, q, I, ch)
# Write label to txt file
write_txt('data/inv.txt', text)

In [8]:
# Current account from Ministry of Finance balance of payments data
url = 'http://www.mof.go.jp/international_policy/reference/balance_of_payments/bp_trend/bpnet/sbp/s-a/6s-a-1.csv'
columns = ['year' ,'quarter', 'cab', 'gs', 'goods', 'ex', 'im', 'serv', 'income', 'transfers']
# Columns later expressed relative to GDP
series = ['cab', 'goods', 'serv', 'income', 'transfers']
# Skip the first 28 lines, discard the first two columns and the 'Unnamed: 4' column.
# axis=1 is passed by keyword: the positional form is rejected by pandas 2.x.
df = pd.read_csv(url, skiprows=28, thousands=',', encoding='iso-8859-1').iloc[:,2:].drop('Unnamed: 4', axis=1)
df.columns = columns
# Keep only the first character of the quarter label
df['quarter'] = df['quarter'].str[0]
# Rows without a year inherit it from the closest row above
df['year'] = df['year'].ffill().astype(int)
# Strings of the form '<year>Q<quarter>' are handed to pandas' date parser
df['date'] = pd.to_datetime(df['year'].map(str) + 'Q' + df['quarter'])
df = df.set_index('date')
# Discard the first 40 rows
df = df.iloc[40:,:]


In [9]:
# Show the resolved URL of the file read in the next cell, as a quick sanity check
print(urlX)

http://www.esri.cao.go.jp/jp/sna/data/data_list/sokuhou/files/2018/qe182/tables/gaku-mk1821.csv


In [10]:
# Nominal GDP from cabinet office
# The encoding matches the other reads of this release's CSV files; iso-8859-1
# maps every byte, so stray non-UTF-8 bytes in the file cannot abort the read.
ngdp = pd.read_csv(urlX, header=5, thousands=',', encoding='iso-8859-1').iloc[49:, 0:2].dropna()

# Each label is cut at the first '-' and split on '/ ': a two-part result is
# [year, month]; a one-part result carries only the month and inherits the
# year from the closest labelled row above it.
dts = ngdp['Unnamed: 0'].str.split('-').str[0].str.split('/ ')
has_year = dts.str.len() > 1
ngdp['year'] = dts.str[0].where(has_year).ffill()
ngdp['month'] = dts.str[1].where(has_year, dts.str[0]).str.zfill(2)
ngdp['date2'] = ngdp['year'].str.cat(ngdp['month'], sep='-')
ngdp['date'] = pd.to_datetime(ngdp['date2'], format='%Y-%m')
ngdp = ngdp.set_index('date')['GDP(Expenditure Approach)']
ngdp.name = 'gdp'

# `df` and `series` come from the balance-of-payments cell above
df = df.join(ngdp).dropna()
# Balance items are divided by 2.5% of GDP to give the percent-of-GDP figures
# quoted in the label (presumably folding together unit conversion,
# annualisation and the x100 - confirm the source units).
# Exports/imports are divided by 10 and labelled as billion Yen below.
df = df[series].div(df['gdp'].multiply(.025), axis=0).round(2).join(df[['ex', 'im']].div(10))
df.to_csv('data/external.csv', header=True)  # csv file created

# Declare variables for chart label
q = df.index[-1].quarter
y = df.index[-1].strftime('%Y')
# .iloc[-1] is the explicit positional form; plain [-1] on a date-indexed
# Series relies on a deprecated positional fallback.
lt = df['cab'].iloc[-1]
exim = 'Exports: {:,}; Imports {:,}'.format(df['ex'].iloc[-1], df['im'].iloc[-1])
# '\\%' emits a literal backslash + percent sign without the invalid escape '\%'
text = '{} Q{}: Current Account Balance: {}\\% of GDP'.format(y, q, lt)
text2 = '{} Q{}: {} (goods only, billion Yen)'.format(y, q, exim)
# Write label to txt file
write_txt('data/cab.txt', text)
write_txt('data/tb.txt', text2)

In [11]:
# Unemployment and participation
unemp_url = 'http://www.stat.go.jp/data/roudou/longtime/zuhyou/lt01-a10.xls'
r = requests.get(unemp_url, timeout=30)
# Stop here on an HTTP error so an error page is never saved as unemp.xls
r.raise_for_status()
with open('unemp.xls', 'wb') as f:
    f.write(r.content)

# After skipping five lines, rows 0 and 2 hold the two header levels; rows 1 and 3 are discarded
df = pd.read_excel('unemp.xls', skiprows=5, skipfooter=3).drop([1, 3])
col1 = df.loc[0].ffill()   # top-level header, carried across the cells it spans
col2 = df.loc[2]           # second-level header
col = col1 + '-' + col2
# Name the first four columns by position (.iloc), independent of how the
# sheet's own header cells are labelled
col.iloc[0] = 'year'
col.iloc[1] = 'mon'
col.iloc[2] = 'monname'
col.iloc[3] = 'DEL'
df = df.drop([0,2])
df.columns = col.values
# axis=1 is passed by keyword: the positional form is rejected by pandas 2.x
df = df.drop('DEL', axis=1)
# Strip the last character of the month label and left-pad to two digits
df['month'] = df['mon'].str[:-1].str.zfill(2)
# Non-numeric year cells become NaN; values are moved up one row, gaps are
# filled from the row above, then cast to int
df['year'] = df['year'].apply(pd.to_numeric, errors='coerce').shift(-1).ffill().astype(int)
df['date2'] = df['year'].astype(str).str.cat(df['month'], sep='-')
df['date'] = pd.to_datetime(df['date2'], format='%Y-%m')
df = df.set_index('date').dropna()

# Participation rate = labour force / (labour force + not in labour force) * 100
for suffix, group in [('', 'Both sexes'), ('-m', 'Male'), ('-w', 'Female')]:
    df['pop' + suffix] = df['Labour force-' + group] + df['Not in labour force-' + group]
    df['partc' + suffix] = df['Labour force-' + group] / df['pop' + suffix] * 100

unemplt = df['Unemployment rate  (percent)-Both sexes'].iloc[-1]
df['unemp'] = df['Unemployment rate  (percent)-Both sexes']
d1 = df.index[-1].strftime('%b %Y')
# Keep the four chart series and skip the first 636 rows
df = df[['partc', 'partc-m', 'partc-w', 'unemp']][636:]

# Write to file
df.to_csv('data/labor.csv', header=True)  # csv file created
# '\\%' emits a literal backslash + percent sign without the invalid escape '\%'
text = 'Unemployment rate, both sexes: {}: {}\\%'.format(d1, unemplt)
# Write label to txt file
write_txt('data/labor.txt', text)

In [12]:
# Updated inputs to pandas datareader:
source = 'fred'
start = datetime.datetime(2013,1,1)
# Series ids mapped to the column names used in the output
series = {'RBJPBIS': 'REER',
          'NBJPBIS': 'NEER'}
series2 = {'BOE/XUDLJYD': 'Rate'}

# Retrieve data as pandas dataframe named df
df = pdr.DataReader(list(series.keys()), source, start)
df.columns = list(series.values())
# The Quandl series is inverted here; the label below inverts it back to JPY per USD
dft = 1 / quandl.get(list(series2.keys()), start_date='2013-01-01')['BOE/XUDLJYD - Value']
dft = dft.rename('Rate')
# Outer join keeps every date from both sources; gaps are back-filled from the next observation
df2 = df.join(dft, how='outer').bfill().loc['2014-01-01':]

# Build the date text from its parts so the day has no leading zero on any
# platform (the '%#d' strftime flag only works on Windows).
latest = df2.index[-1]
as_of = '{} {}, {}'.format(latest.strftime('%b'), latest.day, latest.strftime('%Y'))
label = 'As of {}: {} JPY per 1 USD'.format(as_of, (1 / df2['Rate'].iloc[-1]).round(2))

# Rebase every column to 1 at the first row in which all columns are present
df2 = df2.div(df2.dropna().iloc[0])

df2.to_csv('data/fx.csv', header=True, index_label='Date')  # csv file created

# Write label to txt file
write_txt('data/usdjpy.txt', label)

In [13]:
# Compile the dashboard and only publish the PDF when pdflatex reports success,
# so a failed build cannot silently copy a stale or broken JPNDash.pdf.
exit_status = os.system('pdflatex JPNDash.tex')
if exit_status != 0:
    raise RuntimeError(
        'pdflatex exited with status {}; JPNDash.pdf was not copied'.format(exit_status))
# Last expression of the cell: shutil.copy returns the destination path, which the notebook displays
shutil.copy('JPNDash.pdf', 'C:/Working/bdecon.github.io/Dash/')

'C:/Working/bdecon.github.io/Dash/JPNDash.pdf'