### Sample test 1: LBS balance and DOT balance

#### LBS code : https://stats.bis.org/statx/srs/table/A4?c=&p=20191&m=S&f=TSKEYS
<pre>
1. Claim
Q:S:C:A:TO1:A:##:A:5A:A:##:N
    *          |         |
    |          |         To country
    |          From country
    Position (C: claim)

2. Liability
Q:S:L:A:TO1:A:##:A:5A:A:##:N
    *          |         |
    |          |         To country
    |          From country
    Position (L: liability)
               
3. Balance
(Q:S:C:A:TO1:A:##:A:5A:A:##:N) - (Q:S:L:A:TO1:A:##:A:5A:A:##:N)
</pre>


#### DOT code
<pre>
1. Balance
Q:##:B:##
   |    |
   |    To country
   From country
</pre>

### Data retrieval

In [1]:
import pandas as pd
from pymongo import MongoClient

In [1]:
# Shared MongoDB client used by every query cell below. No host/port is
# passed, so pymongo's default connection target is used.
# NOTE(review): unicode_decode_error_handler='ignore' presumably lets documents
# containing invalid UTF-8 strings be read instead of raising; confirm that
# silently dropping those bytes is acceptable for this data.
mgclient = MongoClient(unicode_decode_error_handler='ignore')

In [13]:
# Fetch the single country-group document whose code is 'OECD'. The keys of
# its `cclist` mapping are the country codes used to filter the LBS queries.
# find_one() returns None when nothing matches, so a missing group is reported
# explicitly instead of surfacing as a bare StopIteration from cursor.next().
oecd_group = mgclient.lbsdot.countrygroup.find_one({'code': 'OECD'})
if oecd_group is None:
    raise LookupError("no document with code 'OECD' found in lbsdot.countrygroup")
oecd_cclist = list(oecd_group['cclist'].keys())

In [14]:
# Count number of claims and liabilities on LBS for OECD countries

def _lbs_oecd_filter(position):
    """Build the raw_lbsn2 query filter for one L_POSITION value.

    Parameters
    ----------
    position : str
        Value matched against "L_POSITION" ("C" for claim, "L" for liability).

    Returns
    -------
    dict
        MongoDB filter restricting both L_PARENT_CTY and L_CP_COUNTRY to the
        codes in the module-level `oecd_cclist`; all other fields are fixed.
    """
    return {
        "FREQ": "Q",
        "L_MEASURE": "S",
        "L_POSITION": position,  # C: Claim, L: Liability
        "L_INSTR": "A",
        "L_DENOM": "TO1",
        "L_CURR_TYPE": "A",
        "L_PARENT_CTY": {'$in': oecd_cclist},
        "L_REP_BANK_TYPE": "A",
        "L_REP_CTY": "5A",
        "L_CP_SECTOR": "A",
        "L_CP_COUNTRY": {'$in': oecd_cclist},
        "L_POS_TYPE": "N",  # N: Cross-border
    }


# The two counts differ only in the position field, so share one filter builder.
num_claim = mgclient.lbsdot.raw_lbsn2.count_documents(_lbs_oecd_filter("C"))
num_liability = mgclient.lbsdot.raw_lbsn2.count_documents(_lbs_oecd_filter("L"))

print('#claim:{}, #liability:{}'.format(num_claim, num_liability))

#claim:806, #liability:814


In [18]:
# Filter for LBS claim series (L_POSITION "C") whose parent country and
# counterparty country are both in the OECD code list.
claim_filter = {
    "FREQ": "Q",
    "L_MEASURE": "S",
    "L_POSITION": "C",  # C: Claim
    "L_INSTR": "A",
    "L_DENOM": "TO1",
    "L_CURR_TYPE": "A",
    "L_PARENT_CTY": {'$in': oecd_cclist},
    "L_REP_BANK_TYPE": "A",
    "L_REP_CTY": "5A",
    "L_CP_SECTOR": "A",
    "L_CP_COUNTRY": {'$in': oecd_cclist},
    "L_POS_TYPE": "N",  # N: Cross-border
}

# find() hands back a cursor; the documents are pulled in the next cell.
claim_iter = mgclient.lbsdot.raw_lbsn2.find(claim_filter)

In [19]:
# Drain the claim cursor into a list of documents, then build one row per
# document.
claim_docs = list(claim_iter)
claim_df = pd.DataFrame(claim_docs)

In [23]:
# Show the first row of claim_df.
claim_df.head(1)

Unnamed: 0,_id,timeseries,FREQ,L_MEASURE,L_POSITION,L_INSTR,L_DENOM,L_CURR_TYPE,L_PARENT_CTY,L_REP_BANK_TYPE,L_REP_CTY,L_CP_SECTOR,L_CP_COUNTRY,L_POS_TYPE
0,5d3ed151e5c38b7bc2664030,"[[20120630, 5445.263], [20120930, 6194.032], [...",Q,S,C,A,TO1,A,AT,A,5A,A,AT,N


In [24]:
# Filter for LBS liability series (L_POSITION "L") whose parent country and
# counterparty country are both in the OECD code list.
liability_filter = {
    "FREQ": "Q",
    "L_MEASURE": "S",
    "L_POSITION": "L",  # L: Liability
    "L_INSTR": "A",
    "L_DENOM": "TO1",
    "L_CURR_TYPE": "A",
    "L_PARENT_CTY": {'$in': oecd_cclist},
    "L_REP_BANK_TYPE": "A",
    "L_REP_CTY": "5A",
    "L_CP_SECTOR": "A",
    "L_CP_COUNTRY": {'$in': oecd_cclist},
    "L_POS_TYPE": "N",  # N: Cross-border
}

# find() hands back a cursor; the documents are pulled in the next cell.
liability_iter = mgclient.lbsdot.raw_lbsn2.find(liability_filter)

In [25]:
# Drain the liability cursor into a list of documents, then build one row per
# document.
liability_docs = list(liability_iter)
liability_df = pd.DataFrame(liability_docs)

In [27]:
# Show the first row of liability_df.
liability_df.head(1)

Unnamed: 0,_id,timeseries,FREQ,L_MEASURE,L_POSITION,L_INSTR,L_DENOM,L_CURR_TYPE,L_PARENT_CTY,L_REP_BANK_TYPE,L_REP_CTY,L_CP_SECTOR,L_CP_COUNTRY,L_POS_TYPE
0,5d3edb1fe5c38b7bc2862ff0,"[[20120630, 9482.634], [20120930, 8974.387], [...",Q,S,L,A,TO1,A,AT,A,5A,A,AT,N


In [42]:
# Map the OECD country codes (matched on `iso2`) to their `imfnumeric` codes,
# stringified because the DOT query below matches them against string fields.
oecd_country_docs = mgclient.lbsdot.countrytable.find({'iso2': {'$in': oecd_cclist}})
oecd_imfcclist = [str(country_doc['imfnumeric']) for country_doc in oecd_country_docs]

In [43]:
# Filter for DOT series with indicator 'TBG_USD' where both the country and
# the counterpart country are in the OECD IMF-code list.
dotbalance_filter = {
    'Country Code': {'$in': oecd_imfcclist},
    'Indicator Code': 'TBG_USD',
    'Counterpart Country Code': {'$in': oecd_imfcclist},
}

# find() hands back a cursor; the documents are pulled in the next cell.
dotbalance_iter = mgclient.lbsdot.raw_dot.find(dotbalance_filter)

In [44]:
# Drain the DOT cursor into a list of documents, then build one row per
# document.
dotbalance_docs = list(dotbalance_iter)
dotbalance_df = pd.DataFrame(dotbalance_docs)

In [46]:
# Show the first row of dotbalance_df.
dotbalance_df.head(1)

Unnamed: 0,_id,Country Name,Country Code,Indicator Name,Indicator Code,Counterpart Country Name,Counterpart Country Code,freq,timeseries
0,5d386cee1ce487b9eba5b301,Austria,122,"Goods, Value of Trade Balance, US Dollars",TBG_USD,France,132,A,"[[19481231, 1400000.0], [19491231, -9200000.0]..."


In [48]:
# Save dataframe to pickle files
# NOTE(review): only the liability and DOT balance frames are written here;
# claim_df is not saved, although the LBS balance is defined above as
# claim minus liability. Confirm whether that omission is intended.
liability_df.to_pickle('../data/liability_df.pkl')
dotbalance_df.to_pickle('../data/dotbalance_df.pkl')

### Convert data to time series object

In [4]:
import os
import zipfile

In [8]:
# Unpack every zip archive found in ../data/ into that same directory.
# Entries that are not zip archives (for example already-extracted .pkl files
# or sub-directories) are skipped: passing them to ZipFile would raise, which
# made this cell fail as soon as ../data/ held anything other than archives.
for entry in os.listdir('../data/'):
    archive_path = os.path.join('../data/', entry)
    if not zipfile.is_zipfile(archive_path):
        continue
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall('../data/')

In [10]:
# Reload the liability and DOT balance frames from the pickle files in ../data/.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source (e.g. the ones written by this notebook).
liab_df = pd.read_pickle('../data/liability_df.pkl')
dotb_df = pd.read_pickle('../data/dotbalance_df.pkl')

In [38]:
# Turn every row of liab_df into a date-indexed Series.
# Each `timeseries` cell is a list of [yyyymmdd, value] entries: element 0
# becomes the datetime index and element 1 the value. The Series is named by
# concatenating the parent-country and counterparty-country codes (e.g. "ATAT").
# Columns are read by label rather than through positional integer lookups on
# a label-indexed row, which recent pandas versions deprecate.
res = [
    pd.Series(
        [point[1] for point in points],
        index=pd.to_datetime([point[0] for point in points], format='%Y%m%d'),
        name=parent_cty + cp_country,
    )
    for parent_cty, cp_country, points in zip(
        liab_df['L_PARENT_CTY'], liab_df['L_CP_COUNTRY'], liab_df['timeseries']
    )
]

In [42]:
# Stack the per-pair Series as rows, then flip the frame so that each country
# pair becomes a column and the dates form the index.
# NOTE: this rebinds liab_df, so the previous cell (which reads the raw
# L_PARENT_CTY / L_CP_COUNTRY / timeseries columns) cannot be re-run afterwards
# without reloading the pickle.
liab_df = pd.DataFrame(res).T

In [46]:
# Display the wide frame: one column per country pair, indexed by date.
liab_df

Unnamed: 0,ATAT,ATAU,ATBE,ATCA,ATCH,ATCL,ATCZ,ATDE,ATDK,ATEE,...,USMX,USNL,USNO,USPL,USPT,USSE,USSI,USSK,USTR,USUS
2003-03-31,,,,,,,,,,,...,,,,,,,,,,
2003-06-30,,,,,,,,,,,...,,,,,,,,,,
2003-09-30,,,,,,,,,,,...,,,,,,,,,,
2003-12-31,,,,,,,,,,,...,,,,,,,,,,
2004-03-31,,,,,,,,,,,...,,,,,,,,,,
2004-06-30,,,,,,,,,,,...,,,,,,,,,,
2004-09-30,,,,,,,,,,,...,,,,,,,,,,
2004-12-31,,,,,,,,,,,...,,,,,,,,,,
2005-03-31,,,,,,,,,,,...,,,,,,,,,,
2005-06-30,,,,,,,,,,,...,,,,,,,,,,


In [48]:
# Show the first converted Series on its own, as a one-column frame.
pd.DataFrame(res[0])

Unnamed: 0,ATAT
2012-06-30,9482.634
2012-09-30,8974.387
2012-12-31,8392.641
2013-03-31,7478.688
2013-06-30,7510.236
2013-09-30,7423.432
2013-12-31,7626.6
2014-03-31,7506.076
2014-06-30,7287.85
2014-09-30,7345.51
