# Trade network analysis
**Brian Dew (brianwdew@gmail.com)**

**04_imfdata.ipynb**

Using the IMF API, data on exports, prices, and exchange rates are collected for all available countries during 2008-2014.

TODO:

1) Annotate and clean method2 section - can I use a loop?

2) Match the missing country price data with regional substitutes
       

#### Import packages

In [1]:
import requests                                             # For requesting json data from the url
import pandas as pd                                         # pandas dataframes used for convenience
import os                                                   # change current directory in next line
# Make the project's data folder the working directory, so that relative file names used
# later in the notebook (e.g. 'region_codes.csv') are resolved inside it.
# NOTE(review): this is an absolute, machine-specific path; the notebook will fail here on
# any machine where this folder does not exist.
os.chdir('C:/Working/trade_network/data/')

#### settings for API request

In [2]:
# Base URL of the IMF JSON REST API
webserv = 'http://dataservices.imf.org/REST/SDMX_JSON.svc/'
# API method that returns the data itself (the DataStructure method returns metadata)
method = 'CompactData/'
# International Financial Statistics
series = 'IFS'
# Annual frequency
freq = 'A'
# IFS indicators of interest, keyed by the short name used for each one in this notebook
inds = {
    'x': 'TXG_FOB_USD',
    'q': 'EREER_IX',
    'p': 'TMG_D_USD_CIF_IX',
}
# Query string restricting the request to the period of interest
date = '?startPeriod=2008&endPeriod=2014'

Gather additional dictionaries with country and indicator codes. CLEAN UP and ANNOTATE

In [3]:
# The DataStructure method describes the series: area names, indicator names and unit codes.
method2 = 'DataStructure/'
url2 = webserv + method2 + series   # url to access IMF datastructure method API

# Request the data structure ONCE and reuse the parsed response. All three code lists
# come from the same document, so repeating the request only adds network round trips.
code_lists = requests.get(url2).json()['Structure']['CodeLists']['CodeList']
data1 = code_lists[2]['Code']  # area names here
data2 = code_lists[3]['Code']  # indicator codes
data3 = code_lists[0]['Code']  # unit codes

# Each code record carries its identifier under '@value'. The lookups below map that
# identifier to the text of interest, read straight from the records.
area_names = {code['@value']: code['Description']['#text'] for code in data1}
ifs_inds = {code['@value']: code['Description']['#text'] for code in data2}
# For unit codes the text is taken from the third annotation attached to the record.
unit_codes = {
    code['@value']: code['Annotations']['Annotation'][2]['AnnotationText']['#text']
    for code in data3
}

Print out the full indicator names

In [4]:
# Show every indicator of interest as "<indicator id>: <full indicator name>"
for code in inds.values():
    print(code + ': ' + ifs_inds[code])

EREER_IX: Real Effective Exchange Rate, based on Consumer Price Index, Index
TXG_FOB_USD: Goods, Value of Exports, Free on board (FOB), US Dollars
TMG_D_USD_CIF_IX: Goods, Deflator/Unit Value of Imports, US Dollars, Index


#### Loop with API request for each indicator

In [5]:
fd = {}         # one long-format dataframe per key of inds
unit_mult = {}  # set aside for unit multipliers by country and indicator (not filled in this cell)
for k, v in inds.iteritems():
    # One request per indicator; the area dimension is left empty in the url
    url = webserv + method + series + '/' + freq + '..' + v + '.' + date
    response = requests.get(url).json()
    df = pd.DataFrame(response['CompactData']['DataSet']['Series'])
    df = df.set_index('@REF_AREA')
    # Translate each unit code through the unit_codes lookup
    df['@UNIT_MULT'] = df['@UNIT_MULT'].map(unit_codes)
    # Keep only the areas whose 'Obs' entry is a list
    has_obs_list = df['Obs'].apply(lambda obs: isinstance(obs, list))
    df = df[has_obs_list]
    d = {}  # per-area dataframes for the current indicator
    for c in df.index.values:
        obs = pd.DataFrame(df.loc[c]['Obs'])
        obs = obs.rename(columns={'@TIME_PERIOD': 'date'})
        # Dates become datetimes so they can serve as an index level
        obs['date'] = pd.to_datetime(obs['date'].values)
        # Numeric observation values, scaled by the area's mapped unit multiplier
        obs['@OBS_VALUE'] = pd.to_numeric(obs['@OBS_VALUE']) * int(df['@UNIT_MULT'][c])
        d[c] = obs
    # Stack the areas; the dict keys end up in 'level_0' and the old row numbers in 'level_1'
    stacked = pd.concat(d, axis=0).reset_index()
    fd[k] = stacked.set_index(['level_0', 'date']).drop('level_1', axis=1)

# Put the indicators side by side, then index by area code, full area name and date
merged = pd.concat(fd, axis=1).reset_index()
merged['full_name'] = merged['level_0'].map(area_names)
merged = merged.set_index(['level_0', 'full_name', 'date'])

#### Missing price data filled with regional values

Using the best match from this list: https://www.imf.org/external/pubs/ft/weo/2016/02/weodata/groups.htm, each country with missing import price data has its missing values replaced by the corresponding regional price data.

In [31]:
# Lookup from area code ('level_0') to the full area name
ccodes = (
    merged.reset_index()[['level_0', 'full_name']]
    .set_index('level_0')
    .to_dict()[('full_name', '')]
)
# Among the rows with no 'p' observation, flag those that do have a 'q' observation
p_missing = merged['p']['@OBS_VALUE'].isnull()
test = merged[p_missing]['q']['@OBS_VALUE'].notnull()

In [59]:
# Distinct full names of the areas that have at least one flagged (True) row in test
flagged = test[test]
values = flagged.reset_index().set_index('level_0')['full_name'].unique()

In [91]:
#for m in merged[merged['p']['@OBS_VALUE'].notnull()].reset_index().set_index('level_0').full_name.unique():
#    print m

In [104]:
# Country -> region lookup read from a headerless csv file:
# the first column supplies the keys and the second column the matching values.
region_codes = pd.read_csv('region_codes.csv', header=None, index_col=0)[1].to_dict()

In [129]:
# NOTE(review): this cell looks like scratch code that does not belong to this notebook.
# It uses the names iso3, df1 and df2 and the columns 'i', 'j', 'Group' and 'Hotel', none of
# which are defined anywhere in the visible notebook, so it will presumably raise on a
# fresh kernel -- confirm and remove or replace it.
for v in ['i', 'j']:
    df[v] = df[v].apply(lambda x: iso3.get(x,x)) # replace country names
#merged.dropna(axis=0)
# Replace each 'Group' value by the 'Hotel' value that df1 lists for that group
df['Group'] = df['Group'].map(df1.set_index('Group')['Hotel'])
df

df1.join(df2,on='Group')

In [215]:
# Work on a flat copy of merged: reset_index() turns its index levels
# ('level_0', 'full_name', 'date') into ordinary columns and leaves a default row index.
# The commented-out lines are earlier attempts at copying regional 'p' data to a country.
#new = {}
#for c, r in region_codes:
#    new[c] = merged.reset_index()[merged.level_0 == r]
df = merged.reset_index()#.set_index('level_0')
#df.loc['BG', 'p'].join(df.loc['1C_903', 'p'])#[df.level_0=='BG'] = df[df.level_0=='1C_903']['p']

In [242]:
#df.loc['BG','p']['@BASE_YEAR'][0] = df.loc['1C_903', 'p']['@BASE_YEAR'][0]
#new = {}
#merged.loc['1C_903', 'p'].values
#new['1C_903'] = df[df.level_0 == '1C_903'].set_index(['level_0','date'])['p']
#new['BG'] = new['1C_903'].reset_index()

In [241]:
# Display every row whose area code ('level_0') is 'BG' (Bulgaria) to inspect its missing 'p' data
df.set_index('level_0').loc['BG']

Unnamed: 0_level_0,full_name,date,p,p,q,x
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,@BASE_YEAR,@OBS_VALUE,@OBS_VALUE,@OBS_VALUE
level_0,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
BG,Bulgaria,2008-01-01,,,99.764007,22485490000.0
BG,Bulgaria,2009-01-01,,,104.061766,16377690000.0
BG,Bulgaria,2010-01-01,,,100.0,20570650000.0
BG,Bulgaria,2011-01-01,,,102.713979,28221550000.0
BG,Bulgaria,2012-01-01,,,100.717493,26669690000.0
BG,Bulgaria,2013-01-01,,,101.978671,29492360000.0
BG,Bulgaria,2014-01-01,,,101.425051,30879970000.0


In [168]:
#new['1C_903'].reset_index()['level_0'].replace(to_replace='1C_903', value='BG')
#df['level_0'] = df['level_0'].map(region_codes)

In [240]:
# Record '2010' as the import-price base year on row 154, provided that row belongs to
# Bulgaria ('BG').
# The assignment goes through a single .loc call: the previous chained form
# (df[mask]['p']['@BASE_YEAR'][154] = ...) wrote into a temporary copy and left df unchanged.
is_bg_row_154 = (df['level_0'] == 'BG') & (df.index == 154)
df.loc[is_bg_row_154, ('p', '@BASE_YEAR')] = '2010'

In [269]:
#df# = df.set_index('level_0')

In [259]:
# Swap each area code for its entry in region_codes (codes without an entry are kept as is),
# then use the resulting codes as the index of an otherwise empty dataframe.
mapped_codes = df['level_0'].map(lambda code: region_codes.get(code, code))
region = mapped_codes.to_frame().set_index('level_0')

In [306]:
# Attach the 'p' base-year column of df to region through an inner join on the index.
# NOTE(review): region is indexed by the codes taken from 'level_0', whereas
# df.reset_index() carries a fresh default row index, so the two sides presumably share
# no index values -- confirm that this join matches any rows at all.
region['p'] = region.join(df.reset_index()['p']['@BASE_YEAR'], how='inner')

In [308]:
# Reverse lookup of region_codes: every value becomes a key and every key its value.
# When several keys share a value, the one iterated last is kept.
country_codes = dict((region_code, country) for country, region_code in region_codes.iteritems())

In [311]:
# Display the keys of the reverse lookup
list(country_codes)

['A10', 'F6', 'U2', '1C_903', 'F97', 'XS25', 'XR43', 'F98', 'E1']

In [317]:
# Keep the 'p' (import price) columns of the rows whose area code is one of the keys of
# country_codes.
# The codes are matched against the 'level_0' column: since df comes from
# merged.reset_index(), its row index holds plain row numbers, so matching the codes
# against df.index selected nothing.
in_region = df['level_0'].isin(country_codes.keys())
region_p = df[in_region]['p']

In [92]:
#pd.DataFrame(merged.reset_index()[['level_0', 'full_name']]).set_index('level_0').drop_duplicates().to_csv('test.csv', index=True) # save as csv)