# EMERFIN_IFS_Search.ipynb

Find the best IFS indicators to cover a list of countries and search terms.

Resources:
* [JSON RESTful Web Service](http://datahelp.imf.org/knowledgebase/articles/667681-using-json-restful-web-service)
* [Data Services News](http://data.imf.org/?sk=A329021F-1ED6-4D6E-B719-5BF5413923B6)

-----------------------------------------------------------------------------------------------------------------

Import packages and obtain metadata dict

In [1]:
import requests     # requests.get('source') method retrieves data from source
import time         # used to pace API requests and avoid disconnection

source = 'http://dataservices.imf.org/REST/SDMX_JSON.svc/DataStructure/IFS'
metafull = (requests.get(source).json()['Structure']['CodeLists']['CodeList'])
items_to_collect = {'areas': 'CL_AREA_IFS', 'units': 'CL_UNIT_MULT'}
name_vars = ('Short Name', 'Full Name', 'Concept', 'Unit', 'Topic')

metadata = {}
for item, code in items_to_collect.items():
    # use item code to filter data_structure result
    item_to_collect = [sub['Code'] for sub in metafull
                       if sub['@id'] == code][0]
    metadata[item] = {
    # Obtain metadata {code: description} from DataStructure method        
        subitem['@value']: subitem['Description']['#text']
        for subitem in item_to_collect
    }

# Collect metadata for indicators separately
meta_indicators = [sub['Code'] for sub in metafull
                   if sub['@id'] == 'CL_INDICATOR_IFS'][0]
metadata['indicators'] = {
    indic['@value']: {
        'Description': indic['Description']['#text']
        } for indic in meta_indicators
}
name_vars = {'Short':2,'Full':3,'Concept':5,'Unit_Name':7,'Topic':8}
for indic in meta_indicators:
    for var, loc in name_vars.items():
        try: 
            metadata['indicators'][indic['@value']][var] = (
            indic['Annotations']['Annotation'][loc]['AnnotationText']['#text'])
        except KeyError:
            continue

meta_full = None
meta_indicators = None

In [2]:
metadata['areas']

{u'1C_080': u'Export earnings: fuel',
 u'1C_092': u'Export earnings: nonfuel',
 u'1C_355': u'Curacao & St. Maarten',
 u'1C_356': u'Panama Canal Zone',
 u'1C_440': u'Middle East, North Africa, Afghanistan, and Pakistan',
 u'1C_459': u'Yemen, P.D. Rep.',
 u'1C_473': u'Yemen Arab Rep.',
 u'1C_568': u'Ryukyus',
 u'1C_752': u'Central Bank West Africa',
 u'1C_903': u'Emerging and Developing Europe',
 u'1C_904': u'Central and eastern Europe',
 u'1C_969': u'All Participants',
 u'1C_970': u'Other Holders',
 u'1C_977': u'European Monetary Institution',
 u'1C_994': u'EPU/EF',
 u'1C_ALL': u'All Countries and Country Groups',
 u'1C_ALLC': u'All Countries',
 u'1C_ALLG': u'All Country Groups',
 u'1C_All_Countries_Published': u'All Countries Published',
 u'1C_EMU': u'1C_EMU',
 u'1C_NANSA': u'National Accounts (NSA) Countries',
 u'1C_NASA': u'National Accounts (SA) Countries',
 u'1C_NSC': u'Non SRF countries',
 u'1C_SRF': u'SRF Countries',
 u'1C_SRF_less_EMU': u'SRF (excluding EA)',
 u'1C_SRF_plus_EMU'

Obtain list of usable country codes

In [2]:
import pandas as pd 
from itertools import izip_longest

cty_full = metadata['areas'].keys()
cty_sel = [cty for cty in cty_full if len(cty) < 7]
codes = pd.read_csv('https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv')
cty_reg_map = codes[['alpha-2', 'region']].set_index('alpha-2').dropna()
cty_reg_map = cty_reg_map.replace('Oceania', 'Asia').to_dict()

IMF_cty_df = pd.DataFrame(cty_sel)
IMF_cty_df['region'] = IMF_cty_df[0].map(cty_reg_map['region'])
IMF_cty_df = IMF_cty_df.dropna().reset_index(drop=True).set_index(0)

def grouper(seq, size):
    return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))

Define variables to use in data API request

In [3]:
# Terms to use to filter the descriptions of IFS indicators.
filter_terms = ['Equities', 'Industrial Production', 'Interest Rate', 'Price Ind',
                'Gross Domestic Product, Real', 'Employm', 'Services, Net', 'Rice', 
                'Soy', 'Metal', 'Energy', 'Sugar']
filtered_inds = [code for code, descr in metadata['indicators'].items()
                 if any([f_term in descr['Description'] for f_term in filter_terms])]

# ISO2 codes--IFS also contains regional aggregates with IMF specific codes
#country_list = ['BR', 'RU', 'ZA', 'MX', 'TR']
#cty_list = '+'.join(country_list)  # formatted for API requests

date_lim = '?startPeriod=2014'

# Location of actual IFS data (rather than metadata)
data_src = 'http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/IFS/'

Make request to data API and save nonblank series to list

In [9]:
# One indicator at a time, full list with valid results from throwing all
# combinations of countries and filtered indicators at the API
valid_series = []
for group in grouper(IMF_cty_df.index.values, 50):
    cty_list = '+'.join(group)
    for filt_ind in filtered_inds:
        data_url = '{}.{}.{}.{}'.format(data_src, cty_list, filt_ind, date_lim)
        data = requests.get(data_url).json()['CompactData']['DataSet']
        if 'Series' in data.keys():         # Check if data contain series
            for series in data['Series']:
                try:
                    if 'Obs' in series.keys():  # Check if series contain observations
                        valid_series.append(series)
                except:
                    pass
        time.sleep(0.5)   # Delay requests slightly so server does not block them

In [8]:
series

u'@REF_AREA'

In [6]:
#metadata['indicators']['NGDP_R_F_XDC']

{'Concept': u'Gross Domestic Product, Real, Factor Cost',
 'Description': u'Gross Domestic Product, Real, Factor Cost, National Currency',
 'Full': u'National Accounts, Gross Domestic Product, Real, Factor Cost, National Currency',
 'Short': u'Gross Domestic Product, Real, Factor Cost, National Currency',
 'Topic': u'Real Sector',
 'Unit_Name': u'Domestic Currency'}

Use pandas to build table of best indicators

In [10]:
df = pd.DataFrame(valid_series).iloc[:,0:5]
#valid_series = None  # clear the extra data from memory
# We prefer more frequent data, and give it a higher score in the sort
df['Freq'] = df['@FREQ'].replace({'M':2, 'Q':1, 'A':0})
df = df.sort_values(['@INDICATOR', '@REF_AREA', 'Freq']).drop_duplicates(
    ['@INDICATOR', '@REF_AREA'], keep='last').reset_index(drop=True)
df['Country'] = df['@REF_AREA'].map(metadata['areas'])
df['Units'] = df[df['@UNIT_MULT']!='0']['@UNIT_MULT'].map(metadata['units'])
# Merge in metadata from indicator level 
meta_df = pd.DataFrame(metadata['indicators']).T
df = pd.concat([df.set_index('@INDICATOR'), meta_df], axis=1, join='inner')
# Generate more clear/specific measures for each series
df['Category'] = df['Full'].str.split(',', expand=True, n=1)[0]
df['Y_Label'] = df['Unit_Name'].str.cat(df['Units'], sep=', ').fillna(df['Unit_Name'])
df['Combined'] = df['@FREQ'].str.cat([df['@REF_AREA'], df.index], sep='.')
df['Chart_Title'] = df['Description'].str.rsplit(',', expand=True, n=1)[0]
Group_dict = {
    'National Accounts':'1-National Accounts',
    'Labor Markets':'2-Labor Markets',
    'Social Indicators':'2-Labor Markets',
    'Economic Activity':'3-Economic Activity',
    'Producer Price Index':'4-Prices',
    'Consumer Prices':'4-Prices',
    'Prices':'4-Prices',
    'Balance of Payments':'5-Balance of Payments',
    'Exchange Rates':'6-Exchange Rates',
    'Financial':'7-Interest Rates',
    'Financial Market Prices': '8-Equities',
    'Financial Indicators': '8-Equities',
    'Commodity Prices': '9-Commodities',
    'External Trade': '10-Trade'
}
df['Group'] = df['Category'].map(Group_dict)
df.index.name = '@INDICATOR'
df = df.reset_index().set_index('Combined')
df['Type'] = 'line'
try:
    df.loc[df['Group'].str.contains('1|5').tolist(), 'Type'] = 'bar'
except:
    pass
df['Source'] = 'IFS'
df['Region'] = df['@REF_AREA'].map(cty_reg_map['region'])
df = df[df['Category'] != 'Non-energy']

In [11]:
df = df.reset_index().sort_values(['Region', 'Country', 'Group'])
keep_list = ['Region', 'Country', 'Group', 'Combined', 
             'Chart_Title', 'Y_Label', 'Type', 'Source']
df[keep_list].to_csv('IFS_Series_List.csv', index=False)