In [3]:
import urllib
from collections import OrderedDict
from bs4 import BeautifulSoup

In [5]:
# Fetch the SDMX data-structure definition of the OECD "IDD" dataset and parse
# it; the code lists of every dimension are looked up in this tree later on.
# NOTE(review): no parser is named, so bs4 uses whichever one is installed; the
# later `.html.body` lookups presumably rely on a parser that wraps the
# document in <html><body> (e.g. lxml) -- confirm and consider pinning it.
IDD_STRUCTURE_URL = 'http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/IDD'
idd_structure = BeautifulSoup(urllib.urlopen(IDD_STRUCTURE_URL))

In [6]:
def extract_mapping(id_value, xml=None):
    """
    Build an ordered ``code value -> English label`` mapping for one code list.

    stats.oecd.org sends back results in the same order as the
    structure specified in the XML body, so an ordered dictionary
    is used to preserve that ordering.

    Parameters:
        id_value: value of the ``id`` attribute of the element that holds
            the ``code`` entries (e.g. a dimension's ``codelist`` attribute).
        xml: parsed tree to search. When omitted, ``idd_structure.html.body``
            is used; it is resolved at call time so that re-fetching
            ``idd_structure`` never leaves this function searching a
            stale document.

    Returns:
        OrderedDict mapping each ``code`` element's ``value`` attribute to
        the text of its first descendant carrying ``xml:lang="en"``, in
        document order.

    Raises:
        ValueError: if ``xml`` contains no element whose id is ``id_value``.
    """
    if xml is None:
        # Looked up here rather than in the signature: a default argument is
        # evaluated only once, when the ``def`` cell runs.
        xml = idd_structure.html.body

    subtree = xml.find(id=id_value)
    if subtree is None:
        raise ValueError(
            'no element with id=%r found in the structure document' % (id_value,)
        )

    return OrderedDict(
        (code['value'], code.find(attrs={'xml:lang': 'en'}).text)
        for code in subtree.find_all('code')
    )

In [7]:
"""
the 'keyfamily' tag in the structure lists the code list ids of all the dimensions:
<KeyFamily id="IDD" agencyID="OECD">
    <Name xml:lang="en">Income Distribution and Poverty</Name>
    <Name xml:lang="fr">Distribution des revenus et pauvreté</Name>
    <Components>
        <Dimension codelist="CL_IDD_LOCATION" conceptRef="LOCATION"/>
        <Dimension codelist="CL_IDD_MEASURE" conceptRef="MEASURE" isMeasureDimension="true"/>
        <Dimension codelist="CL_IDD_AGE" conceptRef="AGE"/>
        <Dimension codelist="CL_IDD_DEFINITION" conceptRef="DEFINITION"/>
        <Dimension codelist="CL_IDD_METHODO" conceptRef="METHODO"/>
        <TimeDimension codelist="CL_IDD_TIME" conceptRef="TIME"/>
        ...
    </Components>
...
</KeyFamily>

We can then search the DOM for each dimension's valid values via
dom.find(id=dimension['codelist'])
"""
# Key each dimension by its lower-cased concept name; the value is the
# mapping that extract_mapping() builds from that dimension's code list.
components = idd_structure.html.body.keyfamily.components
dimension_maps = {
    dim['conceptref'].lower(): extract_mapping(dim['codelist'])
    for dim in components.find_all('dimension')
}

""" build API query:
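The format is

LOCATION.MEASURE.AGE.DEFINITION.METHODO

where the dimensions are '.' separated and the values
selected within each dimension are '+' separated,
e.g. AUS+AUT.GINI.TOT.CURRENT.METH2012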

For the most part, we're interested in all locations,
measures, ages, and methods, and just the 'CURRENT'
definition...
age and definition only return one value...
"""
# Dimensions in the order the query string expects them.
dimensions = ['location', 'measure', 'age', 'definition', 'methodo']

# Every code of a dimension is joined with '+', and the per-dimension
# groups are then joined with '.'.
dimension_filter = '.'.join(
    '+'.join(dimension_maps[name]) for name in dimensions
)

In [8]:
# Show the assembled query filter (parenthesised form prints the same text).
print(dimension_filter)

AUS+AUT+BEL+CAN+CZE+DNK+FIN+FRA+DEU+GRC+HUN+ISL+IRL+ITA+JPN+KOR+LUX+MEX+NLD+NZL+NOR+POL+PRT+SVK+ESP+SWE+CHE+TUR+GBR+USA+CHL+EST+ISR+RUS+NMEC+SVN.GINI+INEQ+CPI2010+REF+PVTAA1+PVTAATOTAL+PVT+ECTOTAL+INCCTOTAL+INC_CRT+TRRSSCTOTAL+TRRCTOTAL+SECTOTAL+SEICTOTAL+TACTOTAL+TRPCTOTAL+INCAC1+INCACTOTAL+INCAC2+TRPERCTOTAL+OCCTOTAL+TRRERCTOTAL+KICTOTAL+PVTAA2+PPPPRC+STDG+GINIB+PVTAA3+KCTOTAL+TRROTCTOTAL+TRPOTCTOTAL+INCAC3+INCAC4+TRCTOTAL+PVTAA4+GINIG+MEDIANC+PVTAA5+INCAC5+INCAC6+PVTAA6+PALMA+PVTAA7+INCAC7+INCHCTOTAL+PVT5B+PVTBHTOTAL+PVT5A+PMEAN5A+PMED5A+PVTAHTOTAL+PVT6B+PVT6A+PMEAN6A+PMED6A+P90P10+P90P50+P50P10+S80S20+S90S10+IND+POP+SHA1+SHA2+SHA3+SHA4+SHA5+SHA6+SHA7+HHD.TOT+WA+OLD.CURRENT+PREVIOUS+INCOMPARABLE.METH2012+METH2011
