In [1]:
import requests
import json
import pandas as pd
import numpy as np


In [21]:
# Endpoint that every request in this notebook is sent to.
url = 'http://stat.abs.gov.au/itt/query.jsp'
# Dataset identifier, passed as the `datasetid` query parameter below.
dataid = 'SEIFA_POA'

In [22]:
# List the concepts defined for this dataset.
r1 = requests.get(
    url,
    params={'method': 'GetDatasetConcepts', 'datasetid': dataid},
)

In [23]:
r1.json()

{u'concepts': [u'MEASURE', u'INDEX_TYPE', u'POA'],
 u'copyright': u'ABS (c) copyright Commonwealth of Australia 2015. Retrieved on 23/04/2015 at 16:57'}

In [24]:
# Fetch the code list for the INDEX_TYPE concept, as JSON.
r2 = requests.get(
    url,
    params={
        'method': 'GetCodeListValue',
        'concept': 'INDEX_TYPE',
        'datasetid': dataid,
        'format': 'json',
    },
)

In [25]:
r2.json()

{u'codes': [{u'code': u'IEO',
   u'description': u'Index of Education and Occupation',
   u'parentCode': u'',
   u'parentDescription': u''},
  {u'code': u'IER',
   u'description': u'Index of Economic Resources',
   u'parentCode': u'',
   u'parentDescription': u''},
  {u'code': u'IRSAD',
   u'description': u'Index of Relative Socio-economic Advantage and Disadvantage',
   u'parentCode': u'',
   u'parentDescription': u''},
  {u'code': u'IRSD',
   u'description': u'Index of Relative Socio-economic Disadvantage',
   u'parentCode': u'',
   u'parentDescription': u''}],
 u'copyright': u'ABS (c) copyright Commonwealth of Australia 2015. Retrieved on 23/04/2015 at 16:57'}

In [26]:
# Fetch the code list for the MEASURE concept, as JSON.
# Note: this rebinds `r2`, replacing the INDEX_TYPE response from the earlier cell.
r2 = requests.get(
    url,
    params={
        'method': 'GetCodeListValue',
        'concept': 'MEASURE',
        'datasetid': dataid,
        'format': 'json',
    },
)

r2.json()

{u'codes': [{u'code': u'MAXS',
   u'description': u'Maximum score for SA1s in area',
   u'parentCode': u'',
   u'parentDescription': u''},
  {u'code': u'MINS',
   u'description': u'Minimum score for SA1s in area',
   u'parentCode': u'',
   u'parentDescription': u''},
  {u'code': u'RWA',
   u'description': u'RANK WITHIN AUSTRALIA',
   u'parentCode': u'',
   u'parentDescription': u''},
  {u'code': u'RWAD',
   u'description': u'Rank within Australia - Decile',
   u'parentCode': u'RWA',
   u'parentDescription': u'RANK WITHIN AUSTRALIA'},
  {u'code': u'RWAP',
   u'description': u'Rank within Australia - Percentile',
   u'parentCode': u'RWA',
   u'parentDescription': u'RANK WITHIN AUSTRALIA'},
  {u'code': u'RWAR',
   u'description': u'Rank within Australia',
   u'parentCode': u'RWA',
   u'parentDescription': u'RANK WITHIN AUSTRALIA'},
  {u'code': u'RWSD',
   u'description': u'Rank within State or Territory - Decile',
   u'parentCode': u'RWST',
   u'parentDescription': u'RANK WITHIN STATE AND

In [27]:
# Request the dataset's series as JSON. The 'and' parameter carries the
# filter expression; here it selects INDEX_TYPE.IRSD.
r3 = requests.get(
    url,
    params={
        'method': 'GetGenericData',
        'datasetid': dataid,
        'format': 'json',
        'and': 'INDEX_TYPE.IRSD',
    },
)

In [28]:
r3.json()

{u'copyright': u'ABS (c) copyright Commonwealth of Australia 2015. Retrieved on 23/04/2015 at 16:58',
 u'series': [{u'concepts': [{u'Value': u'800', u'name': u'POA'},
    {u'Value': u'IRSD', u'name': u'INDEX_TYPE'},
    {u'Value': u'SCORE', u'name': u'MEASURE'}],
   u'observations': [{u'Time': u'2011', u'Value': u'1060'}]},
  {u'concepts': [{u'Value': u'800', u'name': u'POA'},
    {u'Value': u'IRSD', u'name': u'INDEX_TYPE'},
    {u'Value': u'RWAR', u'name': u'MEASURE'}],
   u'observations': [{u'Time': u'2011', u'Value': u'2053'}]},
  {u'concepts': [{u'Value': u'800', u'name': u'POA'},
    {u'Value': u'IRSD', u'name': u'INDEX_TYPE'},
    {u'Value': u'RWAD', u'name': u'MEASURE'}],
   u'observations': [{u'Time': u'2011', u'Value': u'9'}]},
  {u'concepts': [{u'Value': u'800', u'name': u'POA'},
    {u'Value': u'IRSD', u'name': u'INDEX_TYPE'},
    {u'Value': u'RWAP', u'name': u'MEASURE'}],
   u'observations': [{u'Time': u'2011', u'Value': u'83'}]},
  {u'concepts': [{u'Value': u'800', u'name'

In [42]:
def abs_to_pandas(abs_data):
    """
    Convert the results from an ABS data request into a pandas data-frame.

    Each entry of ``abs_data['series']`` becomes one row. Every concept name
    becomes a column holding that concept's ``Value``, and every observation
    ``Time`` becomes a column holding that observation's ``Value``. Values are
    kept exactly as received (no type conversion is attempted).

    Columns are discovered across *all* series, not only the first one, so a
    concept or time period that first appears in a later series still gets a
    column. Any cell a series does not provide is filled with NaN.

    :param abs_data: Decoded JSON returned from a request to the ABS ITT query
        interface with format 'json' specified. Must contain a 'series' list
        whose items have 'concepts' (dicts with 'name' and 'Value') and
        'observations' (dicts with 'Time' and 'Value').
    :returns: A pandas DataFrame with one row per concept configuration.
        Concept columns come first, followed by time-period columns, each in
        first-seen order. An empty 'series' list yields an empty DataFrame.
    :raises KeyError: if 'series', or any of the per-item keys above, is missing.
    """
    # Lists (rather than sets/dicts) keep first-seen column order on every
    # Python version; the number of distinct columns is tiny.
    concept_columns = []
    time_columns = []
    rows = []

    for datum in abs_data['series']:
        row = {}

        for concept in datum['concepts']:
            name = concept['name']
            if name not in concept_columns:
                concept_columns.append(name)
            row[name] = concept['Value']

        for observation in datum['observations']:
            period = observation['Time']
            if period not in time_columns:
                time_columns.append(period)
            row[period] = observation['Value']

        rows.append(row)

    # Guard against a time label colliding with a concept name so that no
    # column is listed twice.
    columns = concept_columns + [t for t in time_columns
                                 if t not in concept_columns]

    # Building from row records lets pandas insert NaN wherever a series has
    # no value for a column, keeping all columns the same length.
    return pd.DataFrame(rows, columns=columns)

In [43]:
# Decode the r3 response body as JSON and convert it to a DataFrame.
df = abs_to_pandas(r3.json())

In [44]:
df

Unnamed: 0,2011,INDEX_TYPE,MEASURE,POA
0,1060,IRSD,SCORE,800
1,2053,IRSD,RWAR,800
2,9,IRSD,RWAD,800
3,83,IRSD,RWAP,800
4,25,IRSD,RWSR,800
5,9,IRSD,RWSD,800
6,,IRSD,RWSP,800
7,1032,IRSD,MINS,800
8,1127,IRSD,MAXS,800
9,0,IRSD,URP,800


In [37]:
len(df['MEASURE'])

24810

In [41]:
len(df['2011'])

24810