In [1]:
import os
import re
import shutil
import sys
import tempfile
import urllib.request as request
from contextlib import closing
from datetime import date

import pandas as pd
import xmltodict
from pandasdmx import Request


In [2]:
def check_exists(path):
    """Report whether ``path`` refers to an existing filesystem entry.

    Returns the boolean produced by ``os.path.exists`` for the given path.
    """
    found = os.path.exists(path)
    return found


def check_or_create_dir(directory):
    """Create ``directory`` (and any missing parents) unless the path exists.

    If something already exists at ``directory`` -- a directory or any other
    kind of filesystem entry -- nothing is done and no error is raised.

    Returns None.
    """
    if not os.path.exists(directory):
        # exist_ok=True closes the check-then-create race: if another process
        # creates the directory between the test above and this call, the
        # call succeeds instead of raising FileExistsError.
        os.makedirs(directory, exist_ok=True)
        
        
def find_from(s, first):
    """Return the part of ``s`` that follows the first occurrence of ``first``.

    When ``first`` cannot be located (or any other error occurs while
    searching or slicing), the exception is printed and None is returned.
    """
    try:
        return s[s.index(first) + len(first):]
    except Exception as err:
        print(err)
        return None
        
        
def copy_url_to_file(url, tmp_file):
    """Stream the resource at ``url`` into the local file ``tmp_file``.

    The URL is opened first and the destination file second, so a failed
    request leaves no destination file behind. Any exception raised while
    opening either side or while copying is printed and swallowed.

    Returns None in every case.
    """
    try:
        with closing(request.urlopen(url)) as response, open(tmp_file, 'wb') as sink:
            shutil.copyfileobj(response, sink)
    except Exception as err:
        print(err)

In [3]:
# An ampersand that does NOT start a named, decimal or hexadecimal entity
# reference; such bare ampersands make the XML parser reject the document.
_BARE_AMPERSAND = re.compile(rb'&(?!(?:[A-Za-z_][\w.-]*|#[0-9]+|#x[0-9A-Fa-f]+);)')


def _as_list(node):
    """Wrap a lone xmltodict node in a list so it can always be indexed/iterated."""
    return node if isinstance(node, list) else [node]


def apply_dsd(df, dsd_url):
    """Replace coded values in ``df`` with descriptions from a data structure definition.

    The document at ``dsd_url`` is downloaded, parsed with xmltodict, and for
    every column of ``df`` other than ``'Time Period'`` and ``0`` the codes
    found in the code list at the same position are mapped to the text of
    their first description.

    NOTE(review): the column-to-code-list pairing is purely positional
    (column ``i`` uses ``CodeList[i]``); this assumes the frame's columns are
    in the same order as the code lists in the document -- confirm against
    the data source.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose coded columns should be translated.
    dsd_url : str
        URL of the XML structure document.

    Returns
    -------
    pandas.DataFrame
        A frame with the codes replaced; values without a matching code are
        left as they are.

    Raises
    ------
    FileNotFoundError
        If the download failed (``copy_url_to_file`` only prints its errors,
        so the failure surfaces when the file is opened).
    """
    # Work in a private scratch directory: it cannot collide with a file in
    # the working directory and is removed even when reading fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        dsd_path = os.path.join(scratch_dir, 'dsd.xml')
        copy_url_to_file(dsd_url, dsd_path)

        # Read bytes so the parser honours the encoding named in the XML
        # declaration instead of the platform's default text encoding.
        with open(dsd_path, 'rb') as f:
            raw_xml = f.read()

    # Escape only bare ampersands; existing entity references stay intact.
    dsd = xmltodict.parse(_BARE_AMPERSAND.sub(b'&amp;', raw_xml))

    for i, col in enumerate(df.columns):

        if col not in ['Time Period', 0]:
            # xmltodict yields a dict (not a list) for an element that occurs
            # only once, so normalise before indexing or iterating.
            code_lists = _as_list(dsd['message:Structure']['message:CodeLists']['CodeList'])
            ls_codes = _as_list(code_lists[i]['Code'])

            dic_val = {
                code['@value']: _as_list(code['Description'])[0]['#text']
                for code in ls_codes
            }

            df = df.replace({col: dic_val})

    return df

In [4]:
# Reference: http://stat.data.abs.gov.au/Index.aspx

# One row per dataset to download. Field order within a row:
#   series_cat, dtype_cat, dtype_subcat, dtype_desc, dtype_abb, res_url, dsd_url
ls_query = [
    [
        "Economy",
        "Business Indicators",
        "Business Indicators",
        "Inventories, Current Prices and Chain Volume Measures",
        "QBIS",
        "http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetData/QBIS/50+70.B+C+D+F+G+H+TOTAL.0.99.10+20+30.Q/",
        "http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetDataStructure/QBIS",
    ],
    [
        "Health",
        "Health Conditions and Risk Factors",
        "Apparent Consumption of Alcohol, Australia",
        "Pure Alcohol Available for Consumption",
        "ALC",
        "http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetData/ALC/1.1+2.1+2+3+5+4.6+10+11+12+15+14.A/",
        "http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetDataStructure/ALC",
    ],
]

# Tabular view of the queries, with one named column per field.
df_query = pd.DataFrame(
    ls_query,
    columns=[
        'series_cat',
        'dtype_cat',
        'dtype_subcat',
        'dtype_desc',
        'dtype_abb',
        'res_url',
        'dsd_url',
    ],
)

In [5]:
# Build the SDMX client once: it does not depend on the individual query, so
# there is no reason to recreate it (and reset its timeout) per iteration.
agency_code = 'ABS'

ABS = Request(agency_code)
# NOTE(review): very generous timeout; presumably seconds -- confirm the unit.
ABS.client.config['timeout'] = 100000

for index, row in df_query.iterrows():

    # The resource id is the dataset abbreviation followed by everything that
    # comes after it in the full data URL.
    res_suffix = find_from(row['res_url'], row['dtype_abb'])

    if res_suffix is None:
        # find_from only prints on failure; fail with a message that names the
        # offending query instead of an opaque str + None TypeError.
        raise ValueError(
            "Abbreviation %r does not occur in res_url %r" % (row['dtype_abb'], row['res_url'])
        )

    res_url = row['dtype_abb'] + res_suffix

    data_response = ABS.data(resource_id=res_url)

    # Reshape the response into a long frame, then swap codes for descriptions.
    df = data_response.write().unstack().reset_index()
    df = apply_dsd(df, row['dsd_url'])

    # One gzip-compressed, semicolon-separated file per dataset.
    data_file = row['dtype_abb'] + ".csv.gz"

    df.to_csv(data_file, sep=';', quotechar='"', date_format='%Y-%m-%d %H:%M:%S.%f', compression='gzip', index=False)