In [83]:
import urllib.request, json 
import pandas as pd

In [84]:
# Example query settings for the OECD API.
# These module-level names are read by the cells further down.
dataset_identifier = "ICT_HH2"

# One entry per dataset dimension, in the order the API expects them.
# An empty code ("") leaves that dimension unfiltered.
filter_dict = {
    "Country": ["FRA"],
    "Indicator": [""],  # take all the indicators
    "Breakdown": [""],
}

agency_name = "all"
params = {"detail": "dataonly"}

In [89]:
def makeAPICall(dataset_identifier,
                filter_dict,
                agency_name,
                params=None,
                OECD_BASE_URL="https://stats.oecd.org/SDMX-JSON/data/"):
    """
    Build the request URL for the OECD SDMX-JSON API.

    See https://data.oecd.org/api/sdmx-json-documentation/#d.en.330346

    Arguments:
        dataset_identifier: string containing the name of the dataset
        filter_dict: dictionary mapping each dimension to the list of codes to
            query, in the dimension order expected by the API. Codes of one
            dimension are joined with "+", dimensions are joined with ".".
            An empty code ("") leaves the dimension unfiltered. (Country
            can be only one.)
        agency_name: agency name to pass to the api
        params: optional dictionary of query-string parameters
        OECD_BASE_URL: base URL the dataset path is appended to

    Returns:
        The request URL as a string.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import urlencode

    # With no code selected in any dimension, use the "all" keyword instead of
    # a bare run of dots.
    has_selection = any(code for codes in filter_dict.values() for code in codes)
    if has_selection:
        filter_expression = ".".join("+".join(codes) for codes in filter_dict.values())
    else:
        filter_expression = "all"

    api_string = f"{OECD_BASE_URL}{dataset_identifier}/{filter_expression}/{agency_name}"
    if params:
        # urlencode separates the pairs with "&" and escapes reserved characters.
        api_string += "?" + urlencode(params)

    return api_string

In [118]:
#json_url=makeAPICall(dataset_identifier,filter_dict,agency_name,params)
#print(json_url)

In [119]:
def getData(json_url):
    """
    Download a JSON document and return it as parsed Python data.

    Arguments:
        json_url: URL to open with urllib

    Returns:
        The decoded JSON payload (a dict for the OECD responses used here).
    """
    with urllib.request.urlopen(json_url) as response:
        payload = response.read()

    # bytes.decode() defaults to UTF-8
    return json.loads(payload.decode())

In [120]:
def convertToPandas(data):
    """
    Convert one SDMX-JSON response into a DataFrame.

    Arguments:
        data: parsed JSON response. The first series dimension is read as the
            country, the second as the indicator and the third as the
            breakdown; only the first country listed is used.

    Returns:
        DataFrame indexed by (Country, Var, Brkd) with one column per value of
        the observation dimension, in the order the response declares them.
        Periods a series has no observation for are NaN.

    A summary of the dimensions is printed as a side effect.
    """
    dimensions = data["structure"]["dimensions"]
    col_names = [x["id"] for x in dimensions["observation"][0]["values"]]

    dataset_dimensions = dimensions["series"]
    print(f"Dataset dimensions:\t{len(dataset_dimensions)}")

    country = dataset_dimensions[0]["values"][0]["id"]
    print(f"Country:\t{country}\n")

    indicators = [x["id"] for x in dataset_dimensions[1]["values"]]
    print(f"Indicators:\t{indicators}")
    print(f"N indicators = {len(indicators)}\n")

    breakdowns = [x["id"] for x in dataset_dimensions[2]["values"]]
    print(f"Breakdowns:\t{breakdowns}")
    print(f"N breakdowns = {len(breakdowns)}")

    df_ind_list = []

    for key, series in data["dataSets"][0]["series"].items():
        # Series keys look like "country:indicator:breakdown", each part being
        # a position in the matching dimension's value list.
        key_parts = key.split(":")
        ind = int(key_parts[1])
        brkd = int(key_parts[2])

        df = pd.DataFrame(series["observations"])
        # Observation keys are positions in col_names. Label each column here,
        # per series, because a series may skip periods and a positional
        # rename after concatenation would then attach the wrong labels.
        df.columns = [col_names[int(position)] for position in df.columns]
        df["Var"] = indicators[ind]
        df["Brkd"] = breakdowns[brkd]
        df_ind_list.append(df.set_index(["Var", "Brkd"]))

    df_res = pd.concat(df_ind_list, sort=False)
    # Restore the declared period order; concat orders columns by first
    # appearance. Periods no series reported become all-NaN columns.
    df_res = df_res.reindex(columns=col_names)

    df_res = df_res.reset_index()
    df_res["Country"] = country

    return df_res.set_index(["Country", "Var", "Brkd"])

In [127]:
#convertToPandas(data)

### Create final dataset

In [135]:
def createMultipleCountryDataset(country_list):
    """
    Create a dataset with multiple countries.

    One API request is made per country, using the module-level
    dataset_identifier, filter_dict, agency_name and params; each request URL
    is printed.

    Arguments:
        country_list: list of country codes to download

    Returns:
        DataFrame with the per-country frames stacked and the columns sorted
        by label.
    """

    df_to_concat = []

    for country in country_list:
        # Build a per-country copy so the module-level filter_dict is left
        # untouched and the cell can be re-run safely.
        country_filter = {**filter_dict, "Country": [country]}
        json_url = makeAPICall(dataset_identifier, country_filter, agency_name, params)
        print(json_url)
        data = getData(json_url)
        df_to_concat.append(convertToPandas(data))

    res = pd.concat(df_to_concat, sort=False)

    # Sort the columns together with their data. Assigning sorted labels to
    # res.columns would only rename the columns and mislabel the values
    # whenever countries report different periods.
    return res.sort_index(axis=1)

In [136]:
# Countries to download; one API request is made per entry.
# NOTE(review): "AUS" is the ISO 3166-1 alpha-3 code for Australia -- confirm
# it is intended rather than "AUT" (Austria).
country_list = [
    "DEU",
    "FRA",
    "AUS",
    "BEL",
    "ITA",
]
df_final = createMultipleCountryDataset(country_list)

https://stats.oecd.org/SDMX-JSON/data/ICT_HH2/DEU../all?detail=dataonly
Dataset dimensions:	3
Country:	DEU

Indicators:	['A1', 'B1', 'B21', 'B21A', 'B21B', 'C2A', 'C2B', 'C5A', 'C5B', 'C5B1', 'D1A', 'D1B', 'D1E', 'D1F', 'D1G', 'D1H', 'D1I', 'D1J', 'D1K', 'D1M', 'D1N', 'D1O', 'D1P', 'D1Q', 'D1R', 'D1T', 'D1U', 'F1', 'F1A', 'F1B', 'F1C', 'G1', 'G2A', 'G2B', 'G2C', 'G2G', 'G2H', 'G2I', 'G2J', 'G2K', 'G2M', 'G2N', 'G2O', 'H1A', 'H1B', 'H1C', 'H1D', 'H1E', 'H1F', 'H1G', 'H1H', 'H1I', 'H1J', 'H1K', 'H1L', 'H1M', 'I3', 'I3A', 'I3B', 'I3C', 'I3D', 'F4G', 'F4B', 'I6', 'I7C', 'I8', 'F4F', 'I7B', 'F4J', 'F4A', 'I9', 'I7A', 'D1W', 'G2DBIS', 'C6E']
N indicators = 75

Breakdowns:	['HH_TOTAL', 'HH_Q1', 'HH_Q2', 'HH_Q3', 'HH_Q4', 'IND_TOTAL', 'I_HI', 'I_ME', 'I_LO', 'Y16_24', 'Y16_24HI', 'Y16_24ME', 'Y16_24LO', 'Y25_54', 'Y25_54HI', 'Y25_54ME', 'Y25_54LO', 'Y55_74', 'Y55_74HI', 'Y55_74ME', 'Y55_74LO', 'Y75_MAX', 'F_Y16_74', 'F_HI', 'F_ME', 'F_LO', 'F_Y16_24', 'F_Y25_54', 'F_Y55_74', 'M_Y16_74', 'M_HI'

### Write the final dataset to CSV

In [125]:
# Save the combined dataset next to the notebook, with a header row.
df_final.to_csv("data.csv", header=True)