In [None]:
import urllib.request, json 
import pandas as pd

In [None]:
# Example inputs for makeAPICall (uncomment to try a single request)
#dataset_identifier="ICT_HH2"
#filter_dict={"Country":["FRA"],
#             "Indicator":[""], #Take all the indicators
#             "Breakdown":[""]} #Take all the breakdowns
#agency_name="all"
#params={"detail":"dataonly"}

In [None]:
def makeAPICall(dataset_identifier,
                filter_dict,
                agency_name,
                params=None,
                OECD_BASE_URL="https://stats.oecd.org/SDMX-JSON/data/"):
    """
    Build the request URL for the OECD SDMX-JSON API.

    See https://data.oecd.org/api/sdmx-json-documentation/#d.en.330346

    Arguments:
        dataset_identifier: string containing the name of the dataset
        filter_dict: dictionary mapping each dimension to the values to query.
            Dimensions are emitted in the dictionary's iteration order,
            separated by "."; the values of one dimension are joined by "+".
            An empty string (or empty list) leaves that dimension unfiltered.
            A bare string is accepted as a single value.
        agency_name: agency name to pass to the api
        params: optional dictionary of query-string parameters
        OECD_BASE_URL: prefix of the URL, up to and including the trailing "/"

    Returns:
        The URL as a string. When no dimension carries any value the filter
        part of the URL is the keyword "all".
    """
    dimension_filters = []
    for values in filter_dict.values():
        # A bare string would otherwise be joined character by character.
        if isinstance(values, str):
            values = [values]
        # Empty entries mean "no restriction" and must not leave stray "+" signs.
        dimension_filters.append("+".join(v for v in values if v))

    if any(dimension_filters):
        filter_expression = ".".join(dimension_filters)
    else:
        filter_expression = "all"

    api_string = f"{OECD_BASE_URL}{dataset_identifier}/{filter_expression}/{agency_name}"
    if params:
        # Query parameters have to be separated by "&".
        api_string += "?" + "&".join(f"{name}={value}" for name, value in params.items())

    return api_string

In [None]:
#json_url=makeAPICall(dataset_identifier,filter_dict,agency_name,params)
#print(json_url)

In [None]:
def getData(json_url):
    """
    Download the resource at json_url and parse it as JSON.

    Arguments:
        json_url: URL to open with urllib

    Returns:
        The decoded JSON document (a dict for the OECD API responses).
    """
    response = urllib.request.urlopen(json_url)
    with response:
        raw_bytes = response.read()
    # bytes.decode() with no argument decodes as UTF-8
    payload = json.loads(raw_bytes.decode())
    return payload

In [None]:
def convertToPandas(data):
    """
    Convert a parsed SDMX-JSON response into a wide pandas DataFrame.

    Arguments:
        data: dictionary as returned by getData. The first three series
            dimensions are assumed to be country, indicator and breakdown,
            in that order, and series keys are expected to look like
            "<country>:<indicator>:<breakdown>" where each part is a
            position in the corresponding dimension's list of values.

    Returns:
        DataFrame indexed by (Country, Var, Brkd) with one column per id of
        the observation dimension, in the order given by the response.
        Observations missing from a series are left as NaN.

    A summary of the dimensions found is printed as a side effect.
    """
    dimensions = data["structure"]["dimensions"]
    col_names = [x["id"] for x in dimensions["observation"][0]["values"]]

    dataset_dimensions = dimensions["series"]
    print(f"Dataset dimensions:\t{len(dataset_dimensions)}")

    countries = [x["id"] for x in dataset_dimensions[0]["values"]]
    country = countries[0]
    print(f"Country:\t{country}\n")

    indicators = [x["id"] for x in dataset_dimensions[1]["values"]]
    print(f"Indicators:\t{indicators}")
    print(f"N indicators = {len(indicators)}\n")

    breakdowns = [x["id"] for x in dataset_dimensions[2]["values"]]
    print(f"Breakdowns:\t{breakdowns}")
    print(f"N breakdowns = {len(breakdowns)}")

    df_ind_list = []

    for key, series in data["dataSets"][0]["series"].items():
        cou, ind, brkd = (int(pos) for pos in key.split(":")[:3])
        df = pd.DataFrame(series["observations"])
        # Observation keys are positions in col_names, not running column
        # numbers: translate each one so that a series with missing
        # observations still lands in the right columns.
        df.columns = [col_names[int(obs_key)] for obs_key in df.columns]
        # Label each series with its own country instead of assuming that
        # the response holds a single one.
        df["Country"] = countries[cou]
        df["Var"] = indicators[ind]
        df["Brkd"] = breakdowns[brkd]
        df_ind_list.append(df)

    df_res = pd.concat(df_ind_list, sort=False)

    # reindex restores the order of the response and adds an all-NaN column
    # for any observation id that no series reported.
    return df_res.set_index(["Country", "Var", "Brkd"]).reindex(columns=col_names)

In [None]:
#convertToPandas(data)

### Create final dataset

In [None]:
def createMultipleCountryDataset(country_list,
                                 dataset_identifier="ICT_HH2",
                                 indicator_list=[""],
                                 breakdown_list=[""],
                                 agency_name="all",                #do not touch
                                 params={"detail":"dataonly"}      #do not touch
                                ):
    """
    Create a dataset with multiple countries.

    One request is sent per country and the per-country tables are stacked.

    Arguments:
        country_list: list of country codes (a single code given as a plain
            string is accepted too)
        dataset_identifier: string containing the name of the dataset
        indicator_list: indicators to request; [""] requests all of them
        breakdown_list: breakdowns to request; [""] requests all of them
        agency_name: agency name to pass to the api
        params: query-string parameters to pass to the api

    Returns:
        DataFrame with the rows of every country and its columns ordered by
        sorted column label.

    Raises:
        ValueError: if country_list is empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(country_list, str):
        country_list = [country_list]
    if not country_list:
        raise ValueError("country_list must contain at least one country code")

    df_to_concat = []

    for country in country_list:
        filter_dict = {
            "Country": [country],
            "Indicator": indicator_list,
            "Breakdown": breakdown_list
        }
        json_url = makeAPICall(dataset_identifier, filter_dict, agency_name, params)
        print(json_url)
        data = getData(json_url)
        df_to_concat.append(convertToPandas(data))

    res = pd.concat(df_to_concat, sort=False)

    # Reorder the columns together with their data. Assigning the sorted
    # labels to res.columns would only rename the columns in place and attach
    # values to the wrong label whenever the order changes.
    return res[sorted(res.columns)]

### Requesting several indicators and breakdowns in a single call does not work properly (quick workaround: loop over every combination)

In [None]:
# Specify here your list of countries, indicators and breakdowns
country_list = ["DEU","ITA","FRA","AUS"]
indicator_list = ["A1","B1"]
breakdown_list = ["HH_TOTAL"]

df_list = []

# One request per (country, indicator, breakdown) combination.
for cou in country_list:
    for ind in indicator_list:
        for bkd in breakdown_list:
            try:
                df = createMultipleCountryDataset(
                        country_list = [cou],
                        indicator_list = [ind],
                        breakdown_list = [bkd]
                )
            except Exception as err:
                # Catch Exception rather than everything so that interrupting
                # the kernel still works, and chain the original error so the
                # real cause stays visible in the traceback.
                raise ValueError(f"One of {(cou,ind,bkd)} not found") from err
            df_list.append(df)


# Union the various dataframes
df_final = pd.concat(df_list,sort=True)

In [None]:
# Show the combined table as this cell's output.
df_final

### Write CSV

In [None]:
# Save the combined table to "data.csv" (relative path), including the header row.
df_final.to_csv("data.csv",header=True)