### This code retrieves economic parameter information (inflation, interest rate and GDP) from the OECD database.
#### References: 
- Bagal, A. (2023, October 6). OECD Stats Website: Your Go-To for Comprehensive Statistics datasets on OECD Countries. Medium. https://medium.com/@akshaybagal/oecd-stats-website-your-go-to-for-comprehensive-statistics-datasets-on-oecd-countries-2ae04e4aa044. 
Contribution: The basic logic to retrieve data from OECD.

#### Data sources:
- OECD. (2024). OECD Statistics [dataset]. https://stats.oecd.org/

#### Libraries:
- Reitz, K. (2023). Package Requests (2.31.0) [Python; OS Independent]. https://requests.readthedocs.io
- Package Pandas (2.2). (2024). [Python]. https://pandas.pydata.org/

In [1]:
# Importing libraries
import requests
import pandas as pd
import useful_functions as uf
import xml.etree.ElementTree as ET

In [2]:
# Upper bound of the query window, reused by every OECD request in this notebook.
# The '%Y-%m' pattern is handed to the project helper; presumably the result is the
# last month to fetch formatted as year-month -- confirm in useful_functions.
month_pattern = '%Y-%m'
end_period = uf.define_end_period(month_pattern)


In [None]:
# Get the inflation data from OECD

# Build the URL for the PRICES_CPI dataset: Canada, Chile, UK, USA, Euro Area (EA20),
# Argentina and China, from 2001-01 up to end_period
url = f'https://stats.oecd.org/SDMX-JSON/data/PRICES_CPI/CAN+CHL+GBR+USA+EA20+ARG+CHN.CPALTT01.GP.M/all?startTime=2001-01&endTime={end_period}&dimensionAtObservation=allDimensions'

# Get the data from the URL. requests applies no timeout by default, so set one
# to keep the cell from hanging forever if the server stops answering.
response = requests.request("GET", url, timeout=60)
# Check if the request was successful: raises requests.HTTPError on a 4xx/5xx
# status instead of failing later with an obscure JSON/KeyError
response.raise_for_status()
# Decode the JSON body
data = response.json()
# Subset the data to get the values (observation key -> list whose first item is the value)
data_values = data['dataSets'][0]['observations']
# Subset the data to get the dimensions
dimensions = data['structure']['dimensions']['observation']
# Get the id to name mappings: {dimension name: {item id: item name}}
id_to_name_mappings = {
    dim['name']: {item['id']: item['name'] for item in dim['values']}
    for dim in dimensions
}
# Get the values of the dimensions, in the same order as the positions inside an observation key
dimension_values = [dim['values'] for dim in dimensions]

# Function to get the id from the index
def get_id_from_index(dim_index, index):
    """Return the 'id' of the entry at position `index` of dimension `dim_index`.

    The lookup goes through the module-level `dimension_values` list, so that
    list must be populated before this function is called.
    """
    entry = dimension_values[dim_index][index]
    return entry['id']

# Function to map the id to the name
def map_id_to_name(dim_name, id):
    """Translate `id` into its display name for the dimension `dim_name`.

    Uses the module-level `id_to_name_mappings` table. An id that has no
    entry for that dimension is returned unchanged; an unknown `dim_name`
    raises KeyError.
    """
    names_by_id = id_to_name_mappings[dim_name]
    if id in names_by_id:
        return names_by_id[id]
    return id

# Dimension names in the order this cell reads them from the positions of an observation key
inflation_dimensions = ['Country', 'Subject', 'Measure', 'Frequency', 'Time']

rows = []
# Each observation key looks like "i0:i1:i2:i3:i4": one index per dimension
for obs_key, obs in data_values.items():
    positions = obs_key.split(':')
    # Resolve index -> id -> readable name for every dimension
    labels = [
        map_id_to_name(dim_name, get_id_from_index(pos, int(positions[pos])))
        for pos, dim_name in enumerate(inflation_dimensions)
    ]
    # The first element of the observation holds the data value
    rows.append(labels + [obs[0]])

# Build the long-format dataframe: one row per observation
df_inflation = pd.DataFrame(rows, columns=inflation_dimensions + ['Value'])
# Parse the time labels with the '%b-%Y' pattern and store them as '%Y-%m-%d' strings
parsed_time = pd.to_datetime(df_inflation['Time'], format='%b-%Y')
df_inflation['Date'] = parsed_time.dt.strftime('%Y-%m-%d')
# Pivot to wide format: one row per date, one column per country
df_inflation_rotated = df_inflation.pivot(index='Date', columns=['Country'], values='Value')
# Country label -> output column name (WD stands for World Data)
inflation_column_names = {
    'Argentina': 'WD_ARG_inflation',
    'Canada': 'WD_CAN_inflation',
    'Chile': 'WD_CHI_inflation',
    "China (People's Republic of)": 'WD_CHN_inflation',
    'Euro area (20 countries)': 'WD_EUZ_inflation',
    'United Kingdom': 'WD_UK_inflation',
    'United States': 'WD_US_inflation',
}
df_inflation_rotated = df_inflation_rotated.rename(columns=inflation_column_names)




In [4]:
# Resample to monthly frequency through the project helper (per the original note,
# it keeps the last valid value of each month), then write the result to CSV
inflation_csv_path = '../data/df_inflation.csv'
df_inflation_rotated_monthly = uf.convert_df_to_monthly(df_inflation_rotated, '%Y-%m-%d')

# Export to csv
df_inflation_rotated_monthly.to_csv(inflation_csv_path)

In [5]:
# This code is to get the interest rates from OECD

# Set the URL to get the interest rates (MEI_FIN dataset, subject IRSTCI) from Canada, Chile,
# UK, USA, Euro Area (EA19), Argentina and China from 2001-01 up to end_period.
# The end bound must be spelled "endTime=<period>"; without the "=" the bound
# was never applied to the query.
url = f'https://stats.oecd.org/SDMX-JSON/data/MEI_FIN/IRSTCI.CAN+CHL+GBR+USA+EA19+ARG+CHN.M/all?startTime=2001-01&endTime={end_period}&dimensionAtObservation=allDimensions'
# Get the data from the URL. requests applies no timeout by default, so set one
# to keep the cell from hanging forever if the server stops answering.
response = requests.request("GET", url, timeout=60)
# Fail early with requests.HTTPError on a 4xx/5xx status
response.raise_for_status()
# read the data
data = response.json()
# Subset the data to get the values (observation key -> list whose first item is the value)
data_values = data['dataSets'][0]['observations']
# Subset the data to get the dimensions
dimensions = data['structure']['dimensions']['observation']
# Get the id to name mappings: {dimension name: {item id: item name}}
id_to_name_mappings = {
    dim['name']: {item['id']: item['name'] for item in dim['values']}
    for dim in dimensions
}
# Get the values of the dimensions, in the same order as the positions inside an observation key
dimension_values = [dim['values'] for dim in dimensions]

# Function to get the id from the index
def get_id_from_index(dim_index, index):
    """Return the 'id' stored at position `index` of dimension number `dim_index`.

    Reads the module-level `dimension_values` list that this cell builds
    from the interest-rate response.
    """
    dimension_entries = dimension_values[dim_index]
    return dimension_entries[index]['id']

# Function to map the id to the name
def map_id_to_name(dim_name, id):
    """Return the readable name of `id` within dimension `dim_name`.

    Reads the module-level `id_to_name_mappings` table. Falls back to `id`
    itself when the dimension has no name for it; an unknown `dim_name`
    raises KeyError.
    """
    lookup = id_to_name_mappings[dim_name]
    try:
        return lookup[id]
    except KeyError:
        return id

# Create a list to store the extracted rows
rows = []
# Iterate over the data values to get the values for each country.
# Key positions are read here as Subject, Country, Frequency, Time.
for key, value in data_values.items():
    indices = key.split(':')  # Split keys into separate dimension indices
    subject = map_id_to_name('Subject', get_id_from_index(0, int(indices[0])))
    country = map_id_to_name('Country', get_id_from_index(1, int(indices[1])))
    frequency = map_id_to_name('Frequency', get_id_from_index(2, int(indices[2])))
    time = map_id_to_name('Time', get_id_from_index(3, int(indices[3])))
    data_value = value[0]  # Extract the data value
    rows.append([country, subject, frequency, time, data_value])  # Append the row to the list of rows

# Create a dataframe from the list of rows
df_interest = pd.DataFrame(rows, columns=['Country', 'Subject', 'Frequency', 'Time', 'Value'])
# Parse the time labels with the '%b-%Y' pattern and store them as '%Y-%m-%d' strings
df_interest['Date'] = pd.to_datetime(df_interest['Time'], format='%b-%Y').dt.strftime('%Y-%m-%d')
# Keep only the immediate interest rates, index by date and sort.
# Chained, non-inplace calls return new frames, so the filtered selection is
# never mutated in place.
df_interest = (
    df_interest[df_interest['Subject'].str.contains('Immediate')]
    .set_index('Date')
    .sort_index()
)
# Pivot the dataframe to have the countries as columns
df_interest_rotated = df_interest.pivot(columns=['Country'], values='Value')
# Rename the columns (labels that are absent from the data are simply ignored)
df_interest_rotated = df_interest_rotated.rename(columns={
    'Canada': 'WD_CAN_interest', # WD stands for World Data
    'Chile': 'WD_CHI_interest',
    "China (People's Republic of)": 'WD_CHN_interest',
    # The request asks for EA19, so the label presumably reads "19 countries";
    # the EA20 label is kept as well -- TODO confirm against the response
    'Euro area (19 countries)': 'WD_EUZ_interest',
    'Euro area (20 countries)': 'WD_EUZ_interest',
    'United Kingdom': 'WD_UK_interest',
    'United States': 'WD_US_interest'
})
# NOTE(review): Argentina is requested in the URL but has no entry in the rename
# table above -- confirm whether the series is expected to be returned.

In [6]:
# Resample to monthly frequency through the project helper (per the original note,
# it keeps the last valid value of each month)
interest_csv_path = '../data/df_interest_rate.csv'
df_interest_rotated_monthly = uf.convert_df_to_monthly(df_interest_rotated, '%Y-%m-%d')
# Export to csv
df_interest_rotated_monthly.to_csv(interest_csv_path)

In [7]:
# Get the GDP data from OECD
# Set the URL to get the series for China, UK and USA from 2001-01 up to end_period.
# NOTE(review): the query addresses the DSD_KEI@DF_KEI dataflow with the key
# "M.RS.IX._T" -- confirm that this series is the intended GDP measure.
url = f'https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_KEI@DF_KEI,4.0/CHN+USA+GBR.M.RS.IX._T..?startPeriod=2001-01&endPeriod={end_period}&dimensionAtObservation=AllDimensions'
# Get the data from the URL. requests applies no timeout by default, so set one
# to keep the cell from hanging forever if the server stops answering.
response = requests.request("GET", url, timeout=60)
# Fail early with requests.HTTPError on a 4xx/5xx status instead of trying to
# parse an error page as XML
response.raise_for_status()
xml_data = response.text
# Parse the XML data
root = ET.fromstring(xml_data)

# Define XML namespaces (declared before the function that reads them so the
# cell can be read, and partially re-run, top-down)
NAMESPACES = {
    'generic': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic',
}


# Define a function to extract data from each observation
def extract_observation(observation):
    """Flatten one ``generic:Obs`` XML element into a plain dictionary.

    Args:
        observation: an ElementTree element containing a ``generic:ObsKey``,
            optionally a ``generic:ObsValue`` and optionally
            ``generic:Attributes``.

    Returns:
        A dict with the keys 'TIME_PERIOD' and 'COUNTRY' (taken from the
        ObsKey values with id TIME_PERIOD and REF_AREA), 'VALUE' (the
        ObsValue ``value`` attribute as a string, or None when the
        observation carries no ObsValue element), plus one entry per
        attribute keyed by the attribute id.

    Raises:
        AttributeError: if the ObsKey element, or its TIME_PERIOD / REF_AREA
            value, is missing.
    """
    # Extract the observation key, value and attributes
    obs_key = observation.find('.//generic:ObsKey', namespaces=NAMESPACES)
    obs_value = observation.find('.//generic:ObsValue', namespaces=NAMESPACES)
    attributes = observation.findall('.//generic:Attributes/generic:Value', namespaces=NAMESPACES)

    # Create a dictionary to store the data
    record = {
        'TIME_PERIOD': obs_key.find('.//generic:Value[@id="TIME_PERIOD"]', namespaces=NAMESPACES).get('value'),
        'COUNTRY': obs_key.find('.//generic:Value[@id="REF_AREA"]', namespaces=NAMESPACES).get('value'),
        # An observation without an ObsValue element is recorded as missing
        # instead of aborting the whole extraction
        'VALUE': obs_value.get('value') if obs_value is not None else None,
    }
    # For all attributes, add them to the data dictionary based on their id
    for attr in attributes:
        record[attr.get('id')] = attr.get('value')

    return record

# Extract observations from the XML
observations = root.findall('.//generic:Obs', namespaces=NAMESPACES)
# Convert data to a list of dictionaries
data_list = [extract_observation(observation) for observation in observations]
# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(data_list)
# Copy some columns to a new dataframe
df_gdp_raw = df[['TIME_PERIOD', 'COUNTRY', 'VALUE']].copy()
# XML attribute values arrive as strings: convert them to numbers so the frame
# has a numeric dtype (unparseable or missing values become NaN)
df_gdp_raw['VALUE'] = pd.to_numeric(df_gdp_raw['VALUE'], errors='coerce')
# Parse the period with the '%Y-%m' pattern and store it as a '%Y-%m-%d' string
df_gdp_raw['TIME_PERIOD'] = pd.to_datetime(df_gdp_raw['TIME_PERIOD'], format='%Y-%m').dt.strftime('%Y-%m-%d')
# Rename the columns and set the date column as index
df_gdp_raw = df_gdp_raw.rename(
    columns={'TIME_PERIOD': 'Date', 'COUNTRY': 'country', 'VALUE': 'value'}
).set_index('Date')
# Pivot the dataframe to have the countries as columns
df_gdp_rotated = df_gdp_raw.pivot(columns='country', values='value')
# Rename the columns
df_gdp = df_gdp_rotated.rename(columns={
    "CHN": 'WD_CHN_GDP', # WD stands for World Data
    'GBR': 'WD_UK_GDP',
    'USA': 'WD_US_GDP'
})

In [8]:
# Resample to monthly frequency through the project helper (per the original note,
# it keeps the last valid value of each month)
gdp_csv_path = '../data/df_gdp_rate.csv'
df_gdp_monthly = uf.convert_df_to_monthly(df_gdp, '%Y-%m-%d')

# Export to csv
df_gdp_monthly.to_csv(gdp_csv_path)