### This notebook merges all the datasets into a single dataset containing all the information.
#### References: 



#### Source of data
- Previous import scripts
- Brazilian Ibovespa Index (B3) - Historical data (manually collected from Yahoo Finance) - YAHOO Finance. (2024). Dataset IBOVESPA - Stock Historical Prices & Data [dataset]. https://finance.yahoo.com/quote/%5EBVSP/history/

#### Packages
- Package Pandas (2.2). (2024). [Python]. https://pandas.pydata.org/

In [1]:
# Importing the packages
import pandas as pd


In [6]:
# Load every exported CSV file into a dataframe indexed by its parsed 'Date' column

# Locations of the exported CSV files
file_path_macro = '../data/df_macroeco.csv'
file_path_gdp = '../data/df_gdp_rate.csv'
file_path_inflation = '../data/df_inflation.csv'
file_path_interest = '../data/df_interest_rate.csv'
file_path_commodities = '../data/df_commodities.csv'
file_path_budget = '../data/df_budget.csv'
file_path_focus = '../data/df_FOCUS.csv'
file_path_currencies = '../data/df_currencies.csv'
file_path_revenue = '../data/df_revenue_FBCF.csv'
file_path_ibov = '../data/IBOV_historical.csv'


def _read_dated_csv(csv_path):
    """Read a CSV file, parsing its 'Date' column and using it as the index."""
    return pd.read_csv(csv_path, parse_dates=['Date'], index_col='Date')


# Brazilian macro data
df_macro_monthly = _read_dated_csv(file_path_macro)
# GDP data from other countries
df_gdp_monthly = _read_dated_csv(file_path_gdp)
# Inflation data from other countries
df_inflation_rotated_monthly = _read_dated_csv(file_path_inflation)
# Interest rate data from other countries
df_interest_rotated_monthly = _read_dated_csv(file_path_interest)
# Commodities historical prices
df_commodities = _read_dated_csv(file_path_commodities)
# Brazilian budget data
df_budget = _read_dated_csv(file_path_budget)
# Market expectations indicators from the Brazilian Central Bank
df_focus_indicators_monthly = _read_dated_csv(file_path_focus)
# Currencies historical prices - USD, EUR and CHN
df_currencies_monthly = _read_dated_csv(file_path_currencies)
# Brazilian total revenue for the federal government
df_revenue = _read_dated_csv(file_path_revenue)
# IBOV historical prices
df_ibov = _read_dated_csv(file_path_ibov)

In [None]:
# Alternative to the CSV import above: uncomment these lines to load the dataframes directly from the import scripts

# from import_budget_data import df_budget
# from import_oecd_api import df_gdp_monthly, df_inflation_rotated_monthly, df_interest_rotated_monthly
# from import_bcb_api import df_macro_monthly
# from import_bcb_currencies import df_currencies_monthly
# from import_bcb_focus import df_focus_indicators_monthly
# from import_imf_api import df_commodities
# from import_ipea_api import df_revenue

In [7]:
# Every dataframe that takes part in the merge, in merge order
dataframes = [
    df_macro_monthly,              # Brazilian macro data
    df_gdp_monthly,                # GDP data from other countries
    df_inflation_rotated_monthly,  # Inflation data from other countries
    df_interest_rotated_monthly,   # Interest rate data from other countries
    df_commodities,                # Commodities historical prices
    df_budget,                     # Brazilian budget data
    df_focus_indicators_monthly,   # Market expectations indicators from the Brazilian Central Bank
    df_currencies_monthly,         # Currencies historical prices - USD, EUR and CHN
    df_revenue,                    # Brazilian total revenue for the federal government
    df_ibov,                       # IBOV historical prices
]

In [8]:
# Build one dataframe by left-joining every dataframe onto the macro data.
# The macro dataframe is the starting point because it contains the target variable.
merged_df = df_macro_monthly

for df in dataframes:
    # Name the index 'Date' (in place) so it can be used as the join key
    df.rename_axis('Date', inplace=True)
    if df is df_macro_monthly:
        continue  # the base dataframe is already in merged_df
    # Left join: keep every row of merged_df and attach the matching dates of df
    merged_df = merged_df.merge(df, on='Date', how='left')

In [9]:
# Cast every column to float, then fill the missing values:
#   1. interpolate the NaN values that lie between two valid observations;
#   2. back-fill the NaN values at the beginning of each time series
#      with the next valid value of the column.
cleaned_df = (
    merged_df.astype(float)
    .interpolate(limit_area='inside')
    .bfill()
)

# Keep only the rows whose index is earlier than 2024-01-01, then
#   3. forward-fill the NaN values left at the end of each time series
#      with the previous valid value of the column.
cleaned_df = cleaned_df.loc[cleaned_df.index < '2024-01-01'].ffill()



In [7]:
# Write the cleaned, merged dataframe (index included) to a CSV file
output_path = '../data/cleaned_df.csv'
cleaned_df.to_csv(output_path)