In [1]:
import numpy as np
import pandas as pd
import os
import glob


In [2]:
# Load every CSV in `path` into a module-level DataFrame named after the file
# (file name without extension), and collect the set of countries seen.
path = 'files/'
var_list = []   # names of the loaded variables, in load order
countries = []  # unique LOCATION codes across all files, first-seen order

# sorted(): glob returns files in filesystem order, which is not reproducible
# across machines; sorting keeps var_list (and everything derived) stable.
for f in sorted(glob.glob(os.path.join(path, '*.csv'))):
    variable_name = os.path.splitext(os.path.basename(f))[0]
    var_list.append(variable_name)
    # parse the 'TIME' column as datetimes and use it as the index
    frame = pd.read_csv(f, parse_dates=['TIME'], index_col='TIME')
    # later cells look the frames up by name via globals()
    globals()[variable_name] = frame
    # Report the real date span. min()/max() rather than first/last row,
    # because the first and last rows only reflect whichever rows happen to
    # start and end the file, not the earliest/latest date it contains.
    print(f'{variable_name}\nFrom {frame.index.min()} until {frame.index.max()}')
    # one entry per distinct country in this file (order of first appearance)
    countries.extend(frame['LOCATION'].unique())

# de-duplicate once, after all files are read, preserving first-seen order
countries = list(dict.fromkeys(countries))
    



CPI_M
From 1979-09-01 00:00:00 until 2023-05-01 00:00:00
CPI_Q
From 1979-10-01 00:00:00 until 2023-01-01 00:00:00
GDP_Q_PC_CHG_PP
From 1980-01-01 00:00:00 until 2023-01-01 00:00:00
GDP_Q_PC_CHG_PY
From 1980-01-01 00:00:00 until 2023-01-01 00:00:00
IPI_M
From 1979-12-01 00:00:00 until 2023-03-01 00:00:00
IPI_Q
From 1980-01-01 00:00:00 until 2023-01-01 00:00:00
LongTerm_M
From 1980-01-01 00:00:00 until 2023-05-01 00:00:00
LongTerm_Q
From 1979-10-01 00:00:00 until 2023-01-01 00:00:00
ShortTerm_M
From 1997-07-01 00:00:00 until 2023-05-01 00:00:00
ShortTerm_Q
From 1999-07-01 00:00:00 until 2023-01-01 00:00:00


In [3]:
# save formal variable description to use as main sheet name
variable_description = {}

for var in var_list:
    # Join the values in columns 1-3 of the first row with spaces
    # (for CPI_M this yields 'CPI TOT IDX2015').
    # A single positional .iloc[row, cols] is used on purpose: indexing the
    # row Series with a bare integer (row[1]) relies on a positional fallback
    # that pandas has deprecated for label-indexed Series.
    description_parts = globals()[var].iloc[0, 1:4]
    variable_description[var] = ' '.join(description_parts)

In [4]:
# spot-check: display the description stored for the 'CPI_M' variable
variable_description['CPI_M']

'CPI TOT IDX2015'

In [5]:
# Reshape every variable to wide form (one column per country, 'Value' as the
# cell content) and make sure each frame has a column for every known country.
for var in var_list:
    frame = globals()[var]
    # Pivot only while the frame is still in its raw long layout. Without this
    # guard, re-running the cell fails with a KeyError because the pivoted
    # frame no longer has a 'LOCATION' column.
    if 'LOCATION' in frame.columns:
        frame = frame.pivot(columns='LOCATION', values='Value')
    # countries that have no data at all for this variable
    absent = [country for country in countries if country not in frame.columns]
    # Append all absent countries in one step as all-NaN columns; existing
    # columns keep their order and the new ones follow in `countries` order.
    globals()[var] = frame.reindex(columns=[*frame.columns, *absent])


In [6]:
# For each variable, map every country to the list of index dates on which
# that country's value is NaN.
all_missing_dates = {}

for var in var_list:
    wide_frame = globals()[var]
    gaps_by_country = {}
    for country in countries:
        series = wide_frame[country]
        is_gap = series.isna()
        gaps_by_country[country] = series[is_gap].index.tolist()
    all_missing_dates[var] = gaps_by_country

In [34]:
# For each variable, record per country the first and the last index date
# that holds a valid (non-NaN) value. The extra 'index' entry labels the two
# positions of every list as 'first' and 'last'.
first_last_date = {}

for var in var_list:
    wide_frame = globals()[var]
    bounds = {'index': ['first', 'last']}
    for country in countries:
        series = wide_frame[country]
        earliest = series.first_valid_index()
        latest = series.last_valid_index()
        bounds[country] = [earliest, latest]
    first_last_date[var] = bounds

In [48]:
# Write one workbook per variable: "<var>.xlsx" with three sheets —
# the data itself, the first/last valid dates, and all missing dates.
for var in var_list:
    # separate name so the input directory held in `path` is not overwritten
    output_path = "%s.xlsx" % var
    main_sheet = variable_description[var]
    with pd.ExcelWriter(output_path) as writer:
        # Use the data and metadata of the variable being exported; the
        # previous version wrote CPI_M into every workbook.
        globals()[var].to_excel(writer, sheet_name=main_sheet, index=True)
        pd.DataFrame.from_dict(first_last_date[var], orient='columns').to_excel(writer, sheet_name="first_last", index=True)
        pd.DataFrame.from_dict(all_missing_dates[var], orient='index').to_excel(writer, sheet_name="all_missing", index=True)
    