In [80]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
from sklearn.model_selection import TimeSeriesSplit
from pandas.tseries.offsets import DateOffset
import json
from datetime import datetime
import copy
import warnings
import wandb
import os
import sys
warnings.filterwarnings('ignore')
os.environ["WANDB_SILENT"] = "true"

In [81]:
%load_ext autoreload
%autoreload 2
from models.models import HoltWintersWrapper, ProphetWrapper, VarimaWrapper, AutoArimaWrapper

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [117]:
cpi = pd.read_csv('cpi.csv')
fuel_df = pd.read_csv('fuel.csv')
sabor_df = pd.read_csv('sabor.csv')
currency = pd.read_csv('currency_data.csv')
credit_df = pd.read_excel('credit.xlsx', header=1)
jse = pd.read_csv('jse_indices.csv')
cpi_weights = pd.read_excel('cpi_weights.xlsx', header=1)   

In [118]:
fuel_df.loc[fuel_df[fuel_df['date'] == '2021-08'].tail(1).index,'date'] = '2021-09'
fuel_df = fuel_df.drop_duplicates(subset=['date'], keep='first')

In [119]:
cpi_columns = list(cpi.columns[2:-1])

In [120]:
cpi_columns

['Food and non-alcoholic beverages',
 'Alcoholic beverages and tobacco',
 'Clothing and footwear',
 'Housing and utilities',
 'Household contents and services',
 'Health',
 'Transport',
 'Communication',
 'Recreation and culture',
 'Education',
 'Restaurants and hotels',
 'Miscellaneous goods and services',
 'headline CPI']

In [121]:
cpi = pd.merge(cpi,fuel_df, on="date")
cpi = pd.merge(cpi,sabor_df, on="date")

In [122]:
currency['Date'] = pd.to_datetime(currency['Date']) + pd.DateOffset(months=2)
currency['Date'] = currency['Date'].map(lambda x: x.strftime('%Y-%m'))

currency = currency.groupby('Date').mean().reset_index()

In [123]:
sabor_df['date'] = pd.to_datetime(sabor_df['date']) + pd.DateOffset(months=1)
sabor_df['date'] = sabor_df['date'].map(lambda x: x.strftime('%Y-%m'))

sabor_df = sabor_df.groupby('date').mean().reset_index()

In [124]:
credit_df = credit_df.replace(' ', np.nan)
credit_df = credit_df.dropna()

credit_df['Credit card advances'] = credit_df['Credit card advances'].str.replace(',','').astype(float)
credit_df['Total loans and advances : Households'] = credit_df['Total loans and advances : Households'].str.replace(',','').astype(float) 

credit_df = credit_df[['Date', 'Credit card advances', 'Total loans and advances : Households']]

credit_df = credit_df.groupby('Date').mean().reset_index()

credit_df['date'] = credit_df['Date']
credit_df['date'] = pd.to_datetime(credit_df['date']) + pd.DateOffset(months=2)
credit_df['date'] = credit_df['date'].dt.strftime('%Y-%m')
#credit_df['Credit card advances mean'] = credit_df['Credit card advances'].mean()
#credit_df['Total loans and advances : Households mean'] = credit_df['Total loans and advances : Households'].mean()
credit_df = credit_df[['date', 'Credit card advances', 'Total loans and advances : Households']]

In [125]:
fuel_offeset_df = fuel_df.copy()
fuel_offeset_2_df = fuel_df.copy()
fuel_offeset_3_df = fuel_df.copy()

In [126]:
fuel_offeset_df['date'] = pd.to_datetime(fuel_offeset_df['date']) + pd.DateOffset(months=1)
fuel_offeset_df['date'] = fuel_offeset_df['date'].map(lambda x: x.strftime('%Y-%m'))
fuel_offeset_df.dropna(inplace=True)

In [127]:
fuel_offeset_2_df['date'] = pd.to_datetime(fuel_offeset_2_df['date']) + pd.DateOffset(months=2)
fuel_offeset_2_df['date'] = fuel_offeset_2_df['date'].map(lambda x: x.strftime('%Y-%m'))
fuel_offeset_2_df.dropna(inplace=True)

In [128]:
fuel_offeset_3_df['date'] = pd.to_datetime(fuel_offeset_3_df['date']) + pd.DateOffset(months=3)
fuel_offeset_3_df['date'] = fuel_offeset_3_df['date'].map(lambda x: x.strftime('%Y-%m'))
fuel_offeset_3_df.dropna(inplace=True)

In [129]:
merged_df = pd.merge(fuel_df, sabor_df, on='date', how='left')

fuel_offeset_df.rename(columns={'fuel_price': 'fuel_offset_1'}, inplace=True)

merged_df = pd.merge(merged_df, fuel_offeset_df, on='date', how='left')

fuel_offeset_3_df.rename(columns={'fuel_price': 'fuel_offset_3'}, inplace=True)
currency.rename(columns={'Date': 'date'}, inplace=True)

merged_df = pd.merge(merged_df, fuel_offeset_3_df, on='date', how='left')
merged_df = pd.merge(merged_df, currency, on='date', how='left')

merged_df = pd.merge(merged_df, credit_df, on='date', how='left')

In [130]:
merged_df = merged_df.dropna()

In [131]:
merged_df

Unnamed: 0,date,fuel_price,sabor,fuel_offset_1,fuel_offset_3,USD/ZAR,GBP/ZAR,EUR/ZAR,Credit card advances,Total loans and advances : Households
1,2023-07,21.34,8.218091,21.58,21.92,19.089853,23.817278,20.761558,147743.0,2079298.0
2,2023-06,21.58,7.804391,22.29,22.00,18.156630,22.598746,19.953257,146599.0,2072226.0
3,2023-05,22.29,7.713450,21.92,20.73,18.264031,22.163418,19.565940,144636.0,2066873.0
4,2023-04,21.92,7.223304,22.00,20.45,17.916716,21.611035,19.168071,145007.0,2063280.0
5,2023-03,22.00,7.211850,20.73,22.51,17.076137,20.900737,18.413888,144108.0,2049891.0
...,...,...,...,...,...,...,...,...,...,...
61,2018-07,15.43,6.477952,15.20,13.89,12.513433,16.881612,14.814248,108539.0,1573984.0
62,2018-06,15.20,6.495217,14.38,13.27,12.067608,16.989794,14.818127,107860.0,1566812.0
63,2018-05,14.38,6.504286,13.89,13.63,11.843666,16.529828,14.597329,106994.0,1564570.0
64,2018-04,13.89,6.732000,13.27,13.93,11.818639,16.556277,14.617775,107286.0,1560607.0


In [132]:
df = merged_df.drop(['date'], axis=1)
normalized_df=(df-df.mean())/df.std()

In [133]:
normalized_df['date'] = merged_df['date']

In [134]:
#merged_df = normalized_df

In [135]:
models = [  AutoArimaWrapper(),
            VarimaWrapper(extra_data=merged_df, name_postfix="all_data"),
            HoltWintersWrapper(seasonal_periods=3),
            HoltWintersWrapper(seasonal_periods=6), 
            HoltWintersWrapper(seasonal_periods=12),
            HoltWintersWrapper(seasonal_periods=18),
            ProphetWrapper(extra_data=merged_df, name_postfix="all_data", changepoint_prior_scale=10, changepoint_range=0.8),
           ProphetWrapper(extra_data=merged_df, name_postfix="all_data", changepoint_prior_scale=5, changepoint_range=0.8, n_changepoints=20), 
            ProphetWrapper(extra_data=merged_df, name_postfix="all_data", changepoint_prior_scale=1, changepoint_range=0.8),]


In [136]:
test_set = cpi[cpi['date'] >= '2023-04']
train_val_set = cpi[cpi['date'] < '2023-04']
tscv = TimeSeriesSplit(n_splits=15,test_size=1)

In [138]:
results = {}
for model in models:
    model_results = []


    for category in tqdm(cpi_columns):

        if category == 'headline CPI':
            model_results.append(0)
        else:
        
            intermediate_results = {'test': [], 'pred': []}
            for i, (train_index, test_index) in enumerate(tscv.split(train_val_set[['date',category]])):
                model.fit(model.getExtraData(), cpi[['date',category]].iloc[train_index])
                intermediate_results['test'].append(*cpi[category].iloc[test_index].values)
                intermediate_results['pred'].append(*model.predict(len(test_index)))
            result = mean_squared_error(intermediate_results['test'], intermediate_results['pred'], squared=True)
            model_results.append(result)
        
    results[model.getModelName()] = model_results

100%|██████████| 13/13 [16:53<00:00, 77.99s/it]
100%|██████████| 13/13 [00:01<00:00,  6.84it/s]
100%|██████████| 13/13 [00:19<00:00,  1.48s/it]
100%|██████████| 13/13 [00:20<00:00,  1.60s/it]
100%|██████████| 13/13 [00:20<00:00,  1.60s/it]
100%|██████████| 13/13 [00:22<00:00,  1.74s/it]
100%|██████████| 13/13 [28:08<00:00, 129.87s/it]
100%|██████████| 13/13 [28:12<00:00, 130.20s/it]
100%|██████████| 13/13 [28:00<00:00, 129.29s/it]


In [139]:
results_df = pd.DataFrame(results, index=cpi_columns)

In [140]:
best_model_table = dict(results_df.T.apply(lambda x: x.idxmin()))
best_model_table

{'Food and non-alcoholic beverages': 'HoltWinters_mul_mul_18',
 'Alcoholic beverages and tobacco': 'HoltWinters_mul_mul_12',
 'Clothing and footwear': 'HoltWinters_mul_mul_18',
 'Housing and utilities': 'HoltWinters_mul_mul_12',
 'Household contents and services': 'AutoArima_',
 'Health': 'HoltWinters_mul_mul_12',
 'Transport': 'AutoArima_',
 'Communication': 'AutoArima_',
 'Recreation and culture': 'HoltWinters_mul_mul_3',
 'Education': 'HoltWinters_mul_mul_12',
 'Restaurants and hotels': 'HoltWinters_mul_mul_18',
 'Miscellaneous goods and services': 'HoltWinters_mul_mul_18',
 'headline CPI': 'AutoArima_'}

In [141]:
model_table  = {}
for model in models:
    model_table[model.getModelName()] = copy.deepcopy(model)

In [150]:
cpi_weights.set_index('Headline_CPI', inplace=True)
cpi_weights = cpi_weights.set_axis([entry.strip() for entry in cpi_weights.index], axis='index')

In [160]:
results_table = {}
results = ""
for date in test_set['date']:
    input_data = cpi[cpi['date'] < date]
    for entry in best_model_table:

        if entry == "headline CPI":

            head_line = 0
            for component in cpi_weights.index:
                head_line = head_line + float((cpi_weights.loc[component] * results_table[component])/100)
            results_table[entry] = head_line
        else: 
            model_table[best_model_table[entry]].fit(model.getExtraData(), input_data[['date',str(entry)]])
            #print(f"{entry} {model_table[best_model_table[entry]].predict(1)}")
            results_table[entry] = model_table[best_model_table[entry]].predict(1)[0]
    rmse = mean_squared_error(cpi[cpi['date'] == date][list(results_table.keys())], pd.DataFrame(results_table, index=[0]), squared=True)
    results = results + f"{date}: {rmse} \n"
    print(f"{date}: {rmse}")

2023-04: 0.37762810877263087
2023-05: 0.21995753080149416
2023-06: 0.284964046483965
2023-07: 0.38224154257754833


In [161]:
results

'2023-04: 0.37762810877263087 \n2023-05: 0.21995753080149416 \n2023-06: 0.284964046483965 \n2023-07: 0.38224154257754833 \n'

In [162]:
results_table

{'Food and non-alcoholic beverages': 119.57075463643407,
 'Alcoholic beverages and tobacco': 111.40076613407308,
 'Clothing and footwear': 104.38187283273494,
 'Housing and utilities': 107.32215255061381,
 'Household contents and services': 107.52237461140032,
 'Health': 110.79509822450954,
 'Transport': 111.93295180591072,
 'Communication': 99.54675291622932,
 'Recreation and culture': 105.63272104478366,
 'Education': 110.35993202032165,
 'Restaurants and hotels': 111.24655147390193,
 'Miscellaneous goods and services': 110.23839300535309,
 'headline CPI': 110.65298665264577}

In [163]:
results_table = {}
for date in ['2023-08']:
    input_data = cpi[cpi['date'] < date]
    for entry in best_model_table:
        
        if entry == "headline CPI":

            head_line = 0
            for component in cpi_weights.index:
                head_line = head_line + float((cpi_weights.loc[component] * results_table[component])/100)
            results_table[entry] = head_line

        model_table[best_model_table[entry]].fit(model.getExtraData(), input_data[['date',str(entry)]])
        #print(f"{entry} {model_table[best_model_table[entry]].predict(1)}")
        results_table[entry] = model_table[best_model_table[entry]].predict(1)[0]
    print(f"{date} {rmse}")

2023-08 0.38224154257754833


In [164]:
results_table

{'Food and non-alcoholic beverages': 118.99792218108912,
 'Alcoholic beverages and tobacco': 112.03677886969798,
 'Clothing and footwear': 104.66179996857969,
 'Housing and utilities': 108.51651669094672,
 'Household contents and services': 108.50077496901581,
 'Health': 110.7138412271973,
 'Transport': 113.4345600230269,
 'Communication': 99.44615384615385,
 'Recreation and culture': 105.85196499754676,
 'Education': 110.36316804610668,
 'Restaurants and hotels': 111.44570955293615,
 'Miscellaneous goods and services': 111.07354649824681,
 'headline CPI': 111.32754240500508}

In [165]:
cpi[cpi['date'] == '2023-06']

Unnamed: 0.1,Unnamed: 0,index,Food and non-alcoholic beverages,Alcoholic beverages and tobacco,Clothing and footwear,Housing and utilities,Household contents and services,Health,Transport,Communication,Recreation and culture,Education,Restaurants and hotels,Miscellaneous goods and services,headline CPI,date,fuel_price,sabor
77,77,cpi_M202306,118.3,110.9,104.3,105.4,107.7,110.8,112.3,99.6,105.3,110.4,110.0,109.6,109.8,2023-06,21.58,8.218091


In [166]:
cpi[cpi['date'] == '2023-06'][list(results_table.keys())]
pd.DataFrame(results_table, index=[0])

Unnamed: 0,Food and non-alcoholic beverages,Alcoholic beverages and tobacco,Clothing and footwear,Housing and utilities,Household contents and services,Health,Transport,Communication,Recreation and culture,Education,Restaurants and hotels,Miscellaneous goods and services,headline CPI
0,118.997922,112.036779,104.6618,108.516517,108.500775,110.713841,113.43456,99.446154,105.851965,110.363168,111.44571,111.073546,111.327542


In [167]:
mean_squared_error(cpi[cpi['date'] == '2023-06'][list(results_table.keys())], pd.DataFrame(results_table, index=[0]), squared=True)

1.5756399373906393

In [168]:
sabor_df

Unnamed: 0,date,sabor
0,2007-09,9.358478
1,2007-10,9.596100
2,2007-11,9.914913
3,2007-12,10.120636
4,2008-01,10.508286
...,...,...
187,2023-04,7.223304
188,2023-05,7.713450
189,2023-06,7.804391
190,2023-07,8.218091


In [169]:
mean_squared_error(intermediate_results['test'], intermediate_results['pred'], squared=True)

12.31123780599075

In [170]:
for date in test_set['date']:
    print(date)

2023-04
2023-05
2023-06
2023-07


In [171]:
merged_df

Unnamed: 0,date,fuel_price,sabor,fuel_offset_1,fuel_offset_3,USD/ZAR,GBP/ZAR,EUR/ZAR,Credit card advances,Total loans and advances : Households
1,2023-07,21.34,8.218091,21.58,21.92,19.089853,23.817278,20.761558,147743.0,2079298.0
2,2023-06,21.58,7.804391,22.29,22.00,18.156630,22.598746,19.953257,146599.0,2072226.0
3,2023-05,22.29,7.713450,21.92,20.73,18.264031,22.163418,19.565940,144636.0,2066873.0
4,2023-04,21.92,7.223304,22.00,20.45,17.916716,21.611035,19.168071,145007.0,2063280.0
5,2023-03,22.00,7.211850,20.73,22.51,17.076137,20.900737,18.413888,144108.0,2049891.0
...,...,...,...,...,...,...,...,...,...,...
61,2018-07,15.43,6.477952,15.20,13.89,12.513433,16.881612,14.814248,108539.0,1573984.0
62,2018-06,15.20,6.495217,14.38,13.27,12.067608,16.989794,14.818127,107860.0,1566812.0
63,2018-05,14.38,6.504286,13.89,13.63,11.843666,16.529828,14.597329,106994.0,1564570.0
64,2018-04,13.89,6.732000,13.27,13.93,11.818639,16.556277,14.617775,107286.0,1560607.0


In [172]:
currency

Unnamed: 0,date,USD/ZAR,GBP/ZAR,EUR/ZAR
0,2018-03,12.197131,16.812471,14.852223
1,2018-04,11.818639,16.556277,14.617775
2,2018-05,11.843666,16.529828,14.597329
3,2018-06,12.067608,16.989794,14.818127
4,2018-07,12.513433,16.881612,14.814248
...,...,...,...,...
61,2023-04,17.916716,21.611035,19.168071
62,2023-05,18.264031,22.163418,19.565940
63,2023-06,18.156630,22.598746,19.953257
64,2023-07,19.089853,23.817278,20.761558


In [173]:
sabor_df

Unnamed: 0,date,sabor
0,2007-09,9.358478
1,2007-10,9.596100
2,2007-11,9.914913
3,2007-12,10.120636
4,2008-01,10.508286
...,...,...
187,2023-04,7.223304
188,2023-05,7.713450
189,2023-06,7.804391
190,2023-07,8.218091


In [174]:
fuel_df

Unnamed: 0,date,fuel_price
0,2023-08,21.71
1,2023-07,21.34
2,2023-06,21.58
3,2023-05,22.29
4,2023-04,21.92
...,...,...
181,2008-05,9.29
182,2008-04,8.74
183,2008-03,8.07
184,2008-02,7.46


In [175]:
prediction_df = pd.DataFrame.from_dict(results_table,orient='index',columns=['Value'])

In [176]:
pred_month = 'August'

In [177]:
# Create a dictionary to update the category names to allow a submission to Zindi.
# month="May"
pred_category_dict = [pred_month+"_food and non-alcoholic beverages",
                      pred_month+"_alcoholic beverages and tobacco",
                      pred_month+"_clothing and footwear",
                      pred_month+"_housing and utilities",
                      pred_month+"_household contents and services",
                      pred_month+"_health",
                      pred_month+"_transport",
                      pred_month+"_communication",
                      pred_month+"_recreation and culture",
                      pred_month+"_education",
                      pred_month+"_restaurants and hotels",
                      pred_month+"_miscellaneous goods and services",
                      pred_month+"_headline CPI"
]

In [178]:
prediction_df['ID'] = pred_category_dict

In [179]:
# output to csv file
prediction_df[['ID', 'Value']].to_csv( f'submissions/multi_model_{pred_month}.csv', index = 0)

In [180]:
prediction_df

Unnamed: 0,Value,ID
Food and non-alcoholic beverages,118.997922,August_food and non-alcoholic beverages
Alcoholic beverages and tobacco,112.036779,August_alcoholic beverages and tobacco
Clothing and footwear,104.6618,August_clothing and footwear
Housing and utilities,108.516517,August_housing and utilities
Household contents and services,108.500775,August_household contents and services
Health,110.713841,August_health
Transport,113.43456,August_transport
Communication,99.446154,August_communication
Recreation and culture,105.851965,August_recreation and culture
Education,110.363168,August_education


In [181]:
results_table

{'Food and non-alcoholic beverages': 118.99792218108912,
 'Alcoholic beverages and tobacco': 112.03677886969798,
 'Clothing and footwear': 104.66179996857969,
 'Housing and utilities': 108.51651669094672,
 'Household contents and services': 108.50077496901581,
 'Health': 110.7138412271973,
 'Transport': 113.4345600230269,
 'Communication': 99.44615384615385,
 'Recreation and culture': 105.85196499754676,
 'Education': 110.36316804610668,
 'Restaurants and hotels': 111.44570955293615,
 'Miscellaneous goods and services': 111.07354649824681,
 'headline CPI': 111.32754240500508}

In [182]:
merged_df

Unnamed: 0,date,fuel_price,sabor,fuel_offset_1,fuel_offset_3,USD/ZAR,GBP/ZAR,EUR/ZAR,Credit card advances,Total loans and advances : Households
1,2023-07,21.34,8.218091,21.58,21.92,19.089853,23.817278,20.761558,147743.0,2079298.0
2,2023-06,21.58,7.804391,22.29,22.00,18.156630,22.598746,19.953257,146599.0,2072226.0
3,2023-05,22.29,7.713450,21.92,20.73,18.264031,22.163418,19.565940,144636.0,2066873.0
4,2023-04,21.92,7.223304,22.00,20.45,17.916716,21.611035,19.168071,145007.0,2063280.0
5,2023-03,22.00,7.211850,20.73,22.51,17.076137,20.900737,18.413888,144108.0,2049891.0
...,...,...,...,...,...,...,...,...,...,...
61,2018-07,15.43,6.477952,15.20,13.89,12.513433,16.881612,14.814248,108539.0,1573984.0
62,2018-06,15.20,6.495217,14.38,13.27,12.067608,16.989794,14.818127,107860.0,1566812.0
63,2018-05,14.38,6.504286,13.89,13.63,11.843666,16.529828,14.597329,106994.0,1564570.0
64,2018-04,13.89,6.732000,13.27,13.93,11.818639,16.556277,14.617775,107286.0,1560607.0


In [47]:
credit

Unnamed: 0,Date,Instalment sale credit,Leasing finance,Mortgage advances,Overdrafts,General loans and advances,Credit card advances,Of which: Total to households,Total loans and advances : Households,Claims on the domestic private sector,...,Leasing finance.1,Mortgage advances.1,Overdrafts.1,General loans and advances.1,Credit card advances.1,Of which: Total to corporations,Claims on the domestic private sector.1,Total loans and advances : Corporations,Share of corporations as a % of total credit,Share of corporations as a % of total loans & advances
0,"May, 2023",367056,1393,1221559,45709,295838,147743,2079298,2079298,4497380,...,9173,567433,214288,1076565,7833,2418082,4497380,2045612,53.77,49.59
1,"Apr, 2023",364977,1348,1218633,46140,294529,146599,2072226,2072226,4502435,...,8374,565443,245363,1107596,7621,2430209,4502435,2102765,53.98,50.37
2,"Mar, 2023",364306,1316,1215516,46040,295060,144636,2066873,2066873,4510065,...,9278,565277,232727,1121593,7834,2443192,4510065,2102130,54.17,50.42
3,"Feb, 2023",360106,1322,1214179,48072,294594,145007,2063280,2063280,4498880,...,8288,564899,225445,1123377,7412,2435599,4498880,2093562,54.14,50.36
4,"Jan, 2023",358774,1224,1205442,46228,294114,144108,2049891,2049891,4464464,...,8869,560414,216473,1120285,6797,2414573,4464464,2073890,54.08,50.29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
694,"Jul, 1965",,,,,,,,,4333,...,,,,,,,4333,,,
695,"Jun, 1965",,,,,,,,,4318,...,,,,,,,4318,,,
696,"May, 1965",,,,,,,,,4296,...,,,,,,,4296,,,
697,"Apr, 1965",,,,,,,,,4257,...,,,,,,,4257,,,


In [40]:
cpi_weights[0]

Headline_CPI
Alcoholic beverages and tobacco      6.26
Clothing and footwear                3.65
Communication                        2.42
Education                            2.62
Food and non-alcoholic beverages    17.14
Health                               1.44
Household contents and services      4.37
Housing and utilities               24.49
Miscellaneous goods and services    14.81
Recreation and culture               5.20
Restaurants and hotels               3.25
Transport                           14.35
Name: 0, dtype: float64

In [10]:
for component in cpi_weights['Headline_CPI']:
    print(component)

Alcoholic beverages and tobacco
Clothing and footwear
Communication
Education
Food and non-alcoholic beverages
Health
Household contents and services
Housing and utilities
Miscellaneous goods and services
Recreation and culture
Restaurants and hotels 
Transport


In [15]:
cpi.iloc[3]

Unnamed: 0                                    3
index                               cpi_M201704
Food and non-alcoholic beverages           82.3
Alcoholic beverages and tobacco            80.1
Clothing and footwear                      93.3
Housing and utilities                      82.8
Household contents and services            90.5
Health                                     83.4
Transport                                  77.0
Communication                             102.9
Recreation and culture                     96.4
Education                                  79.4
Restaurants and hotels                     89.2
Miscellaneous goods and services           82.0
headline CPI                               83.6
date                                    2017-04
Name: 3, dtype: object

In [18]:
cpi_weights.set_index('Headline_CPI', inplace=True)

In [25]:
cpi_weights.sum()

0    100.0
dtype: float64

In [145]:
((cpi_weights[0] * cpi.iloc[70]).dropna())

Series([], dtype: object)

In [38]:
cpi.iloc[70]

Unnamed: 0                                   70
index                               cpi_M202211
Food and non-alcoholic beverages          112.0
Alcoholic beverages and tobacco           106.4
Clothing and footwear                     102.6
Housing and utilities                     103.8
Household contents and services           105.2
Health                                    104.7
Transport                                 113.1
Communication                              99.2
Recreation and culture                    102.8
Education                                 104.4
Restaurants and hotels                    108.0
Miscellaneous goods and services          104.7
headline CPI                              106.8
date                                    2022-11
Name: 70, dtype: object

In [44]:
for component in cpi_weights[0]:
    print(component)

6.26
3.65
2.42
2.62
17.14
1.44
4.37
24.49
14.81
5.2
3.25
14.35


In [147]:
cpi_weights

Unnamed: 0,Headline_CPI,0
0,Alcoholic beverages and tobacco,6.26
1,Clothing and footwear,3.65
2,Communication,2.42
3,Education,2.62
4,Food and non-alcoholic beverages,17.14
5,Health,1.44
6,Household contents and services,4.37
7,Housing and utilities,24.49
8,Miscellaneous goods and services,14.81
9,Recreation and culture,5.2


In [72]:
cpi_weights.index

Index(['Alcoholic beverages and tobacco', 'Clothing and footwear',
       'Communication', 'Education', 'Food and non-alcoholic beverages',
       'Health', 'Household contents and services', 'Housing and utilities',
       'Miscellaneous goods and services', 'Recreation and culture',
       'Restaurants and hotels ', 'Transport'],
      dtype='object', name='Headline_CPI')

In [74]:
cpi_weights = cpi_weights.set_axis([entry.strip() for entry in cpi_weights.index], axis='index')

In [75]:
head_line = 0
for entry in cpi_weights.index:
    head_line = head_line + (cpi_weights.loc[entry] * cpi.iloc[70][entry])

In [77]:
head_line/100

0    106.85532
dtype: float64

In [79]:
cpi.iloc[70]

Unnamed: 0                                   70
index                               cpi_M202211
Food and non-alcoholic beverages          112.0
Alcoholic beverages and tobacco           106.4
Clothing and footwear                     102.6
Housing and utilities                     103.8
Household contents and services           105.2
Health                                    104.7
Transport                                 113.1
Communication                              99.2
Recreation and culture                    102.8
Education                                 104.4
Restaurants and hotels                    108.0
Miscellaneous goods and services          104.7
headline CPI                              106.8
date                                    2022-11
Name: 70, dtype: object

In [53]:
cpi_weights.index

Index(['Alcoholic beverages and tobacco', 'Clothing and footwear',
       'Communication', 'Education', 'Food and non-alcoholic beverages',
       'Health', 'Household contents and services', 'Housing and utilities',
       'Miscellaneous goods and services', 'Recreation and culture',
       'Restaurants and hotels ', 'Transport'],
      dtype='object', name='Headline_CPI')

In [71]:
cpi_weights.loc['Food and non-alcoholic beverages']

0    17.14
Name: Food and non-alcoholic beverages, dtype: float64

In [154]:
results_table['Transport']

0    106.85532
dtype: float64

In [153]:
best_model_table

{'Food and non-alcoholic beverages': 'HoltWinters_mul_mul_18',
 'Alcoholic beverages and tobacco': 'HoltWinters_mul_mul_12',
 'Clothing and footwear': 'HoltWinters_mul_mul_18',
 'Housing and utilities': 'HoltWinters_mul_mul_12',
 'Household contents and services': 'AutoArima_',
 'Health': 'HoltWinters_mul_mul_12',
 'Transport': 'AutoArima_',
 'Communication': 'AutoArima_',
 'Recreation and culture': 'HoltWinters_mul_mul_3',
 'Education': 'HoltWinters_mul_mul_12',
 'Restaurants and hotels': 'HoltWinters_mul_mul_18',
 'Miscellaneous goods and services': 'HoltWinters_mul_mul_18',
 'headline CPI': 'AutoArima_'}

In [183]:
best_model_table['headline CPI'] = 'Weighted'

In [184]:
best_model_table

{'Food and non-alcoholic beverages': 'HoltWinters_mul_mul_18',
 'Alcoholic beverages and tobacco': 'HoltWinters_mul_mul_12',
 'Clothing and footwear': 'HoltWinters_mul_mul_18',
 'Housing and utilities': 'HoltWinters_mul_mul_12',
 'Household contents and services': 'AutoArima_',
 'Health': 'HoltWinters_mul_mul_12',
 'Transport': 'AutoArima_',
 'Communication': 'AutoArima_',
 'Recreation and culture': 'HoltWinters_mul_mul_3',
 'Education': 'HoltWinters_mul_mul_12',
 'Restaurants and hotels': 'HoltWinters_mul_mul_18',
 'Miscellaneous goods and services': 'HoltWinters_mul_mul_18',
 'headline CPI': 'Weighted'}