#### Importing Required libraries for modeling and representation

In [1]:
# Import necessary libraries
from numpy import array
import pandas as pd
import numpy as np
from datetime import datetime
from numpy import asarray
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, KFold
import random
from sklearn.utils import shuffle
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import tensorflow as tf
import pickle
import csv
from dateutil.relativedelta import relativedelta

#### Reading historical data from multiple files and cleaning and formatting the data

In [2]:
# Load the two monthly CPI extracts and stack them in chronological order
cpih_monthly_1950_1988 = pd.read_csv('../input_data/cpi_data_Jan1950_Dec1988.csv')
cpih_monthly_1989_2023 = pd.read_csv('../input_data/cpi_data_Jan1989_Oct2023.csv')
cpih_data = pd.concat([cpih_monthly_1950_1988, cpih_monthly_1989_2023], axis=0, ignore_index=True)

# 'Dates' is split on '-' into a month abbreviation and a two-digit year
# (assumes values shaped like 'Jan-50' -- TODO confirm against the CSV files)
date_parts = cpih_data['Dates'].str.split('-')
month_names = date_parts.str[0].astype('str')
short_years = date_parts.str[1].astype('str')
parsed_dates = pd.to_datetime(month_names + ' ' + short_years, format='%b %y')

# %y resolves 00-68 to 2000-2068, so the 1950-1968 rows would land in the future.
# An observation cannot post-date today, so any such date is moved back one century.
parsed_dates = parsed_dates.where(
    parsed_dates <= pd.Timestamp.now(),
    parsed_dates - pd.DateOffset(years=100),
)

# Store the dates back as 'MM/DD/YYYY HH:MM' strings and use them as the index
cpih_data['Dates'] = parsed_dates.dt.strftime('%m/%d/%Y %H:%M')
cpih_data = cpih_data.set_index('Dates')
cpih_data.tail()

Unnamed: 0_level_0,CPIH,CPI,Average CPIH
Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
06/01/2023 00:00,7.3,7.9,2.6
07/01/2023 00:00,6.4,6.8,2.6
08/01/2023 00:00,6.3,6.7,2.6
09/01/2023 00:00,6.3,6.7,2.6
10/01/2023 00:00,4.7,4.6,2.6


#### Building a model for CPIH prediction using a time-variant LSTM approach and training it on historical data

In [None]:
# Normalise the CPIH series into the [0, 1] range (the scaler works on a single-feature column)
cpih_scaler = MinMaxScaler()
cpih_column = asarray(cpih_data['CPIH']).reshape(-1, 1)
scaled_cpih = cpih_scaler.fit_transform(cpih_column)
# Hold back the 50 most recent observations: they become the input window of the
# out-of-sample forecast, while everything before them is used to build training samples
holdout_start = len(scaled_cpih) - 50
cpih_sample_forecast_input = scaled_cpih[holdout_start:, 0]
scaled_cpih = scaled_cpih[:holdout_start, 0]

# Transform the remaining time series into a univariate supervised learning problem
def split_sequences(sequence, n_steps_in, n_steps_out):
    """Slice a series into (input window, target window) training pairs.

    Sample ``i`` uses ``sequence[i : i + n_steps_in]`` as input and the
    ``n_steps_out`` values that immediately follow it as target, so the first
    target value is the observation right after the input window.

    Parameters
    ----------
    sequence : sequence or array
        Ordered observations (oldest first).
    n_steps_in : int
        Number of lagged observations per input window.
    n_steps_out : int
        Number of future observations per target window.

    Returns
    -------
    (X, y) : tuple of numpy arrays
        ``X`` has one row of ``n_steps_in`` values per sample and ``y`` one row
        of ``n_steps_out`` values. When the sequence is too short for a single
        sample, empty arrays of shape ``(0, n_steps_in)`` and
        ``(0, n_steps_out)`` are returned.
    """
    values = np.asarray(sequence)
    # Every start index whose input + target window still fits inside the series
    n_samples = len(values) - (n_steps_in + n_steps_out) + 1
    if n_samples <= 0:
        return np.empty((0, n_steps_in)), np.empty((0, n_steps_out))

    X = np.array([values[i:i + n_steps_in] for i in range(n_samples)])
    # The target starts directly after the input window (no one-step gap)
    y = np.array([
        values[i + n_steps_in:i + n_steps_in + n_steps_out]
        for i in range(n_samples)
    ])
    return X, y

# Window sizes: the model reads the previous 50 months and forecasts the next 12
cpih_n_steps_in = 50
cpih_n_steps_out = 12

# Seed every random number generator involved so that weight initialisation
# and dropout are repeatable from one run of the notebook to the next
CPIH_SEED = 42
random.seed(CPIH_SEED)
np.random.seed(CPIH_SEED)
tf.random.set_seed(CPIH_SEED)

# Create sequences for input (X) and output (y)
X, y = split_sequences(scaled_cpih, cpih_n_steps_in, cpih_n_steps_out)

# Chronological 80:20 split (no shuffling, so the held-out windows are the most recent ones)
cpih_total_rows = X.shape[0]
cpih_train_rows = int(cpih_total_rows * 0.8)
cpih_test_rows = cpih_total_rows - cpih_train_rows

X_train_cpih = X[:cpih_train_rows]
X_test_cpih = X[cpih_train_rows:]
y_train_cpih = y[:cpih_train_rows]
y_test_cpih = y[cpih_train_rows:]

# LSTM layers take (samples, timesteps, features). The targets stay 2-D
# (samples, horizon) so that they match the output of the final Dense layer.
X_train_cpih = X_train_cpih.reshape(X_train_cpih.shape[0], X_train_cpih.shape[1], 1)
X_test_cpih = X_test_cpih.reshape(X_test_cpih.shape[0], X_test_cpih.shape[1], 1)

# Build the stacked LSTM: the first layer returns the full sequence for the second one
model = Sequential()
model.add(LSTM(75, activation='relu', return_sequences=True, input_shape=(cpih_n_steps_in, 1)))
model.add(LSTM(75, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(cpih_n_steps_out))
model.compile(loss='mse', optimizer='adam')

# fit() returns the training history; the held-out 20% is passed as validation
# data so that its loss is reported every epoch instead of being left unused
cpih_training_model = model.fit(
    X_train_cpih,
    y_train_cpih,
    validation_data=(X_test_cpih, y_test_cpih),
    epochs=50,
    verbose=1,
)

# Out-of-sample forecast from the 50 held-back observations. The 2-D (1, 50)
# array is kept under its original name; predict() gets the explicit 3-D view.
cpih_sample_forecast_input = asarray(cpih_sample_forecast_input).reshape(1, cpih_n_steps_in)
cpih_sample_forecast = model.predict(
    cpih_sample_forecast_input.reshape(1, cpih_n_steps_in, 1), verbose=0
)

# Undo the scaling of the forecast. The scaler was fitted on one feature column,
# so values are passed as a column and then restored to a single row.
cpih_list_forecast = cpih_scaler.inverse_transform(
    cpih_sample_forecast.reshape(-1, 1)
).reshape(1, -1).tolist()
print('Predicted CPIH Data for next 12 months: ', cpih_list_forecast[0])

# Undo the scaling of the 50 observed input values and show the most recent ones
cpih_list_actual_50 = cpih_scaler.inverse_transform(
    cpih_sample_forecast_input.reshape(-1, 1)
).reshape(1, -1).tolist()
print('Actual last 12 Months CPIH data: ', cpih_list_actual_50[0][-cpih_n_steps_out:])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50

#### Preparing CPIH data lists for representation

In [None]:
# Build fresh lists so that re-running this cell never mutates the forecast
# results in place (an in-place insert would add one more element on every run)
cpih_latestdata = list(cpih_list_actual_50[0])
# Prepend the last observed value so the prediction line starts where the history ends
cpih_prediction = [cpih_latestdata[-1]] + list(cpih_list_forecast[0])

#### Representing the last 50 months and the next 12 months of predicted data in yearly format.

In [None]:
# Forecast horizon (months) shown in the plot titles
month = 12

# Anchor the time axis on the data itself: cpih_latestdata holds the final rows
# of cpih_data, so its last value belongs to the last index entry. A fixed
# start of '08/2019' places every point one month too early for a series that
# ends in Oct 2023.
last_observed = pd.to_datetime(cpih_data.index[-1], format='%m/%d/%Y %H:%M').to_pydatetime()
first_plotted = last_observed - relativedelta(months=len(cpih_latestdata) - 1)
s = first_plotted.strftime('%m/%Y')  # start month, kept as a string for reference

# One date per value. x2 begins on the last observed month because
# cpih_prediction[0] repeats that observation to join the two lines.
x1 = [first_plotted + relativedelta(months=i) for i in range(len(cpih_latestdata))]
x2 = [last_observed + relativedelta(months=i) for i in range(len(cpih_prediction))]

# Wide figure: 12 inches by 4 inches
fig, ax = plt.subplots(figsize=(12, 4))

# Plotting
ax.plot(x1, cpih_latestdata, 'r', label='Historical Data', marker='o', markersize=2)
ax.plot(x2, cpih_prediction, 'b--', label='Predictions', marker='o', markersize=2)
ax.tick_params(axis='x', labelrotation=30)
ax.set_yticks(range(12))  # one tick per percentage point from 0 to 11

# Horizontal grid lines only
ax.grid(True, which='major', axis='y', linestyle='--', linewidth=0.5, zorder=0, rasterized=True)

ax.set_title('CPIH Inflation Prediction over {} months'.format(month))
ax.set_xlabel('Yearly Representation')
ax.set_ylabel('Inflation Rate in %')
ax.legend()
plt.show()

#### Representing the last 50 months and the next 12 months of predicted data in quarterly format.

In [None]:
# Wide figure: 12 inches by 4 inches
fig, ax = plt.subplots(figsize=(12, 4))

# Plotting
ax.plot(x1, cpih_latestdata, 'r', label='Historical Data', marker='o', markersize=2)
ax.plot(x2, cpih_prediction, 'b--', label='Predictions', marker='o', markersize=2)

# One tick at the start of every calendar quarter. The observations sit on the
# first day of each month, so the ticks are placed on day 1 as well; ticking on
# the last day of the month would label a position next to the following
# month's data point.
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 4, 7, 10), bymonthday=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))

ax.tick_params(axis='x', labelrotation=30)
ax.set_yticks(range(12))  # one tick per percentage point from 0 to 11

# Horizontal grid lines only
ax.grid(True, which='major', axis='y', linestyle='--', linewidth=0.5, zorder=0, rasterized=True)

ax.set_title('CPIH Inflation Prediction over {} months'.format(month))
ax.set_xlabel('Querterly Representation'.replace('Querterly', 'Quarterly'))
ax.set_ylabel('Inflation Rate in %')
ax.legend()
plt.show()

#### Representing the last 50 months and the next 12 months of predicted data in monthly format.

In [None]:
# Wide figure (12 x 4 inches) with a single set of axes
fig, ax = plt.subplots(figsize=(12, 4))

# Observed values in solid red, forecast in dashed blue
ax.plot(x1, cpih_latestdata, 'r', label='Historical Data', marker='o', markersize=3)
ax.plot(x2, cpih_prediction, 'b--', label='Predictions', marker='o', markersize=3)

# Steep, right-aligned tick labels keep the dense monthly labels legible
plt.xticks(rotation=70, ha="right")
ax.set_yticks(range(12))  # one tick per percentage point from 0 to 11

# A major tick for every month, labelled like 'Jan 2020'
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))

# Horizontal grid lines only
ax.grid(True, which='major', axis='y', linestyle='--', linewidth=0.5, zorder=0, rasterized=True)

ax.set_title('CPIH Inflation Prediction over {} months'.format(month))
ax.set_xlabel('Monthly Representation')
ax.set_ylabel('Inflation Rate in %')
ax.legend()
plt.show()

In [None]:
# Wide figure: 12 inches by 4 inches
fig, ax = plt.subplots(figsize=(12, 4))

# Scatter plot of observed and predicted values. cpih_prediction[0] repeats the
# last observation (it only exists to join the lines in the line charts), so it
# is skipped here to avoid painting a real observation in the prediction colour.
ax.scatter(x1, cpih_latestdata, color='green', marker='o', label='Historical Data')
ax.scatter(x2[1:], cpih_prediction[1:], color='red', marker='o', label='Predictions')

# A major tick for every month, labelled like 'Jan 2020'
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))

# Rotate the labels once the data and locator are in place so that every
# monthly tick label receives the setting
plt.setp(ax.get_xticklabels(), rotation=70, ha="right")
ax.set_yticks(range(12))  # one tick per percentage point from 0 to 11

# Horizontal grid lines only
ax.grid(True, which='major', axis='y', linestyle='--', linewidth=0.5, zorder=0, rasterized=True)

# Labels, title and a legend identifying the two colours
ax.set_xlabel('Monthly Representation')
ax.set_ylabel('Inflation Rate in %')
ax.set_title('Scatter Plot CPIH Inflation Prediction over {} months'.format(month))
ax.legend()

# Show the plot
plt.show()


In [None]:
# Wide figure: 12 inches by 4 inches
fig, ax = plt.subplots(figsize=(12, 4))

# On a date axis the bar width is measured in days. The default of 0.8 would
# draw hairline bars for monthly data, so a width of 20 days is used instead.
bar_width_days = 20

# cpih_prediction[0] repeats the last observation (it only exists to join the
# lines in the line charts), so it is skipped to avoid drawing a red bar over
# the final historical bar.
ax.bar(x1, cpih_latestdata, width=bar_width_days, color='skyblue', label='Historical Data')
ax.bar(x2[1:], cpih_prediction[1:], width=bar_width_days, color='red', label='Predictions')

# A major tick for every month, labelled like 'Jan 2020'
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))

# Rotate the labels once the data and locator are in place so that every
# monthly tick label receives the setting
plt.setp(ax.get_xticklabels(), rotation=70, ha="right")
ax.set_yticks(range(12))  # one tick per percentage point from 0 to 11

# Horizontal grid lines only
ax.grid(True, which='major', axis='y', linestyle='--', linewidth=0.5, zorder=0, rasterized=True)

# Labels, title and a legend identifying the two colours
ax.set_xlabel('Monthly Representation')
ax.set_ylabel('Inflation Rate in %')
ax.set_title('Bar Chart CPIH Inflation Prediction over {} months'.format(month))
ax.legend()

# Show the plot
plt.show()
