#### Importing Required Libraries

In [1]:
from numpy import array
import pandas as pd
import numpy as np
from datetime import datetime
from numpy import asarray
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, KFold
import random
from sklearn.utils import shuffle
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import tensorflow as tf
import pickle
import csv
from dateutil.relativedelta import relativedelta

#### Reading historical data from multiple files and cleaning and formatting the data

In [36]:
# Read CPI data from CSV file
cpi_monthly_1950_1988 = pd.read_csv('../input_data/cpi_data_Jan1950_Dec1988.csv')
cpi_monthly_1989_2023 = pd.read_csv('../input_data/cpi_data_Jan1989_Oct2023.csv')
gdp_2007_2023 = pd.read_csv('../input_data/gdp_data_Jan2007_Sept2023.csv')

# Concat both the dataframes
cpi_data = pd.concat([cpi_monthly_1950_1988, cpi_monthly_1989_2023], axis=0)
cpi_data.tail()
# Extract Year and Month from 'Dates' column from CPI data
cpi_data['Month'] = cpi_data['Dates'].str.split('-').str[0].astype('str')
cpi_data['Year'] = cpi_data['Dates'].str.split('-').str[1].astype('str')
# Extract Year and Month from 'Dates' column from GDP data
gdp_2007_2023['Month'] = gdp_2007_2023['Dates'].str.split('-').str[0].astype('str')
gdp_2007_2023['Year'] = gdp_2007_2023['Dates'].str.split('-').str[1].astype('str')

# Create a new column 'FormattedDate' in the desired format
cpi_data['FormattedDate'] = pd.to_datetime(cpi_data['Month'] + ' ' + cpi_data['Year'], format='%b %y').dt.strftime('%m/%d/%Y %H:%M')
cpi_data['Dates'] = cpi_data['FormattedDate']
# Dropping temperary columns
cpi_data = cpi_data.drop(columns=['Year', 'Month', 'FormattedDate'], axis=1)
# Create a new column 'FormattedDate' in the desired format
gdp_2007_2023['FormattedDate'] = pd.to_datetime(gdp_2007_2023['Month'] + ' ' + gdp_2007_2023['Year'], format='%b %y').dt.strftime('%m/%d/%Y %H:%M')
gdp_2007_2023['Dates'] = gdp_2007_2023['FormattedDate']
gdp_2007_2023 = gdp_2007_2023.drop(columns=['Month', 'Year', 'FormattedDate'])
print(cpi_data.tail())
print(gdp_2007_2023.tail())

                Dates  CPIH  CPI  Average CPIH
413  06/01/2023 00:00   7.3  7.9           2.6
414  07/01/2023 00:00   6.4  6.8           2.6
415  08/01/2023 00:00   6.3  6.7           2.6
416  09/01/2023 00:00   6.3  6.7           2.6
417  10/01/2023 00:00   4.7  4.6           2.6
                Dates  Monthly GDP
194  03/01/2023 00:00        102.3
195  04/01/2023 00:00        102.5
196  05/01/2023 00:00        102.3
197  06/01/2023 00:00        103.0
198  07/01/2023 00:00        102.4


#### Transform data into a univariate supervised learning problem

In [3]:
# let's transform our remaning data into a univariate supervised learning problem
# Functions transforms our time series sequence into a supervised leaning problem
# Transform data into a univariate supervised learning problem
def split_sequences(sequence, n_steps_in, n_steps_out):
    X, y = list(), list()
    for i in range(len(sequence) - (n_steps_in + n_steps_out)):
        append_X = []
        append_y = []
        for j in range(n_steps_in):
            append_X.append(sequence[i + j])
        for k in range(n_steps_out):
            append_y.append(sequence[i + n_steps_in + k + 1])

        X.append(append_X)
        y.append(append_y)

    return np.array(X), np.array(y)

#### Bulding a model for CPI Prediction using LSTM time varient approach and training the model on historical data

In [4]:
def predict_cpi_inflation(df):
    # Scale data between 0 and 1
    scaler = MinMaxScaler()
    scaled_cpi = asarray(df['CPI']).reshape(-1, 1)
    scaled_cpi = scaler.fit_transform(scaled_cpi)
    # Omit the last 50 observations for out-of-sample forecast
    out_of_sample_forecast_input = scaled_cpi[len(scaled_cpi) - 50:, 0]
    scaled_cpi = scaled_cpi[:len(scaled_cpi) - 50, 0]    
    
    # Set the number of lags and forecast periods
    n_steps_in = 50
    n_steps_out = 12
    
    # Create sequences for input (X) and output (y)
    X, y = split_sequences(scaled_cpi, n_steps_in, n_steps_out)
    
    # Split data into training and testing 80:20
    total_rows = X.shape[0]
    train_rows = int(total_rows * 0.8)
    test_rows = total_rows - train_rows
    
    # Obtain training and testing data
    X_train = X[:train_rows]
    X_test = X[train_rows:]
    y_train = y[:train_rows]
    y_test = y[train_rows:]
    
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    y_train = y_train.reshape(y_train.shape[0], y_train.shape[1], 1)
    
    # Build and train the LSTM model
    model = Sequential()
    model.add(LSTM(75, activation='relu', return_sequences=True, input_shape=(n_steps_in, 1)))
    model.add(LSTM(75, activation='relu', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(n_steps_out))
    model.compile(loss='mse', optimizer='adam')
    training_model = model.fit(X_train, y_train, epochs=50, verbose=1)
    
    # Perform out-of-sample forecast
    out_of_sample_forecast_input = asarray(out_of_sample_forecast_input).reshape(1, n_steps_in)
    out_of_sample_forecast = model.predict(out_of_sample_forecast_input, verbose=0)
    
    # Inversely scale the forecasted data and save it to a CSV file
    list_forecast = scaler.inverse_transform(out_of_sample_forecast).tolist()
    print('Predicted CPI Data for next 12 months: ', list_forecast[0])
    return list_forecast

#### Bulding a model for CPIH Prediction using LSTM time varient approach and training the model on historical data

In [5]:
def predict_cpih_inflation(df):
    # Scale data between 0 and 1
    cpih_scaler = MinMaxScaler()
    scaled_cpih = asarray(df['CPIH']).reshape(-1, 1)
    scaled_cpih = cpih_scaler.fit_transform(scaled_cpih)
    # Omit the last 50 observations for out-of-sample forecast
    cpih_sample_forecast_input = scaled_cpih[len(scaled_cpih) - 50:, 0]
    scaled_cpih = scaled_cpih[:len(scaled_cpih) - 50, 0]
    
    # Set the number of lags and forecast periods
    cpih_n_steps_in = 50
    cpih_n_steps_out = 12
    
    # Create sequences for input (X) and output (y)
    X, y = split_sequences(scaled_cpih, cpih_n_steps_in, cpih_n_steps_out)
    
    # Split data into training and testing 80:20
    cpih_total_rows = X.shape[0]
    cpih_train_rows = int(cpih_total_rows * 0.8)
    cpih_test_rows = cpih_total_rows - cpih_train_rows
    
    # Obtain training and testing data
    X_train_cpih = X[:cpih_train_rows]
    X_test_cpih = X[cpih_train_rows:]
    y_train_cpih = y[:cpih_train_rows]
    y_test_cpih = y[cpih_train_rows:]
    
    X_train_cpih = X_train_cpih.reshape(X_train_cpih.shape[0], X_train_cpih.shape[1], 1)
    y_train_cpih = y_train_cpih.reshape(y_train_cpih.shape[0], y_train_cpih.shape[1], 1)
    
    # Build and train the LSTM model
    model = Sequential()
    model.add(LSTM(75, activation='relu', return_sequences=True, input_shape=(cpih_n_steps_in, 1)))
    model.add(LSTM(75, activation='relu', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(cpih_n_steps_out))
    model.compile(loss='mse', optimizer='adam')
    cpih_training_model = model.fit(X_train_cpih, y_train_cpih, epochs=50, verbose=1)
    
    # Perform out-of-sample forecast
    cpih_sample_forecast_input = asarray(cpih_sample_forecast_input).reshape(1, cpih_n_steps_in)
    cpih_sample_forecast = model.predict(cpih_sample_forecast_input, verbose=0)
    
    # Inversely scale the forecasted data and save it to a CSV file
    cpih_list_forecast = cpih_scaler.inverse_transform(cpih_sample_forecast).tolist()
    print('Predicted CPIH Data for next 12 months: ', cpih_list_forecast[0])
    return cpih_list_forecast    

#### Bulding a model for GDP Growth using LSTM time varient approach and training the model on historical data

In [None]:
def predict_gdp_growth(df):
    

#### Calling Both CPI and CPH LSTM Models and passing input data for prediction

In [6]:
# Setting idex as Dates column
input_df = cpi_data.copy()
input_df.set_index('Dates', inplace=True)
cpi_predicted_list = predict_cpi_inflation(input_df)
cpih_predicted_list = predict_cpih_inflation(input_df)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predicted CPI Data for next 12 months:  [6.667708396911621, 7.10439395904541, 7.358459949493408, 7.400247573852539, 7.721502304077148, 7.6093010902404785, 7.478518486022949, 7.8217644691467285, 7.752566814422607, 7.788948059082031, 8.362661361694336, 7.834422588348389]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoc

#### Generating output data for Graphical representation

##### Creating CSV file of Predicted Output of CPI and CPIH Data

In [29]:
# Assuming 'prediction_start_date' is the base date, starting from November 2023
prediction_start_date = datetime.strptime('11/2023', '%m/%Y')
# Specified the 12 number becuase we want months future months list
month = 12
# Create the list of datetime objects
future_date_list = [prediction_start_date + relativedelta(months=i) for i in range(month)]
# Creating Final Dataframe of predicted CPI and CPIH data
prediction_df = pd.DataFrame({'Dates':future_date_list, 'CPI':cpi_predicted_list[0], 'CPIH':cpih_predicted_list[0]})
prediction_df['Average CPIH'] = 2.6 # UK Average CPIH is 2.6
prediction_df[['CPI', 'CPIH', 'Average CPIH']] = prediction_df[['CPI', 'CPIH', 'Average CPIH']].round(2)
prediction_df.to_csv('../output_data/CPI_Inflation_Over_Next_12_Months.csv', index=False)
prediction_df.head()

Unnamed: 0,Dates,CPI,CPIH,Average CPIH
0,2023-11-01,6.67,5.85,2.6
1,2023-12-01,7.1,5.75,2.6
2,2024-01-01,7.36,5.88,2.6
3,2024-02-01,7.4,5.64,2.6
4,2024-03-01,7.72,5.59,2.6


##### Creating CSV file of Actual Historical CPI and CPIH Data

In [34]:
# Formating Dates column data.
cpi_data['Dates'] = pd.to_datetime(cpi_data['Dates']).dt.strftime('%Y-%m-%d')
# Saving historical data in output folder
cpi_data.to_csv('../output_data/CPI_Inflation_Historical_data.csv', index=False)
cpi_data.head()

Unnamed: 0,Dates,CPIH,CPI,Average CPIH
0,2050-01-01,4.3,4.1,5.8
1,2050-02-01,4.3,4.1,5.8
2,2050-03-01,4.8,4.7,5.8
3,2050-04-01,5.4,5.5,5.8
4,2050-05-01,3.7,3.7,5.8


In [None]:
prediction_df.to_csv('../output_data/CPI_Inflation_Over_Next_12_Months.csv', index=False)
prediction_df.head()