In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Getting data of crude price
# pd.read_html returns a list holding one DataFrame per HTML table found on the page.
CRUDE_PRICE_URL = 'https://www.eia.gov/opendata/qb.php?sdid=PET.RBRTE.D'
df = pd.read_html(CRUDE_PRICE_URL)

In [None]:
len(df) #df is list

In [None]:
df = df[0] #Getting dataframe out of list

In [None]:
df.head()

In [None]:
#Just taking time and crude price
# .copy() makes this an independent frame, so adding columns to it later does
# not trigger pandas' SettingWithCopyWarning.
df = df[['Period','Value']].copy()

In [None]:
df

In [None]:
#Converting period to datetime
# assign() returns a new frame instead of writing a column into df in place,
# which avoids SettingWithCopyWarning when df is a column selection of another frame.
df = df.assign(Date=pd.to_datetime(df['Period'], format='%Y%m%d'))

In [None]:
df

In [None]:
df = df.drop('Period',axis = 1)

In [None]:
df = df.set_index('Date')

In [None]:
# Put the rows in chronological order (oldest first).
# Sorting on the Date index, rather than reversing by position, gives the same
# result no matter how many times this cell is run.
df = df.sort_index()

In [None]:
df

In [None]:
df.plot(figsize = (15,7))

In [None]:
#Getting value of crude price
df1 = df.reset_index()['Value']

In [None]:
df1

In [None]:
#Scaling data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Read from df['Value'] rather than from df1 so this cell is idempotent:
# re-running it must not re-fit the scaler on already-scaled data, which would
# make scaler.inverse_transform return wrong prices later on.
# NOTE(review): the scaler is fitted on the whole series, including the rows
# that later form the validation split.
df1 = scaler.fit_transform(df['Value'].to_numpy().reshape(-1,1))

In [None]:
df1.shape

In [None]:
df1

In [None]:
plt.figure(figsize=(15,7))
plt.plot(df.index,df1)

In [None]:
# Window size: number of consecutive past values used as input for each prediction.
# A fixed value (instead of input()) lets the notebook run unattended with
# "Restart Kernel -> Run All" and keeps the results reproducible.
w = 100

In [None]:
# Chronological split: the first 96% of the rows are for training.
training_size = int(len(df1) * 0.96)
test_size = len(df1) - training_size
# train_data covers rows [0, training_size + w) and test_data covers rows
# [training_size, end), so the two slices share w rows.
train_data = df1[:training_size + w, :]
test_data = df1[training_size:, :]

In [None]:
train_data.shape, test_data.shape

In [None]:
# Take as many dates as train_data has rows (training_size + w); a literal 100
# only lines up when the window size happens to be 100.
plt.plot(df.index[:len(train_data)],train_data)


In [None]:
def create_dataset(dataset,time_step = 1):
    """Cut a 2-D series into sliding-window samples and their next-step targets.

    Only column 0 of ``dataset`` is read. Sample ``k`` consists of the
    ``time_step`` consecutive values starting at row ``k``; its target is the
    value on the row immediately after that window (e.g. with ``time_step=4``
    rows 0-3 form the input and row 4 is the target).

    Parameters
    ----------
    dataset : array indexed as ``dataset[row, 0]``
    time_step : int, length of each input window (default 1)

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The windows (one per row) and the matching targets. When ``dataset``
        has no more than ``time_step`` rows both arrays are empty.
    """
    n_samples = len(dataset) - time_step
    windows = [dataset[start:start + time_step, 0] for start in range(n_samples)]
    targets = [dataset[start + time_step, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

In [None]:
time_step = w
# Each sample uses time_step consecutive values as X
# and the value that follows them as y.
X_train, y_train = create_dataset(train_data, time_step=time_step)
X_test, y_test = create_dataset(test_data, time_step=time_step)

In [None]:
X_train.shape,X_test.shape

In [None]:
#Making data 3D
# Append a trailing axis of size 1: (rows, steps) -> (rows, steps, 1).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
X_train.shape,X_test.shape

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

In [None]:
model = Sequential() #sequential model
# The window length is taken from X_train instead of a literal 100, so the
# network always matches the window size the training samples were built with.
model.add(LSTM(50,return_sequences = True, input_shape = (X_train.shape[1],1))) # input shape of batch
model.add(LSTM(50,return_sequences = True))
model.add(LSTM(50))
model.add(Dense(1))  # single output: the next value after the window
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

In [None]:
model.summary()

In [None]:
# Train for 10 epochs; the test windows are passed as the validation set.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=128,
    verbose=1,
)

In [None]:
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

In [None]:
# Undo the scaling so the predictions are back in the units the scaler was fitted on.
# NOTE: this overwrites train_predict / test_predict in place, so re-running this
# cell without re-running model.predict applies the inverse transform twice.
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

In [None]:
import math
from sklearn.metrics import mean_squared_error
# train_predict has already been inverse-transformed, while y_train is still in
# scaled units. Bring y_train back to the original units first so the RMSE
# compares like with like.
y_train_price = scaler.inverse_transform(y_train.reshape(-1,1))
math.sqrt(mean_squared_error(y_train_price,train_predict))

In [None]:
# test_predict has already been inverse-transformed, while y_test is still in
# scaled units. Convert y_test back to the original units before computing the RMSE.
y_test_price = scaler.inverse_transform(y_test.reshape(-1,1))
math.sqrt(mean_squared_error(y_test_price,test_predict))

In [None]:
train_predict.shape

In [None]:
test_predict.shape

In [None]:
a = train_predict.reshape(train_predict.shape[0],)

In [None]:
b = test_predict.reshape(test_predict.shape[0],)

In [None]:
c = list(a)+list(b) # Total

In [None]:
len(c)

In [None]:
df

In [None]:
# exact difference of 100

In [None]:
# The first w rows have no prediction (they only serve as input for the first
# window), so pad with w NaNs to line the predictions up with df.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
t = [np.nan] * w

In [None]:
d = t+c

In [None]:
len(d)

In [None]:
df['Predicted Price'] = d

In [None]:
df

In [None]:
# Row of df where the validation predictions start: w leading rows without a
# prediction plus one row per training prediction. The same offset is used for
# both the x and the y slice so the two always have equal length.
split_idx = train_predict.shape[0] + w

plt.figure(figsize = (16,7))
plt.grid(True)
plt.title('Crude price Forecasting Training and Validation')
plt.plot(df.index,df['Value'],label = "Original Value",c = 'blue')
plt.plot(df.index[:split_idx],df['Predicted Price'].iloc[:split_idx],label = "Predicted Training Price",c = 'red')
plt.plot(df.index[split_idx:],df['Predicted Price'].iloc[split_idx:],label = "Predicted Validating Price", c = 'green')
plt.axvline(df.index[split_idx], color='black',lw=3)  # train / validation boundary

plt.legend()

## Forecasting into the future

In [None]:
# Skip the first w rows: they were padded with NaN and carry no prediction.
ddf = df.iloc[w:]

In [None]:
ddf

In [None]:
test_data.shape

In [None]:
# for next one day price, the last time_step days of data are needed
# (time_step is the window length the training samples were built with)
x_input = test_data[-time_step:].reshape(1,-1)
x_input.shape

In [None]:
x_input

In [None]:
temp_input = list(x_input)


In [None]:
temp_input

In [None]:
# Build the flat list of seed values directly from x_input. Deriving it from
# temp_input itself (temp_input = temp_input[0].tolist()) fails if the cell is
# run a second time, because temp_input[0] is then a plain float.
temp_input = x_input.reshape(-1).tolist()

In [None]:
temp_input

## Demonstrate predictions for next 30 days:

- On the first pass the `else` branch runs: the input is reshaped, passed to the model, and a prediction is made for day 101. That prediction is added to the final output and also appended to the input list. The input list now has 101 elements, so on the next pass the `if` branch runs: the value for day 1 is dropped so that days 2 to 101 are used to predict day 102. The loop continues in the same way until 30 days have been forecast.

- For i = 0 (the first pass of the `while` loop) the `else` branch runs because `temp_input` holds exactly 100 elements. Once the forecast for day 101 has been appended, the `if` branch runs and the day-1 value is excluded.

In [None]:
# Recursive multi-step forecast: every predicted value is appended to the input
# window and is used as an input when predicting the following day.
lst_output = []
n_steps = time_step  # window length; must equal the window size the model was trained with

# Work on a copy so temp_input keeps the original seed values. Re-running this
# cell then reproduces the same forecast instead of continuing from wherever
# the previous run stopped.
window = list(temp_input)
if len(window) != n_steps:
    raise ValueError('Seed window has {} values, expected {}'.format(len(window), n_steps))

i = 0
while i<30:   #For 30 days forecast
    if(len(window)>n_steps):
        # Later passes: the previous forecast made the window n_steps + 1 long,
        # so drop the oldest value and predict from the remaining n_steps values.
        window = window[1:]
        x_input = np.array(window).reshape((1,n_steps,1)) #tensor of 1 batch, n_steps rows, 1 column
    else:        #first loop will go inside this
        # The seed window already holds exactly n_steps values.
        x_input = np.array(window).reshape((1,n_steps,1))
    yhat = model.predict(x_input,verbose = 0)
    window.extend(yhat[0].tolist())   #feed the forecast back in as input for the next day
    lst_output.extend(yhat.tolist())  #collect the forecast (still in scaled units)
    i = i+1

print(lst_output)

In [None]:
lst_output

In [None]:
forecast = scaler.inverse_transform(lst_output)

In [None]:
forecast.shape

In [None]:
# Flatten to 1-D whatever the number of forecast days is, instead of assuming exactly 30.
forecast = forecast.reshape(-1)

In [None]:
ddf

In [None]:
# Start the forecast dates on the day after the last date in df rather than on a
# hard-coded date, because the data is downloaded fresh on every run. One daily
# timestamp is generated per forecast value.
t_fut = pd.date_range(df.index[-1] + pd.Timedelta(days=1), periods=len(forecast))

In [None]:
t_fut

In [None]:
dffor = pd.DataFrame({'Forecast Price':forecast},index = t_fut)

In [None]:
# Row of df where the validation predictions start: w leading rows without a
# prediction plus one row per training prediction. The same offset is used for
# both the x and the y slice so the two always have equal length.
split_idx = train_predict.shape[0] + w

plt.figure(figsize = (16,7))
plt.grid(True)
plt.title('Crude price Forecasting Training and Validation')
plt.plot(df.index,df['Value'],label = "Original Value",c = 'blue')
plt.plot(df.index[:split_idx],df['Predicted Price'].iloc[:split_idx],label = "Predicted Training Price",c = 'red')
plt.plot(df.index[split_idx:],df['Predicted Price'].iloc[split_idx:],label = "Predicted Validating Price", c = 'green')

plt.plot(dffor.index,dffor['Forecast Price'], label = 'Forecasted Price',c = 'black')


plt.axvline(df.index[split_idx], color='black',lw=3)  # train / validation boundary
plt.axvline(dffor.index[0], color='black',lw=2)       # start of the forecast

plt.legend()

## Thank You