In [1]:
# Importing the required libraries
import pandas as pd
import numpy as np
import tensorflow
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [2]:
# Load the dataset
# Forward slashes are used because they resolve on Windows, Linux and macOS alike;
# the previous backslash-separated path only resolved on Windows.
data = pd.read_csv("data/cleansing/filled/mean_filled.csv")

In [3]:
# print the shape of the data as a (rows, columns) tuple
print(f"Shape of the data: {data.shape}")
# print the column labels of the data (rendered as a pandas Index)
print(f"Columns of the data: {data.columns}")

Shape of the data: (3171, 23)
Columns of the data: Index(['Date', 'Rice', 'Wheat', 'Atta (Wheat)', 'Gram Dal', 'Tur/Arhar Dal',
       'Urad Dal', 'Moong Dal', 'Masoor Dal', 'Sugar', 'Milk @',
       'Groundnut Oil (Packed)', 'Mustard Oil (Packed)', 'Vanaspati (Packed)',
       'Soya Oil (Packed)', 'Sunflower Oil (Packed)', 'Palm Oil (Packed)',
       'Gur', 'Tea Loose', 'Salt Pack (Iodised)', 'Potato', 'Onion', 'Tomato'],
      dtype='object')


In [None]:
# print the first 5 rows of the data
# The "\n" starts the table on its own line; without it the column-header row is
# printed right after the label and no longer lines up with the values below it.
print(f"First 5 rows of the data: \n{data.head()}")

In [5]:
# Convert the date column to datetime and set it as the index.
# The guard keeps the cell safe to re-run: once "Date" has become the index it is no
# longer a column, so an unguarded second run would raise KeyError on data["Date"].
# assign/set_index build a new frame instead of mutating the loaded one in place.
if "Date" in data.columns:
    data = data.assign(Date=pd.to_datetime(data["Date"])).set_index("Date")

In [None]:
# print the first 5 rows of the data (the table starts on its own line after the label)
print(f"First 5 rows of the data: \n{data.head()}")

In [None]:
# check for missing values: count the null entries in each column
print(f"Missing values in the data: \n{data.isnull().sum()}")

In [8]:
# restrict the frame to its five leading columns; every row is kept
data = data.iloc[:, 0:5]

In [None]:
# print the first 5 rows of the data (the table starts on its own line after the label)
print(f"First 5 rows of the data: \n{data.head()}")

In [10]:
# scale the data
# The scaler is fitted on the leading 80% of rows only -- the same chronological
# fraction the later train/test split uses for training -- so the minimum/maximum of
# the held-out tail does not leak into the scaling. Every row is then transformed
# with those training statistics, which means values in the tail may fall slightly
# outside [0, 1]. Keep the 0.8 here in sync with the split ratio.
scaler = MinMaxScaler(feature_range=(0, 1))
n_fit_rows = int(len(data) * 0.8)
scaler.fit(data.iloc[:n_fit_rows])
scaled_data = scaler.transform(data)

In [11]:
# data preparation for LSTM
def create_sequence(dataset, look_back=1, feature_cols=0):
    """Build sliding-window samples for supervised sequence learning.

    Each sample pairs ``look_back`` consecutive rows (the input window) with the
    row that immediately follows them (the target).

    Parameters
    ----------
    dataset : array-like, 2-D (rows, columns)
        Observations ordered in time; converted with ``np.asarray``.
    look_back : int, default 1
        Number of consecutive rows in every input window.
    feature_cols : int, sequence of int, slice or None, default 0
        Column selection applied to both windows and targets. The default ``0``
        uses only the first column. ``None`` selects every column.

    Returns
    -------
    X : np.ndarray
        Input windows. Shape ``(n_samples, look_back)`` for an integer
        ``feature_cols``; ``(n_samples, look_back, n_selected)`` otherwise.
    y : np.ndarray
        Targets. Shape ``(n_samples,)`` for an integer ``feature_cols``;
        ``(n_samples, n_selected)`` otherwise.

    ``n_samples`` is ``len(dataset) - look_back``; when that is not positive both
    arrays are empty.
    """
    values = np.asarray(dataset)
    if feature_cols is None:
        feature_cols = slice(None)

    # Window i covers rows [i, i + look_back) and its target is row i + look_back,
    # so the last usable start index is len(values) - look_back - 1. The earlier
    # bound of len - look_back - 1 (exclusive) silently dropped that final sample.
    n_samples = len(values) - look_back

    X, y = [], []
    for start in range(n_samples):
        end = start + look_back
        X.append(values[start:end, feature_cols])
        y.append(values[end, feature_cols])
    return np.array(X), np.array(y)


In [18]:
# split the data into train and test sets take test size as 20% at end of the data
# The split is chronological (no shuffling): the leading 80% of the scaled rows is
# the training portion and the remaining tail is the test portion. The scaled array
# is split here; no variable named X exists at this point of a top-to-bottom run.
train_size = int(len(scaled_data) * 0.8)
test_size = len(scaled_data) - train_size
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]

In [None]:
# print the shape of the train and test data produced by the split
print(f"Shape of the train data: {train_data.shape}")
print(f"Shape of the test data: {test_data.shape}")

In [23]:
# set the look back and create the sequence
# create_sequence returns an (inputs, targets) pair for the array it is given, so
# each split is unpacked into its own X/y pair.
look_back = 7
X_train, y_train = create_sequence(train_data, look_back)
X_test, y_test = create_sequence(test_data, look_back)

In [None]:
# print the shapes of the X_train and X_test arrays
print(f"Shape of the train data: {X_train.shape}")
print(f"Shape of the test data: {X_test.shape}")

In [None]:
# reshape the data to the 3-D layout (samples, time steps, features) whose second and
# third axes the model cell reads as its input shape. "-1" lets NumPy infer the
# feature axis, so a 2-D array of shape (samples, time steps) -- which is what
# create_sequence returns -- becomes (samples, time steps, 1) instead of failing on
# the missing third axis; an array that is already 3-D is left unchanged.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], -1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], -1))

In [None]:
# print the shape of the train and test input arrays
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

In [None]:
# prepare data for LSTM

In [None]:
# create the LSTM model
# Seed NumPy and TensorFlow first so that weight initialisation and batch shuffling
# start from the same state on every "Restart & Run All" (bit-exact results can still
# depend on the hardware/backend).
np.random.seed(42)
tensorflow.random.set_seed(42)

# Two stacked LSTM layers: the first returns its full output sequence so the second
# can consume it. The Dense head emits one value per input feature.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(units=50),
    Dense(X_train.shape[2]),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# batch_size=1 updates the weights after every single sample; larger batches train
# faster. The returned History is kept so the loss per epoch can be inspected later.
history = model.fit(X_train, y_train, epochs=100, batch_size=1)


In [None]:
# print the model's layer-by-layer summary
model.summary()

In [None]:
# run the fitted model over the training inputs first, then over the test inputs
train_predict, test_predict = model.predict(X_train), model.predict(X_test)

In [None]:
# make predictions
# NOTE(review): this cell repeats the previous cell verbatim, so both predict calls
# run a second time and rebind the same two names -- candidate for removal.
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)


In [None]:
# invert the predictions
def _to_original_scale(values):
    """Map min-max-scaled values back to the original units.

    ``scaler.inverse_transform`` only accepts arrays with exactly as many columns
    as the scaler was fitted on, so it rejects both a single predicted column and
    a target vector wrapped in a list (which becomes one row of n_samples
    columns). This helper applies the same inverse, ``(x - min_) / scale_``,
    column by column instead.

    Assumes the k columns of ``values`` correspond to the first k columns the
    scaler was fitted on (create_sequence takes column 0) -- TODO confirm if other
    columns are ever modelled.

    Returns a 2-D array of shape (n_samples, k).
    """
    values = np.asarray(values, dtype=float)
    values = values.reshape(len(values), -1)  # 1-D targets become a single column
    n_cols = values.shape[1]
    return (values - scaler.min_[:n_cols]) / scaler.scale_[:n_cols]


# NOTE: these names are overwritten with the un-scaled values, so running this cell
# twice would apply the inverse twice; re-run the prediction cell first.
train_predict = _to_original_scale(train_predict)
y_train = _to_original_scale(y_train)
test_predict = _to_original_scale(test_predict)
y_test = _to_original_scale(y_test)
