In [1]:
import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense, Input
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler


In [2]:
# select the data
def select_data(data, start=2016, end=2024, columns=None):
    """Return the rows of ``data`` whose ``Date`` year lies in ``[start, end]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Date`` column that ``pd.to_datetime`` can parse.
        The frame passed in is not modified.
    start, end : int
        First and last calendar year to keep (both inclusive).
    columns : list of str, optional
        Columns to return. When omitted, every column except ``Date`` is
        returned.

    Returns
    -------
    pandas.DataFrame
        The selected rows, restricted to ``columns`` (or to all non-date
        columns). If ``Date`` is requested in ``columns`` it is returned as
        parsed datetimes.
    """
    # Parse the dates into a separate Series and attach them to a copy via
    # ``assign`` -- writing back into ``data['Date']`` would silently change
    # the caller's DataFrame.
    dates = pd.to_datetime(data['Date'])
    parsed = data.assign(Date=dates)

    # Boolean mask of the rows inside the requested year range.
    years = dates.dt.year
    mask = (years >= start) & (years <= end)
    selected = parsed.loc[mask]

    if columns is not None:
        return selected[columns]
    # No explicit selection: keep every column but the date.
    return selected.drop(columns='Date')

In [3]:
def prepare_dataset(dataset, look_back=1):
    """Turn a time-ordered table into sliding windows and next-step targets.

    Sample ``i`` consists of rows ``i .. i + look_back - 1`` as input and row
    ``i + look_back`` as target.

    Parameters
    ----------
    dataset : array-like
        Rows are time steps, columns are features. Anything ``np.asarray``
        can convert is accepted (NumPy array, DataFrame, nested list).
    look_back : int
        Number of consecutive rows in each input window.

    Returns
    -------
    (X, Y) : tuple of numpy.ndarray
        ``X`` has shape ``(n, look_back, n_features)`` and ``Y`` has shape
        ``(n, n_features)`` with ``n = len(dataset) - look_back``. When the
        dataset has ``look_back`` rows or fewer, ``n`` is 0 and both arrays
        are empty but keep their trailing dimensions.
    """
    # Positional slicing below needs an ndarray; a DataFrame would not
    # support it.
    values = np.asarray(dataset)
    n_windows = max(len(values) - look_back, 0)
    row_shape = values.shape[1:]

    if n_windows == 0:
        # Not enough rows for a single window: return correctly shaped empty
        # arrays so callers can still read X.shape[1] / X.shape[2].
        empty_X = np.empty((0, look_back) + row_shape, dtype=values.dtype)
        empty_Y = np.empty((0,) + row_shape, dtype=values.dtype)
        return empty_X, empty_Y

    X = np.stack([values[i:i + look_back] for i in range(n_windows)])
    # The target of window i is row i + look_back, i.e. the rows from
    # look_back onwards; copy so Y does not alias the input.
    Y = values[look_back:look_back + n_windows].copy()
    return X, Y

In [4]:
def create_scaler(dataset):
    """Fit a min-max scaler on ``dataset`` and return it with the scaled data.

    Each column is rescaled to the range ``(0, 1)`` based on the values seen
    in ``dataset``.

    Returns
    -------
    (scaler, scaled) : tuple
        The fitted ``MinMaxScaler`` and the transformed ``dataset``.
    """
    fitted = MinMaxScaler(feature_range=(0, 1)).fit(dataset)
    return fitted, fitted.transform(dataset)
def to_scalar(scaler, data):
    """Apply an already fitted ``scaler`` to ``data`` and return the result.

    ``scaler`` only needs to expose a ``transform`` method; it is not refit.
    """
    scaled = scaler.transform(data)
    return scaled

In [5]:
def create_model(dataset, look_back=1, epochs=100, batch_size=1, verbose=1):
    """Build and train a two-layer LSTM that predicts the next row of ``dataset``.

    Parameters
    ----------
    dataset : numpy.ndarray
        Scaled training data; rows are time steps, columns are features.
    look_back : int
        Number of past rows fed to the network for each prediction.
    epochs, batch_size, verbose
        Passed straight through to ``model.fit``.

    Returns
    -------
    keras.models.Sequential
        The fitted model. It maps ``(batch, look_back, n_features)`` inputs
        to ``(batch, n_features)`` outputs, i.e. one value per feature.
    """
    # prepare_dataset already yields X as (samples, look_back, features) and
    # Y as (samples, features), so no reshaping is needed.
    X, Y = prepare_dataset(dataset, look_back)
    _, n_steps, n_features = X.shape

    model = Sequential()
    # Declare the input with an explicit Input layer: passing `input_shape`
    # to the first LSTM is deprecated in Keras 3 and emits a UserWarning.
    model.add(Input(shape=(n_steps, n_features)))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(LSTM(units=50))
    # One output unit per feature: every column is forecast jointly.
    model.add(Dense(n_features))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbose)

    return model

In [19]:
# columns to keep from the raw table
col = ['Rice', 'Wheat', 'Atta (Wheat)', 'Gram Dal', 'Tur/Arhar Dal']

# read the CSV (path is relative to the notebook's working directory)
path_to_data = "data/data_mean.csv"
data = pd.read_csv(path_to_data)

# keep only the 2022-2024 rows of the chosen columns
# NOTE(review): this cell was last executed *after* the training cells, and
# the row count it prints does not match the 192 training / 43 test windows
# recorded further down -- restart the kernel and run all cells in order
# before trusting the saved outputs.
req_data = select_data(data=data, start=2022, end=2024, columns=col)
print(req_data.shape)

(979, 5)


In [9]:
# positional 80/20 split without shuffling: the first 80% of the rows are
# used for training, the remaining rows are held out for testing
train_size = int(len(req_data) * 0.80)
test_size = len(req_data) - train_size
train = req_data.iloc[:train_size]
test = req_data.iloc[train_size:]

In [10]:
# scale the data
# the scaler is fitted on the training split only; `scaler` is reused later
# to transform the test split and to invert the predictions
scaler, train_scaler = create_scaler(train) # train_scaler holds the scaled training data

In [11]:
# number of past rows the model sees for each prediction
look_back = 7
# build and fit the LSTM on the scaled training data
myModel = create_model(
    train_scaler,
    look_back=look_back,
    epochs=100,
    batch_size=1,
    verbose=1,
)

  super().__init__(**kwargs)


Epoch 1/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0697
Epoch 2/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0165
Epoch 3/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0150
Epoch 4/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0174
Epoch 5/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0185
Epoch 6/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0172
Epoch 7/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0135
Epoch 8/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0138
Epoch 9/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0147
Epoch 10/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms

In [12]:
# prepare test data
# scale the held-out rows with the scaler that was fitted on the training split
test_scalar = to_scalar(scaler, test)
# build the input windows and the matching next-row targets
x_test, y_test = prepare_dataset(test_scalar, look_back=look_back)

In [13]:
# rebuild the training windows and targets (same construction create_model
# uses internally) so the fit on the training data can be evaluated too
x_train, y_train = prepare_dataset(train_scaler, look_back)

In [14]:
# predict the results
# both outputs are still in the scaled space the model was trained in
test_pred_scalar = myModel.predict(x_test)
train_pred_scalar = myModel.predict(x_train)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 287ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [15]:
# invert the predictions back to the original units
test_pred = scaler.inverse_transform(test_pred_scalar)
train_pred = scaler.inverse_transform(train_pred_scalar)

# invert the actual values
# The scaled targets are rebuilt from the scaled splits instead of reading
# y_test / y_train: overwriting a variable with its own inverse transform
# would apply the inverse a second time whenever this cell is re-run.
y_test_scaled = prepare_dataset(test_scalar, look_back)[1]
y_train_scaled = prepare_dataset(train_scaler, look_back)[1]
y_test = scaler.inverse_transform(y_test_scaled)
y_train = scaler.inverse_transform(y_train_scaled)

In [16]:
# sanity check: predictions and actual values must have matching shapes
# (printed in the order train_pred, y_train, test_pred, y_test)
for values in (train_pred, y_train, test_pred, y_test):
    print(values.shape)

(192, 5)
(192, 5)
(43, 5)
(43, 5)


In [17]:
# show the first 5 rows of each result
# (order: train predictions, train actuals, test predictions, test actuals)
for values in (train_pred, y_train, test_pred, y_test):
    print(pd.DataFrame(values).head())

           0          1          2          3           4
0  60.221565  45.067490  53.333714  86.774315  170.161926
1  61.016460  45.037193  53.374462  87.137794  170.864182
2  60.709705  45.135170  53.708282  86.789841  170.270020
3  60.344238  45.112530  53.382866  86.170029  168.353348
4  60.055420  44.833691  53.118141  85.852928  167.246994
       0      1      2      3       4
0  59.89  44.97  53.51  86.06  168.91
1  60.06  45.03  52.71  86.00  169.03
2  60.11  45.12  53.11  86.03  168.64
3  59.81  45.14  52.58  86.00  167.92
4  59.83  44.74  52.81  86.08  167.11
           0          1          2          3           4
0  61.657261  45.920818  55.658291  93.827431  181.156097
1  61.969162  45.826633  55.670788  93.811020  180.626022
2  62.475628  44.980373  55.530186  93.680702  179.432449
3  62.043938  44.515839  54.986710  93.832695  179.232758
4  61.605549  46.117050  54.558151  94.186989  180.398163
       0      1      2      3       4
0  61.91  45.87  54.25  93.90  177.58


In [18]:
# calculate the error
# mean squared error in the original units, using scikit-learn's default
# averaging over all output columns
# (mean_squared_error is imported in the notebook's top import cell)
train_error = mean_squared_error(y_train, train_pred)
test_error = mean_squared_error(y_test, test_pred)
print("Train error: ", train_error)
print("Test error: ", test_error)


Train error:  0.2935303994072985
Test error:  16.749637028710474
