In [2]:
# shape of x is number of samples x time steps(lookback) x number of features (type of data)
# shape of y is number of expected outcome x number of features (type of data)

In [3]:
# import libraries
import numpy as np

def create_sequence(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), :]
        X.append(a)
        Y.append(dataset[i + look_back, :])
    return np.array(X), np.array(Y)

In [5]:
# import libraries
import numpy as np
import pandas as pd
# select the data
def select_data(data, start=2016, end=2024, columns=None):
    # convert the data to a pandas dataframe
    data['Date'] = pd.to_datetime(data['Date'])
    # create a mask for the data
    if columns is None:
        mask = (data['Date'].dt.year >= start) & (data['Date'].dt.year<= end)
    else:
        mask = (data['Date'].dt.year >= start) & (data['Date'].dt.year<= end) & (data.columns.isin(columns))
    # select the data
    data = data.loc[mask]
    # drop the date column
    data = data.drop(columns='Date')
    return data

In [6]:
# import libraries
from sklearn.preprocessing import MinMaxScaler

def create_scaler(dataset):
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    return scaler, dataset 

In [7]:
# import libraries
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

def create_model(dataset, look_back=1, epochs=100, batch_size=1, verbose=1):
    X, Y = create_sequence(dataset, look_back)
    X = np.reshape(X, (X.shape[0], X.shape[1], X.shape[2]))
    Y = np.reshape(Y, (Y.shape[0], Y.shape[1]))
    
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
    #model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dense(X.shape[2]))  # Assuming single feature for simplicity
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    
    return model

In [8]:
def create_input_data(dataset, look_back=1):
    X = []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), :]
        X.append(a)
    return np.array(X)

In [16]:

# declare the path of the data
path_to_data = r"data\cleansing\filled\mean_filled.csv"

# load the data
# import pandas as pd
data = pd.read_csv(path_to_data)

# select the desired data
req_data = select_data(data=data, start=2023, end=2024)

In [17]:
# scale the data
scaler, scaled_data = create_scaler(req_data) # scaled_data is a numpy array

In [18]:
# split the data into training and testing
train_size = int(len(scaled_data) * 0.80)
test_size = len(scaled_data) - train_size
train, test = scaled_data[0:train_size], scaled_data[train_size:len(scaled_data)]

In [19]:
print(train.shape)

(491, 22)


In [20]:
# create the model
model = create_model(train, look_back=1, epochs=100, batch_size=1, verbose=0)

  super().__init__(**kwargs)


In [24]:
# make prediction
input_data = create_input_data(test, look_back=7)
prediction=model.predict(input_data)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step  


           Date   Rice  Wheat  Atta (Wheat)  Gram Dal  Tur/Arhar Dal  \
0    2016-01-01  29.50  30.50         26.67     70.50         155.00   
1    2016-01-02  29.60  31.40         27.50     71.40         154.60   
2    2016-01-03  29.60  31.40         27.50     71.40         154.00   
3    2016-01-04  29.60  31.40         27.50     71.40         151.60   
4    2016-01-05  29.50  31.75         27.50     71.75         151.50   
...         ...    ...    ...           ...       ...            ...   
3166 2024-09-01  56.12  44.88         52.88     99.76         172.82   
3167 2024-09-02  57.31  45.73         54.06     99.75         173.53   
3168 2024-09-03  57.53  45.80         54.75     99.88         172.23   
3169 2024-09-04  57.29  45.79         55.19     99.48         172.30   
3170 2024-09-05  57.47  45.64         53.77    100.30         173.54   

      Urad Dal  Moong Dal  Masoor Dal  Sugar  ...  Vanaspati (Packed)  \
0       160.00     105.75       90.00  32.75  ...             

In [None]:
result = make_prediction(model, data, look_back=7, date)

In [25]:
# inverse the scaled data
prediction = scaler.inverse_transform(prediction)
test = scaler.inverse_transform(test)

# print the prediction
print(prediction)
print(test)

[[ 66.521255   46.973354   60.623352  ...  73.96332    44.30353
   -7.0007057]
 [ 66.40137    46.697002   60.780437  ...  72.98297    45.123283
  -14.687267 ]
 [ 66.45916    46.604897   60.874443  ...  73.360435   45.93052
  -16.456154 ]
 ...
 [ 63.774437   45.58271    58.743275  ...  83.70634    75.41007
    6.087458 ]
 [ 63.491386   45.519745   58.85807   ...  83.29533    75.0313
    2.889969 ]
 [ 63.282093   45.41768    58.86044   ...  82.95403    75.20582
    2.2422278]]
[[61.33 45.16 54.48 ... 44.03 31.03 25.55]
 [61.27 45.72 53.67 ... 44.06 31.39 24.97]
 [61.41 45.66 54.29 ... 44.06 31.65 25.68]
 ...
 [57.53 45.8  54.75 ... 48.88 54.41 24.53]
 [57.29 45.79 55.19 ... 49.32 55.45 26.58]
 [57.47 45.64 53.77 ... 49.9  56.   27.13]]
