In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense


In [8]:
# select the data
def select_data(data, start=2016, end=2024, columns=None):
    """Return the rows of ``data`` whose ``Date`` year lies in ``[start, end]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Date`` column that ``pd.to_datetime`` can parse.
        The frame passed in is NOT modified.
    start, end : int
        First and last calendar year to keep (both inclusive).
    columns : list of str, optional
        Columns to return. When omitted, every column except ``Date`` is
        returned.

    Returns
    -------
    pandas.DataFrame
        The selected rows, keeping their original index labels.
    """
    # assign() builds a new frame, so the caller's Date column is left as-is
    # (the previous in-place conversion silently changed the caller's data)
    frame = data.assign(Date=pd.to_datetime(data['Date']))
    # create a mask for the data
    years = frame['Date'].dt.year
    mask = (years >= start) & (years <= end)
    # select the data
    selected = frame.loc[mask]
    if columns is not None:
        return selected[columns]
    # drop the date column
    return selected.drop(columns='Date')

In [None]:
def prepare_dataset(dataset, look_back=1):
    """Turn a 2-D table into sliding-window samples and next-row targets.

    Parameters
    ----------
    dataset : array-like of shape (n_rows, n_features)
        A NumPy array or anything ``np.asarray`` can convert (for example a
        pandas DataFrame).
    look_back : int
        Number of consecutive rows in each input window.

    Returns
    -------
    X : numpy.ndarray of shape (n_rows - look_back, look_back, n_features)
        ``X[i]`` is rows ``i .. i + look_back - 1``.
    Y : numpy.ndarray of shape (n_rows - look_back, n_features)
        ``Y[i]`` is row ``i + look_back``, i.e. the row following ``X[i]``.

    When there are not enough rows for a single window, empty arrays are
    returned that still carry the window and feature axes.
    """
    # DataFrames do not support ``obj[i:j, :]`` indexing, so convert first
    values = np.asarray(dataset)
    n_samples = len(values) - look_back
    if n_samples <= 0:
        # keep the trailing axes so callers can still read X.shape[1:] safely
        n_features = values.shape[1] if values.ndim == 2 else 0
        empty_x = np.empty((0, look_back, n_features), dtype=values.dtype)
        empty_y = np.empty((0, n_features), dtype=values.dtype)
        return empty_x, empty_y
    X = np.stack([values[i:i + look_back, :] for i in range(n_samples)])
    # targets are simply the table shifted by look_back rows
    Y = values[look_back:, :].copy()
    return X, Y

In [None]:
def create_scaler(dataset):
    """Fit a min-max scaler with range (0, 1) on ``dataset``.

    Returns a ``(scaler, scaled)`` pair: the fitted ``MinMaxScaler`` and the
    result of ``fit_transform`` on ``dataset``.
    """
    fitted = MinMaxScaler(feature_range=(0, 1))
    scaled = fitted.fit_transform(dataset)
    return fitted, scaled
def to_scalar(scaler, data):
    """Return ``scaler.transform(data)``; no fitting happens here."""
    transformed = scaler.transform(data)
    return transformed

In [None]:
def create_model(dataset, look_back=1, epochs=100, batch_size=1, verbose=1):
    """Build and fit a stacked two-layer LSTM that predicts the next row.

    Parameters
    ----------
    dataset : array-like of shape (n_rows, n_features)
        Training table; it is windowed with ``prepare_dataset``.
    look_back : int
        Number of consecutive rows fed to the network per sample.
    epochs, batch_size, verbose
        Passed straight through to ``model.fit``.

    Returns
    -------
    The fitted Keras ``Sequential`` model. Its output has one value per
    input feature.

    Raises
    ------
    ValueError
        If ``dataset`` has too few rows to form a single training sample.
    """
    # without this guard the failure surfaces later as an opaque IndexError
    # when the window shape is read from an empty array
    if len(dataset) <= look_back:
        raise ValueError(
            "dataset needs more than look_back=%d rows, got %d"
            % (look_back, len(dataset))
        )

    # X is already (samples, look_back, features) and Y is (samples, features),
    # so no reshaping is needed before handing them to Keras
    X, Y = prepare_dataset(dataset, look_back)
    n_steps, n_features = X.shape[1], X.shape[2]

    model = Sequential()
    # first LSTM returns the full sequence so the second LSTM can consume it
    model.add(LSTM(units=50, return_sequences=True, input_shape=(n_steps, n_features)))
    model.add(LSTM(units=50))
    # one output unit per input feature (multivariate next-step prediction)
    model.add(Dense(n_features))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbose)

    return model

In [9]:
# declare the path of the data
# forward slashes resolve on Windows, Linux and macOS alike; a backslash-separated
# path is only understood on Windows
path_to_data = "data/cleansing/filled/mean_filled.csv"

# load the data
data = pd.read_csv(path_to_data)

# columns kept as model features
col = ['Rice', 'Wheat', 'Atta (Wheat)', 'Gram Dal', 'Tur/Arhar Dal']

# select the desired data
req_data = select_data(data=data, start=2023, end=2024, columns=col)

In [None]:
# ordered 80/20 split (no shuffling): leading rows train, trailing rows test
train_size = int(len(req_data) * 0.80)
test_size = len(req_data) - train_size
train = req_data.iloc[:train_size]
test = req_data.iloc[train_size:]

In [None]:
# scale the data
# the scaler is fitted on the training split only; create_scaler returns (scaler, scaled data)
scaler, train_scaler = create_scaler(train) # train_scaler is a numpy array

In [None]:
# create the model
# look_back=1: every sample is a single previous row; verbose is forwarded to model.fit
myModel = create_model(train_scaler, look_back=1, epochs=100, batch_size=1, verbose=0)

In [None]:
# prepare test data
# convert to scaler (reuse the scaler fitted on the training split)
test_scalar = scaler.transform(test)
# split features and outcome -- window the *scaled* array, not the raw DataFrame,
# so the inputs are in the same 0-1 space the model was trained on;
# look_back must match the value used in create_model
x_test, y_test = prepare_dataset(test_scalar, look_back=1)

In [None]:
# predict the results (the model outputs values in the scaler's 0-1 space)
# NOTE: a bare `myModel.predict` (no call) used to be stored and passed to
# inverse_transform here, which raised before y_pred could be computed
y_pred_scaler = myModel.predict(x_test)
# inverse the scaler to get predictions back in the original units
y_pred = scaler.inverse_transform(y_pred_scaler)

In [None]:
def create_input_data(model, scaler, dataset, date, look_back=1):
    """Build the scaled ``look_back``-day window that ends on ``date``.

    Parameters
    ----------
    model : object with ``predict``
        Only used when ``date`` lies after the last date in ``dataset``.
        It is called with an array of shape ``(1, look_back, n_features)``
        and must return ``(1, n_features)``.
    scaler : object with ``transform``
        Applied to the window rows after the ``Date`` column is dropped, so
        the remaining columns must be the ones the scaler expects.
    dataset : pandas.DataFrame
        Must contain a ``Date`` column parseable by ``pd.to_datetime`` with
        one row per calendar day. The frame passed in is NOT modified.
    date : str or datetime-like
        Target date, e.g. ``'yyyy-mm-dd'``.
    look_back : int
        Window length in days (>= 1).

    Returns
    -------
    numpy.ndarray
        * ``date`` inside the dataset: shape ``(1, look_back, n_features)``,
          ready to pass to ``model.predict``.
        * ``date`` after the dataset: shape ``(look_back, n_features)``; the
          window is rolled forward one predicted day at a time until its last
          row is the prediction for ``date``. Values are in scaled space.

    Raises
    ------
    ValueError
        If ``look_back`` < 1, ``dataset`` is empty, or the dataset does not
        hold exactly ``look_back`` rows for the requested window (date too
        early, missing or duplicated days).
    """
    if look_back < 1:
        raise ValueError("look_back must be >= 1, got %r" % (look_back,))

    # work on a copy so the caller's Date column keeps its original dtype
    frame = dataset.assign(Date=pd.to_datetime(dataset['Date']))
    if frame.empty:
        raise ValueError("dataset is empty")

    target_date = pd.to_datetime(date)
    end_date = frame['Date'].max()

    # check if the date is within the range of the dataset: the seed window
    # ends on the requested date when it is covered, else on the last known day
    to_date = min(target_date, end_date)
    from_date = to_date - pd.DateOffset(days=look_back - 1)
    mask = (frame['Date'] >= from_date) & (frame['Date'] <= to_date)
    window_rows = frame.loc[mask].drop(columns='Date')
    if len(window_rows) != look_back:
        raise ValueError(
            "expected %d rows between %s and %s, found %d"
            % (look_back, from_date.date(), to_date.date(), len(window_rows))
        )
    window = np.asarray(scaler.transform(window_rows))

    if target_date <= end_date:
        # a single sample: add the leading batch axis the model expects
        return window[np.newaxis, :, :]

    # calculate the number of days to predict
    days_to_predict = (target_date - end_date).days
    for _ in range(days_to_predict):
        y_pred = model.predict(window[np.newaxis, :, :])
        # slide the window: append the new prediction, drop the oldest day
        window = np.append(window, y_pred, axis=0)[1:]
    return window

In [None]:
# NOTE(review): dead code. This looks like a leftover draft of the windowing loop,
# stored as a bare string literal rather than comments; nothing references it.
# Candidate for deletion.
'''    X = []
    for i in range(len(dataset)-look_back):
        a = dataset[i:(i+look_back), :]
        X.append(a)
    return np.array(X) '''

In [7]:
# re-read the raw CSV (all columns) to inspect which columns are available
d = pd.read_csv(path_to_data)
print(d.columns)

Index(['Date', 'Rice', 'Wheat', 'Atta (Wheat)', 'Gram Dal', 'Tur/Arhar Dal',
       'Urad Dal', 'Moong Dal', 'Masoor Dal', 'Sugar', 'Milk @',
       'Groundnut Oil (Packed)', 'Mustard Oil (Packed)', 'Vanaspati (Packed)',
       'Soya Oil (Packed)', 'Sunflower Oil (Packed)', 'Palm Oil (Packed)',
       'Gur', 'Tea Loose', 'Salt Pack (Iodised)', 'Potato', 'Onion', 'Tomato'],
      dtype='object')


In [13]:
# Date is read from the CSV as plain strings (dtype object), so it has to be
# parsed with pd.to_datetime before any .dt access
print(d['Date'])

0       2016-01-01
1       2016-01-02
2       2016-01-03
3       2016-01-04
4       2016-01-05
           ...    
3166    2024-09-01
3167    2024-09-02
3168    2024-09-03
3169    2024-09-04
3170    2024-09-05
Name: Date, Length: 3171, dtype: object
