In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense


In [2]:
# select the data
def select_data(data, start=2016, end=2024, columns=None):
    # convert the data to a pandas dataframe
    data['Date'] = pd.to_datetime(data['Date'])
    # create a mask for the data
    mask = (data['Date'].dt.year >= start) & (data['Date'].dt.year <= end)
    # select the data
    data = data.loc[mask]
    if columns is not None:
        data = data[columns]
    else:
        # drop the date column
        data = data.drop(columns='Date')
    return data

In [3]:
def prepare_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), :]
        X.append(a)
        Y.append(dataset[i + look_back, :])
    return np.array(X), np.array(Y)

In [4]:
def create_scaler(dataset):
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    return scaler, dataset 
def to_scalar(scaler, data):
    return scaler.transform(data)

In [5]:
def create_model(dataset, look_back=1, epochs=100, batch_size=1, verbose=1):
    X, Y = prepare_dataset(dataset, look_back)
    X = np.reshape(X, (X.shape[0], X.shape[1], X.shape[2]))
    Y = np.reshape(Y, (Y.shape[0], Y.shape[1]))
    
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
    #model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dense(X.shape[2]))  # Assuming single feature for simplicity
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    
    return model

In [30]:
# select the data
def select_data_01(data, start=2016, end=2024, columns=None):
    # convert the data to a pandas dataframe
    data['Date'] = pd.to_datetime(data['Date'])
    # create a mask for the data
    mask = (data['Date'].dt.year >= start) & (data['Date'].dt.year <= end)
    # select the data
    data = data.loc[mask]
    if columns is not None:
        data = data[columns]
    else:
        # drop the date column
        data = data.drop(columns='Date')
    return data

In [6]:
# declare the path of the data
path_to_data = r"data\cleansing\filled\mean_filled.csv"

# load the data
# import pandas as pd
data = pd.read_csv(path_to_data)

col=['Rice', 'Wheat', 'Atta (Wheat)', 'Gram Dal', 'Tur/Arhar Dal']

# select the desired data
req_data = select_data(data=data, start=2024, end=2024, columns=col)
print(req_data.shape)

(249, 5)


In [31]:
# in last
col=['Date','Rice', 'Wheat', 'Atta (Wheat)', 'Gram Dal', 'Tur/Arhar Dal']
inp_data = select_data_01(data=data, start=2024, end=2024, columns=col)

In [32]:
print(inp_data.head())

           Date   Rice  Wheat  Atta (Wheat)  Gram Dal  Tur/Arhar Dal
2922 2024-01-01  59.04  44.45         54.08     86.88         171.79
2923 2024-01-02  58.89  44.91         53.22     86.83         171.58
2924 2024-01-03  58.74  45.12         52.94     86.80         170.77
2925 2024-01-04  59.44  45.00         53.40     86.37         170.26
2926 2024-01-05  60.03  44.69         53.00     86.88         170.38


In [7]:
# split the data into training and testing
train_size = int(len(req_data) * 0.80)
test_size = len(req_data) - train_size
train, test = req_data[0:train_size], req_data[train_size:len(req_data)]

In [8]:
# scale the data
scaler, train_scaler = create_scaler(train) # scaled_data is a numpy array

In [9]:
# create the model
myModel = create_model(train_scaler, look_back=1, epochs=100, batch_size=1, verbose=0)

  super().__init__(**kwargs)


In [14]:
print(type(train))  

<class 'pandas.core.frame.DataFrame'>


In [None]:
# prepare test data
# convert to scaler
test_scalar = scaler.transform(test)
# split features and outcome
x_test, y_test = prepare_dataset(test_scalar, look_back=1)

In [16]:
x_train, y_train = prepare_dataset(train_scaler, look_back=1)

In [18]:
# predict the results
test_pred_scalar = myModel.predict(x_test)
train_pred_scalar = myModel.predict(x_train)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [19]:
# invert the predictions
test_pred = scaler.inverse_transform(test_pred_scalar)
train_pred = scaler.inverse_transform(train_pred_scalar)

In [21]:
# compare the predictions with the actual data
# train_pred with y_train
# test_pred with y_test

Train Error: 83.414795629143
Test Error: 86.79806511415654


In [22]:
# print the shape of the data
print(train_pred.shape)
print(y_train.shape)
print(test_pred.shape)
print(y_test.shape)


(198, 5)
(198, 5)
(49, 5)
(49, 5)


In [33]:
new_input_data = create_input_data(myModel, scaler, inp_data, date='2024-01-01', look_back=1)

In [36]:
print(new_input_data)

[]


In [27]:
def create_input_data(model, scaler, dataset, date, look_back=1):
    # date format: 'yyyy-mm-dd'
    #scaled_data = scaler.transform(dataset)

    dataset['Date'] = pd.to_datetime(dataset['Date'])

    # find the start and end date of dataset
    start_date = dataset['Date'].min()
    end_date = dataset['Date'].max()

    # chect if the date is in the dataset

    # chect if the date is within the range of the dataset
    if pd.to_datetime(date) <= end_date:
        # take the last look_back days
        to_date = pd.to_datetime(date)
        from_date = to_date - pd.DateOffset(days=look_back-1)
        mask = (dataset['Date'] >= from_date) & (dataset['Date'] <= to_date)
        input_data = dataset.loc[mask]
        input_data = input_data.drop(columns='Date')
        input_data = scaler.transform(input_data)
        X = []
        for i in range(len(input_data) - look_back):
            a = input_data[i:(i + look_back), :]
            X.append(a)
        X = np.array(X)
        return X
    else:
        # take the last look_back days
        req_to_date = pd.to_datetime(date)
        #req_from_date = req_to_date - pd.DateOffset(days=look_back-1)
        to_date = end_date
        from_date = to_date - pd.DateOffset(days=look_back-1)
        mask = (dataset['Date'] >= from_date) & (dataset['Date'] <= to_date)
        input_data = dataset.loc[mask]
        input_data = input_data.drop(columns='Date')
        input_data = scaler.transform(input_data)
        # calculate the number of days to predict
        days_to_predict = (req_to_date - to_date).days
        for i in range(days_to_predict):
            X = []
            for i in range(len(input_data) - look_back):
                a = input_data[i:(i + look_back), :]
                X.append(a)
            X = np.array(X)
            y_pred = model.predict(X)
            input_data = np.append(input_data, y_pred, axis=0)
            input_data = input_data[1:]
        return input_data

In [None]:
'''    X = []
    for i in range(len(dataset)-look_back):
        a = dataset[i:(i+look_back), :]
        X.append(a)
    return np.array(X) '''

In [None]:
d = pd.read_csv(path_to_data)
print(d.columns)

In [None]:
print(d['Date'])