In [1]:
import pandas as pd
import numpy as np
import tensorflow
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [2]:
'''data columns are: Index(['Day', 'Month', 'Year', 'States/UTs', 'Rice', 'Wheat', 'Atta (Wheat)',
       'Gram Dal', 'Tur/Arhar Dal', 'Urad Dal', 'Moong Dal', 'Masoor Dal',
       'Sugar', 'Milk @', 'Groundnut Oil (Packed)', 'Mustard Oil (Packed)',
       'Vanaspati (Packed)', 'Soya Oil (Packed)', 'Sunflower Oil (Packed)',
       'Palm Oil (Packed)', 'Gur', 'Tea Loose', 'Salt Pack (Iodised)',
       'Potato', 'Onion', 'Tomato'],
      dtype='object')'''

"data columns are: Index(['Day', 'Month', 'Year', 'States/UTs', 'Rice', 'Wheat', 'Atta (Wheat)',\n       'Gram Dal', 'Tur/Arhar Dal', 'Urad Dal', 'Moong Dal', 'Masoor Dal',\n       'Sugar', 'Milk @', 'Groundnut Oil (Packed)', 'Mustard Oil (Packed)',\n       'Vanaspati (Packed)', 'Soya Oil (Packed)', 'Sunflower Oil (Packed)',\n       'Palm Oil (Packed)', 'Gur', 'Tea Loose', 'Salt Pack (Iodised)',\n       'Potato', 'Onion', 'Tomato'],\n      dtype='object')"

In [3]:
# Load the data from the CSV file
df = pd.read_csv(r"data\cleansing\filled\data.csv")

In [4]:
print(df.shape)

(11666, 9)


In [5]:
df=df[df['Year']>2022]
print(df.shape)

(11666, 9)


In [6]:
# Convert date columns to a datetime object
df['Date'] = pd.to_datetime(df[['Day', 'Month', 'Year']])
df.set_index('Date', inplace=True)

In [7]:
# Select the crops for prediction
crops = ["Rice", "Wheat", "Atta (Wheat)", "Gram Dal", "Tur/Arhar Dal"]

In [8]:
print(len(crops))

5


In [9]:
# Create a dictionary to store the last known data for each state
last_known_data = {}
for state in df['States/UTs'].unique():
    last_known_data[state] = df[df['States/UTs'] == state].iloc[-1][crops].values

In [10]:
print(df)

            Day  Month  Year     States/UTs   Rice  Wheat  Atta (Wheat)  \
Date                                                                      
2023-01-01    1      1  2023          Assam  35.00  30.86         36.17   
2023-01-01    1      1  2023          Bihar  36.39  29.36         35.57   
2023-01-01    1      1  2023   Chhattisgarh  31.60  30.60         35.20   
2023-01-01    1      1  2023          Delhi  34.00  28.00         32.00   
2023-01-01    1      1  2023        Gujarat  38.75  34.00         35.25   
...         ...    ...   ...            ...    ...    ...           ...   
2023-12-31   31     12  2023      Rajasthan  36.50  28.75         32.25   
2023-12-31   31     12  2023     Tamil Nadu  60.00  45.05         53.83   
2023-12-31   31     12  2023      Telangana  49.00  38.00         49.33   
2023-12-31   31     12  2023  Uttar Pradesh  37.53  26.45         33.02   
2023-12-31   31     12  2023    West Bengal  41.54  29.67         37.46   

            Gram Dal  Tu

In [11]:
# Prepare data for LSTM
data = df[crops].values
print(data.shape)
data

(11666, 5)


array([[ 35.  ,  30.86,  36.17,  69.5 , 108.5 ],
       [ 36.39,  29.36,  35.57,  67.57, 109.32],
       [ 31.6 ,  30.6 ,  35.2 ,  72.4 , 102.  ],
       ...,
       [ 49.  ,  38.  ,  49.33,  79.33, 164.  ],
       [ 37.53,  26.45,  33.02,  79.6 , 151.25],
       [ 41.54,  29.67,  37.46,  80.23, 150.69]])

In [12]:
# Scale the data to be between 0 and 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)
print(scaled_data)

[[0.25139814 0.38987742 0.28434109 0.21340698 0.15307858]
 [0.28841545 0.33056544 0.26573643 0.16495104 0.16237669]
 [0.1608522  0.37959668 0.25426357 0.28621642 0.07937408]
 ...
 [0.62423435 0.67220245 0.6924031  0.46020587 0.78240163]
 [0.31877497 0.2155002  0.18666667 0.46698468 0.63782742]
 [0.42556591 0.34282325 0.32434109 0.48280191 0.63147749]]


In [13]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(scaled_data, test_size=0.2)
#print(train_data)

In [14]:
# Create the function to create the dataset
def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), :]
        X.append(a)
        Y.append(dataset[i + look_back, :])
    return np.array(X), np.array(Y)

In [15]:
# Set look_back period (number of previous days to consider)
look_back = 7
X_train, Y_train = create_dataset(train_data, look_back)
X_test, Y_test = create_dataset(test_data, look_back)

In [16]:
X_train

array([[[0.58881491, 0.60616845, 0.67193798, 0.42681396, 0.74464225],
        [0.42450067, 0.30486358, 0.35224806, 0.32111474, 0.42192992],
        [0.38455393, 0.25029656, 0.1448062 , 0.39342204, 0.54802132],
        ...,
        [0.44207723, 0.4132068 , 0.30511628, 0.57644991, 0.57149337],
        [0.43169108, 0.41122974, 0.28744186, 0.60030128, 0.65665041],
        [0.27723036, 0.23013049, 0.16682171, 0.29600803, 0.47998639]],

       [[0.42450067, 0.30486358, 0.35224806, 0.32111474, 0.42192992],
        [0.38455393, 0.25029656, 0.1448062 , 0.39342204, 0.54802132],
        [0.29800266, 0.69711348, 0.34511628, 0.41426061, 0.44381449],
        ...,
        [0.43169108, 0.41122974, 0.28744186, 0.60030128, 0.65665041],
        [0.27723036, 0.23013049, 0.16682171, 0.29600803, 0.47998639],
        [0.43781625, 0.56662713, 0.22728682, 0.47702737, 0.23812224]],

       [[0.38455393, 0.25029656, 0.1448062 , 0.39342204, 0.54802132],
        [0.29800266, 0.69711348, 0.34511628, 0.41426061, 0.4

In [17]:
# Reshape input to be [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2]))

In [18]:
# Create and fit the LSTM network
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=50))
model.add(Dense(X_train.shape[2]))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, Y_train, epochs=100, batch_size=1, verbose=2)


Epoch 1/100


  super().__init__(**kwargs)


9324/9324 - 24s - 3ms/step - loss: 0.0452
Epoch 2/100
9324/9324 - 23s - 2ms/step - loss: 0.0442
Epoch 3/100
9324/9324 - 21s - 2ms/step - loss: 0.0441
Epoch 4/100
9324/9324 - 22s - 2ms/step - loss: 0.0440
Epoch 5/100
9324/9324 - 21s - 2ms/step - loss: 0.0440
Epoch 6/100
9324/9324 - 21s - 2ms/step - loss: 0.0439
Epoch 7/100
9324/9324 - 21s - 2ms/step - loss: 0.0439
Epoch 8/100
9324/9324 - 20s - 2ms/step - loss: 0.0439
Epoch 9/100
9324/9324 - 20s - 2ms/step - loss: 0.0439
Epoch 10/100
9324/9324 - 21s - 2ms/step - loss: 0.0439
Epoch 11/100
9324/9324 - 20s - 2ms/step - loss: 0.0439
Epoch 12/100
9324/9324 - 20s - 2ms/step - loss: 0.0439
Epoch 13/100
9324/9324 - 19s - 2ms/step - loss: 0.0438
Epoch 14/100
9324/9324 - 19s - 2ms/step - loss: 0.0439
Epoch 15/100
9324/9324 - 19s - 2ms/step - loss: 0.0438
Epoch 16/100
9324/9324 - 18s - 2ms/step - loss: 0.0438
Epoch 17/100
9324/9324 - 18s - 2ms/step - loss: 0.0439
Epoch 18/100
9324/9324 - 18s - 2ms/step - loss: 0.0438
Epoch 19/100
9324/9324 - 18s - 

<keras.src.callbacks.history.History at 0x1d3de9672c0>

In [19]:
# save the model
model.save('model03.keras')
# load the model


In [20]:
# Save the scaler
import joblib
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [21]:
# Scale the data to be between 0 and 1
#scaler = MinMaxScaler(feature_range=(0, 1))
last_week_data = df[crops].iloc[-7:].values 
scaled_last_week = scaler.transform(last_week_data) 
reshaped_last_week = np.reshape(scaled_last_week, (1, look_back, len(crops))) 

In [22]:
# Predict
prediction_scaled = model.predict(reshaped_last_week)
prediction = scaler.inverse_transform(prediction_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 335ms/step


In [23]:
# Create DataFrame for 6/9/2024 predictions
predictions_6_9_2024 = pd.DataFrame(data=prediction, columns=crops)

In [24]:
# Predict for all states using the last known data
for state, state_data in last_known_data.items():
    scaled_state_data = scaler.transform(state_data.reshape(1,-1))
    reshaped_state_data = np.reshape(scaled_state_data, (1, 1, len(crops)))

    state_prediction_scaled = model.predict(reshaped_state_data)
    state_prediction = scaler.inverse_transform(state_prediction_scaled)

    predictions_6_9_2024.loc[state] = state_prediction[0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms

In [25]:
# Display the predictions
print(predictions_6_9_2024)

                       Rice      Wheat  Atta (Wheat)   Gram Dal  Tur/Arhar Dal
0                 43.316242  33.794727     38.736542  81.159355     147.276001
Assam             42.062222  33.524918     38.695454  79.585724     147.624252
Bihar             39.355324  30.807825     37.408260  74.996895     138.386642
Chhattisgarh      44.123730  35.298187     40.008839  80.695869     149.605957
Delhi             39.961868  31.390638     37.465149  76.736496     139.671646
Gujarat           44.092281  35.166843     39.906273  80.524040     149.580444
Haryana           40.555340  31.928391     37.571049  77.880318     145.217499
Himachal Pradesh  37.656456  29.351982     38.753254  63.584805     114.450760
Jharkhand         41.373669  32.997677     38.339325  79.224144     145.362122
Karnataka         44.463890  34.962662     40.084515  79.370956     145.493713
Madhya Pradesh    40.544147  31.952803     37.775608  78.087639     142.504440
Maharashtra       46.799461  37.252972     41.754463

# --------------------------------------------------

In [26]:
# Predict for all states using a rolling window of past data
predictions_by_state = {}  # Dictionary to store predictions for each state

In [27]:
for state in df['States/UTs'].unique():
    state_data = df[df['States/UTs'] == state][crops].iloc[-look_back:].values # Last 'look_back' days of data
    scaled_state_data = scaler.transform(state_data)
    reshaped_state_data = np.reshape(scaled_state_data, (1, look_back, len(crops)))

    state_prediction_scaled = model.predict(reshaped_state_data)
    state_prediction = scaler.inverse_transform(state_prediction_scaled)
    predictions_by_state[state] = state_prediction[0] # Store prediction for the state


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19

In [28]:
predictions_df = pd.DataFrame(predictions_by_state).T # Convert to DataFrame for better display
predictions_df.columns = crops # Add column names
print(predictions_df)

                       Rice      Wheat  Atta (Wheat)   Gram Dal  Tur/Arhar Dal
Assam             40.802727  31.754515     36.860775  78.237541     139.268845
Bihar             42.090591  32.865471     37.969440  79.331451     142.238113
Chhattisgarh      41.146519  32.057789     37.134720  78.456497     139.878357
Delhi             41.837280  32.568020     37.673134  79.275429     142.370178
Gujarat           40.562901  31.613672     36.700436  77.840805     138.122147
Haryana           41.559498  32.415249     37.532211  78.887413     141.020126
Himachal Pradesh  40.957966  32.005600     37.130123  77.608658     138.105820
Jharkhand         41.583580  32.412769     37.492733  78.849602     140.931076
Karnataka         47.216167  37.208366     42.127743  83.494698     155.841568
Madhya Pradesh    41.545918  32.399506     37.488213  78.741776     140.724915
Maharashtra       45.204346  35.502224     40.369030  82.593788     152.486023
Meghalaya         40.336132  31.389622     36.482998