In [11]:
import pandas as pd
import numpy as np
import tensorflow
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# Reference note only: a bare (unassigned) string literal recording the column
# names of the dataset as listed by the notebook author.
'''data columns are: Index(['Day', 'Month', 'Year', 'States/UTs', 'Rice', 'Wheat', 'Atta (Wheat)',
       'Gram Dal', 'Tur/Arhar Dal', 'Urad Dal', 'Moong Dal', 'Masoor Dal',
       'Sugar', 'Milk @', 'Groundnut Oil (Packed)', 'Mustard Oil (Packed)',
       'Vanaspati (Packed)', 'Soya Oil (Packed)', 'Sunflower Oil (Packed)',
       'Palm Oil (Packed)', 'Gur', 'Tea Loose', 'Salt Pack (Iodised)',
       'Potato', 'Onion', 'Tomato'],
      dtype='object')'''

In [7]:
# Load the data from the CSV file.
# Forward slashes are accepted on Windows as well as on POSIX systems, whereas
# a backslash-separated literal only resolves on Windows.
df = pd.read_csv("data/cleansing/filled/data.csv")

In [None]:
# Show the (rows, columns) dimensions of the loaded frame
frame_shape = df.shape
print(frame_shape)

In [None]:
# Report the frame dimensions (no subsetting is applied at this point, so the
# frame is used as loaded).
print(df.shape)

In [7]:
# Assemble a datetime from the Day/Month/Year columns and make it the index.
# The three source columns stay in the frame; only the index changes.
date_index = pd.to_datetime(df[['Day', 'Month', 'Year']]).rename('Date')
df.set_index(date_index, inplace=True)

In [8]:
# Columns to forecast (one entry per price column used as a model feature)
crops = [
    "Rice",
    "Wheat",
    "Atta (Wheat)",
    "Gram Dal",
    "Tur/Arhar Dal",
    "Urad Dal",
    "Moong Dal",
    "Masoor Dal",
    "Sugar",
    "Milk @",
    "Groundnut Oil (Packed)",
    "Mustard Oil (Packed)",
    "Vanaspati (Packed)",
    "Soya Oil (Packed)",
    "Sunflower Oil (Packed)",
    "Palm Oil (Packed)",
    "Gur",
    "Tea Loose",
    "Salt Pack (Iodised)",
    "Potato",
    "Onion",
    "Tomato",
]

In [None]:
# Number of selected columns (this is also the model's feature count)
crop_count = len(crops)
print(crop_count)

In [10]:
# Map each state to the values of its last row (in the frame's row order) for
# the selected columns.
# NOTE(review): "last known" is only the most recent date if rows are in
# chronological order within each state — confirm against the CSV.
state_column = df['States/UTs']
last_known_data = {}
for state in state_column.unique():
    latest_row = df[state_column == state].iloc[-1]
    last_known_data[state] = latest_row[crops].values

In [None]:
# Extract the selected columns as a plain 2-D array (rows x features)
crop_frame = df[crops]
data = crop_frame.values
print(data.shape)

In [12]:
# Min-max scale every feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on the full array, i.e. before the
# train/test split — confirm that this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data)
scaled_data = scaler.transform(data)

In [12]:
# Split the data into training and testing sets.
# The windows built from these arrays must consist of consecutive rows, so the
# row order is preserved (shuffle=False): the leading 80% of rows become the
# training set and the trailing 20% the test set. The default (shuffle=True)
# would randomly permute the rows before splitting.
train_data, test_data = train_test_split(scaled_data, test_size=0.2, shuffle=False)

In [13]:
# Create the function to create the dataset
def create_dataset(dataset, look_back=1):
    """Slice a time-ordered array into supervised (window, next-row) pairs.

    Each sample pairs ``look_back`` consecutive rows with the row that
    immediately follows them.

    Parameters
    ----------
    dataset : array-like
        Observations ordered along axis 0 (one row per time step).
    look_back : int, default 1
        Number of consecutive rows in every input window.

    Returns
    -------
    X : numpy.ndarray
        Input windows, shape ``(n_samples, look_back, *dataset.shape[1:])``.
    Y : numpy.ndarray
        Targets, shape ``(n_samples, *dataset.shape[1:])``; ``Y[i]`` is the
        row right after window ``X[i]``.

    Notes
    -----
    ``n_samples`` is ``len(dataset) - look_back``, so the final row of
    ``dataset`` is used as a target. When there are not enough rows for a
    single window, empty arrays with the shapes above (``n_samples == 0``)
    are returned.
    """
    dataset = np.asarray(dataset)
    row_shape = dataset.shape[1:]
    n_samples = len(dataset) - look_back

    if n_samples <= 0:
        # Keep the trailing dimensions so downstream shape-based code still works.
        empty_X = np.empty((0, look_back) + row_shape, dtype=dataset.dtype)
        empty_Y = np.empty((0,) + row_shape, dtype=dataset.dtype)
        return empty_X, empty_Y

    X = np.stack([dataset[start:start + look_back] for start in range(n_samples)])
    # The target of window i is row i + look_back, i.e. everything from
    # `look_back` onwards; copy so the result does not alias the input.
    Y = dataset[look_back:].copy()
    return X, Y

In [14]:
# Window length: how many preceding rows feed each prediction
look_back = 7

# Build (window, target) pairs for the training split first, then the test split
(X_train, Y_train), (X_test, Y_test) = (
    create_dataset(split, look_back) for split in (train_data, test_data)
)

In [15]:
# The LSTM input must be [samples, time steps, features].
# Reshaping each array to its own shape is a no-op, so instead of reshaping,
# verify the rank and fail with a clear message if a split has no windows.
for split_name, split_X in (("X_train", X_train), ("X_test", X_test)):
    if split_X.ndim != 3:
        raise ValueError(
            f"{split_name} must be 3-D [samples, time steps, features], "
            f"got shape {split_X.shape}"
        )

In [None]:
# Create and fit the LSTM network
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

model = Sequential()
# The first LSTM returns its full output sequence so the second LSTM receives
# one vector per time step.
model.add(LSTM(units=50, return_sequences=True, input_shape=(n_timesteps, n_features)))
model.add(LSTM(units=50))
# One output unit per feature: the network predicts the next row of all columns.
model.add(Dense(n_features))
model.compile(loss='mean_squared_error', optimizer='adam')
# The held-out windows are passed as validation_data so every epoch also
# reports the loss on data the network is not trained on; X_test / Y_test are
# not used anywhere else in this notebook.
model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=1,
    validation_data=(X_test, Y_test),
    verbose=2,
)


In [17]:
# Write the trained network to disk
model_path = 'model02.keras'
model.save(model_path)

In [None]:
# Persist the fitted scaler so saved predictions can be mapped back to the
# original value range later
import joblib
scaler_path = 'scaler.pkl'
joblib.dump(scaler, scaler_path)

In [18]:
# Build the model input from the most recent `look_back` rows of the frame.
# Slicing with `look_back` (rather than a literal 7) keeps the number of rows
# in step with the (1, look_back, n_features) reshape below. The already
# fitted scaler is reused so the values are scaled exactly as in training.
last_week_data = df[crops].iloc[-look_back:].values
scaled_last_week = scaler.transform(last_week_data)
reshaped_last_week = np.reshape(scaled_last_week, (1, look_back, len(crops)))

In [None]:
# Run the network on the prepared window, then undo the min-max scaling so the
# result is expressed in the original units of each column
prediction_scaled = model.predict(x=reshaped_last_week)
prediction = scaler.inverse_transform(prediction_scaled)

In [20]:
# Wrap the 6/9/2024 prediction in a labelled frame (one column per feature)
predictions_6_9_2024 = pd.DataFrame(prediction, columns=crops)

In [None]:
# Predict for all states from each state's most recent `look_back` rows.
# The network is built and trained on windows of `look_back` time steps, so
# every state is given a window of that length (a single last-known row would
# be a one-step sequence, which does not match the training input).
state_column = df['States/UTs']
for state in last_known_data:
    state_data = df.loc[state_column == state, crops].iloc[-look_back:].values
    if len(state_data) < look_back:
        raise ValueError(
            f"State {state!r} has only {len(state_data)} rows; "
            f"{look_back} are required to build one input window"
        )
    scaled_state_data = scaler.transform(state_data)
    reshaped_state_data = np.reshape(scaled_state_data, (1, look_back, len(crops)))

    state_prediction_scaled = model.predict(reshaped_state_data)
    state_prediction = scaler.inverse_transform(state_prediction_scaled)

    # Stored as a new row labelled with the state's name
    predictions_6_9_2024.loc[state] = state_prediction[0]

In [None]:
# Print the text rendering of the predictions frame
print(str(predictions_6_9_2024))

# --------------------------------------------------

In [None]:
# Rolling-window variant: collect one predicted row per state, keyed by state name
predictions_by_state = dict()

In [None]:
state_labels = df['States/UTs']
for state in state_labels.unique():
    # Take the last `look_back` rows of this state's selected columns
    state_rows = df.loc[state_labels == state, crops]
    state_data = state_rows.iloc[-look_back:].values

    # Scale with the fitted scaler and add the leading batch dimension
    scaled_state_data = scaler.transform(state_data)
    window_shape = (1, look_back, len(crops))
    reshaped_state_data = np.reshape(scaled_state_data, window_shape)

    # Predict, then map the output back to the original value range
    state_prediction_scaled = model.predict(reshaped_state_data)
    state_prediction = scaler.inverse_transform(state_prediction_scaled)

    # Keep the single predicted row for this state
    predictions_by_state[state] = state_prediction[0]


In [None]:
# The dict yields one column per state; transpose so that every state becomes
# a row, then label the columns with the feature names
by_state_columns = pd.DataFrame(predictions_by_state)
predictions_df = by_state_columns.transpose()
predictions_df.columns = crops
print(str(predictions_df))