In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from torch.utils.data import Dataset, DataLoader

class CommodityPriceDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length
        self.scaler = MinMaxScaler()
        self.price = self.scaler.fit_transform(data['price'].values.reshape(-1, 1))

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        features = self.data[idx:idx + self.seq_length].values  # Get sequence of features
        target = self.price[idx + self.seq_length]             # Get target price
        return features, target

# Load and process data
df = pd.read_csv('2023allchennai2.csv')
df = df.astype({'commodity_group': 'category'})
    # Change column type to category for column: 'commodity_id'
df = df.astype({'commodity_id': 'category'})
    # Change column type to category for column: 'commodity'
df = df.astype({'commodity': 'category'})
df['date'] = pd.to_datetime(df['date']) 
df['weekday'] = df['date'].dt.weekday
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

le = LabelEncoder()
df['commodity_encoded'] = le.fit_transform(df['commodity'])

# Hyperparameters
seq_length = 7  # Use 7 days of data to predict the next day's price
batch_size = 32

dataset = CommodityPriceDataset(df[['weekday', 'month', 'day', 'commodity_encoded', 'price']], seq_length)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [2]:
import torch
import torch.nn as nn

class LSTMPricePredictor(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMPricePredictor, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))  
        out = self.fc(out[:, -1, :])  # Take output from last time step
        return out

# Model hyperparameters
input_size = 5   # Number of features (weekday, month, day, commodity_encoded, price)
hidden_size = 128 
num_layers = 2
output_size = 1  # Predicting a single price value

# Instantiate the model
model = LSTMPricePredictor(input_size, hidden_size, num_layers, output_size)

In [3]:
import torch.optim as optim
from tqdm import tqdm

# Training parameters
num_epochs = 20
learning_rate = 0.001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
model.to(device)
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    loop = tqdm(dataloader, leave=True)
    for features, target in loop:
        features = features.float().to(device)
        target = target.float().to(device)

        outputs = model(features)
        loss = criterion(outputs, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loop.set_description(f'Epoch [{epoch+1}/{num_epochs}]')
        loop.set_postfix(loss=loss.item())

Epoch [1/20]: 100%|██████████| 245/245 [00:04<00:00, 51.10it/s, loss=0.0032]  
Epoch [2/20]: 100%|██████████| 245/245 [00:03<00:00, 65.65it/s, loss=0.000678]
Epoch [3/20]: 100%|██████████| 245/245 [00:04<00:00, 60.18it/s, loss=0.000484]
Epoch [4/20]: 100%|██████████| 245/245 [00:04<00:00, 60.46it/s, loss=0.000171]
Epoch [5/20]: 100%|██████████| 245/245 [00:03<00:00, 65.42it/s, loss=0.00083] 
Epoch [6/20]: 100%|██████████| 245/245 [00:03<00:00, 62.76it/s, loss=0.000521]
Epoch [7/20]: 100%|██████████| 245/245 [00:03<00:00, 63.31it/s, loss=0.000233]
Epoch [8/20]: 100%|██████████| 245/245 [00:04<00:00, 61.23it/s, loss=0.000169]
Epoch [9/20]: 100%|██████████| 245/245 [00:04<00:00, 60.84it/s, loss=0.00129] 
Epoch [10/20]: 100%|██████████| 245/245 [00:04<00:00, 59.32it/s, loss=0.000678]
Epoch [11/20]: 100%|██████████| 245/245 [00:04<00:00, 55.00it/s, loss=0.000302]
Epoch [12/20]: 100%|██████████| 245/245 [00:04<00:00, 57.37it/s, loss=0.000412]
Epoch [13/20]: 100%|██████████| 245/245 [00:03<00

In [4]:
def predict_price(commodity, future_date):
    future_date = pd.to_datetime(future_date)

    # Create features for the future date
    future_features = pd.DataFrame({
        'weekday': [future_date.weekday()],
        'month': [future_date.month],
        'day': [future_date.day],
        'commodity_encoded': [le.transform([commodity])[0]] 
    })

    # Normalize price from future features (keep price same as it is not used for prediction)
    future_features['price'] = 0 # dummy value
    future_features['price'] = dataset.scaler.transform(future_features['price'].values.reshape(-1, 1))

    # Prepare data for prediction (create a sequence)
    input_data = future_features[['weekday', 'month', 'day', 'commodity_encoded', 'price']].values
    input_data = input_data.reshape(1, 1, -1)  # (batch_size, seq_length, input_size)
    input_data = torch.tensor(input_data).float().to(device)

    # Make prediction
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        predicted_price = model(input_data)

    # Inverse transform to get actual price
    predicted_price = dataset.scaler.inverse_transform(predicted_price.cpu().numpy())
    return predicted_price[0][0]

commodity_name = 'Rice'
date_to_predict = '2023-01-01'

predicted_price = predict_price(commodity_name, date_to_predict)
print(f"Predicted price of {commodity_name} on {date_to_predict}: {predicted_price:.2f}")

Predicted price of Rice on 2023-01-01: 62.43
