In [17]:
import torch
import torch.nn as nn
import pandas as pd

In [None]:
# Load the sales history used for prediction. Later cells read the
# 'store', 'item', 'date' and 'sales' columns of this frame.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df = pd.read_csv("A:/spark_data/train.csv")

In [8]:
def get_last_sequence(df, store, item, sequence_length):
    """Build the most recent normalized sales window for one store/item pair.

    Rows of ``df`` matching ``store`` and ``item`` are ordered by ``date``.
    Their ``sales`` values are divided by the maximum of that series, and the
    final ``sequence_length`` values are packed into a tensor.

    Args:
        df: DataFrame with ``store``, ``item``, ``date`` and ``sales`` columns.
        store: Value matched against the ``store`` column.
        item: Value matched against the ``item`` column.
        sequence_length: Number of trailing observations to return; must be
            greater than zero.

    Returns:
        ``(x, max_val)`` where ``x`` is a float32 tensor of shape
        ``(1, sequence_length, 1)`` and ``max_val`` is the series maximum
        used for scaling. Returns a bare ``None`` (not a tuple) when the
        series maximum is 0, so callers must check before unpacking.

    Raises:
        ValueError: If ``sequence_length`` is not positive, or the pair has
            fewer than ``sequence_length`` rows.
    """
    if sequence_length <= 0:
        # 0 would turn sales[-0:] into the whole series, and a negative value
        # would slice from the front instead of taking the tail.
        raise ValueError("sequence_length must be a positive integer")

    group = df[(df['store'] == store) & (df['item'] == item)]
    group = group.sort_values('date')

    sales = group['sales'].values

    if len(sales) < sequence_length:
        raise ValueError("Not enough data for prediction")

    max_val = sales.max()
    if max_val == 0:
        return None  # no prediction for zero sales

    # Scale by the maximum of the whole series, not just the returned window.
    sales = sales / max_val

    last_seq = sales[-sequence_length:]
    return torch.tensor(last_seq.reshape(1, sequence_length, 1), dtype=torch.float32), max_val


In [None]:
def predict_sales(model, df, store, item, sequence_length):
    """Predict the next sales value for one store/item pair.

    Fetches the latest normalized window via ``get_last_sequence``, runs the
    model on it in eval mode without gradient tracking, and scales the scalar
    output back by the series maximum.

    Args:
        model: Callable torch module whose output for the window is a
            single-element tensor.
        df: DataFrame passed through to ``get_last_sequence``.
        store: Store identifier to predict for.
        item: Item identifier to predict for.
        sequence_length: Number of trailing observations fed to the model.

    Returns:
        The de-normalized prediction. Returns ``None`` when no prediction can
        be made: either the series is all zeros, or a ``ValueError`` was
        raised (its message is printed).
    """
    try:
        sequence = get_last_sequence(df, store, item, sequence_length)
        if sequence is None:
            # get_last_sequence returns a bare None for an all-zero series;
            # unpacking it would raise an uncaught TypeError.
            return None

        x_tensor, max_val = sequence
        model.eval()
        with torch.no_grad():
            pred = model(x_tensor).item()
        return pred * max_val  # reverse normalization
    except ValueError as e:
        print(f"{e} for ({store}, {item})")
        return None


In [10]:
class BiLSTM_CNN_Model(nn.Module):
    """Bidirectional LSTM followed by a 1-D convolution and a linear head.

    The LSTM output is moved to channels-first layout, convolved along the
    time axis, passed through ReLU, flattened, and projected to one value
    per sample.

    Args:
        input_size: Number of features per time step.
        sequence_length: Number of time steps in each input; it fixes the
            input width of the final linear layer.
        hidden_size: Hidden units per LSTM direction.
        lstm_layers: Number of stacked LSTM layers.
        kernel_size: Width of the convolution kernel.
        cnn_out_channels: Number of convolution output channels.
    """

    def __init__(self, input_size, sequence_length, hidden_size=64, lstm_layers=2, kernel_size=3, cnn_out_channels=32):
        super().__init__()
        self.sequence_length = sequence_length

        # Both directions are concatenated, so each step carries 2 * hidden_size features.
        lstm_features = 2 * hidden_size
        self.bilstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.cnn = nn.Conv1d(lstm_features, cnn_out_channels, kernel_size)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()

        # The unpadded, stride-1 convolution shortens the time axis by kernel_size - 1.
        conv_steps = sequence_length - (kernel_size - 1)
        self.fc = nn.Linear(conv_steps * cnn_out_channels, 1)

    def forward(self, x):
        """Run the network on ``x`` and return the output of the linear head."""
        sequence_features = self.bilstm(x)[0]                  # [B, T, F]
        channels_first = sequence_features.permute(0, 2, 1)    # [B, F, T]
        activated = self.relu(self.cnn(channels_first))        # [B, C, T']
        return self.fc(self.flatten(activated))


In [39]:
# Recreate the exact model manually. load_state_dict is strict by default, so
# the constructor arguments must produce the same parameter names and shapes
# as the checkpoint.
model = BiLSTM_CNN_Model(input_size=1, sequence_length=14)
# map_location="cpu": the model above is created on the CPU, and without this
# a checkpoint saved from a GPU fails to deserialize on a machine without CUDA.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source (or pass weights_only=True where the installed torch supports it).
state_dict = torch.load('A:/spark/model/bilstm_cnn_model.pth', map_location="cpu")
model.load_state_dict(state_dict)
model.eval()

BiLSTM_CNN_Model(
  (bilstm): LSTM(1, 64, num_layers=2, batch_first=True, bidirectional=True)
  (cnn): Conv1d(128, 32, kernel_size=(3,), stride=(1,))
  (relu): ReLU()
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Linear(in_features=384, out_features=1, bias=True)
)

In [None]:
store = 2
item = 14
# Pass every argument by keyword. predict_sales is declared as
# (model, df, store, item, sequence_length), so leading positional store/item
# would bind to model/df and then collide with the model=/df= keywords,
# raising TypeError.
# The window length is read from the model so it cannot drift from the value
# the network was built with.
predicted_sales = predict_sales(
    model=model,
    df=df,
    store=store,
    item=item,
    sequence_length=model.sequence_length,
)

if predicted_sales is not None:
    print(f"Predicted sales for {store}-{item} tomorrow: {predicted_sales:.2f}")


Predicted sales for 2-14 tomorrow: 51.92
