In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [7]:
# Load the raw training table; later cells read its 'store', 'item',
# 'date' and 'sales' columns.
train_csv_path = "A:/spark_data/train.csv"
df = pd.read_csv(train_csv_path)

In [9]:
# Number of consecutive sales values that make up one input window; the value
# immediately after each window is used as its prediction target.
sequence_length = 14 


def create_sequences(data, seq_len):
    """Slice a series into sliding input windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_len]`` and its target is
    ``data[i + seq_len]``.

    Args:
        data: Array-like whose first axis is time, e.g. shape ``(T,)`` or
            ``(T, F)``.
        seq_len: Number of consecutive time steps per window (``>= 0``).

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with shapes
        ``(N, seq_len, *data.shape[1:])`` and ``(N, *data.shape[1:])``, where
        ``N = max(len(data) - seq_len, 0)``. When the series is too short to
        produce any window, both arrays are empty but keep those trailing
        dimensions, so they can still be stacked with non-empty results.

    Raises:
        ValueError: If ``seq_len`` is negative.
    """
    if seq_len < 0:
        raise ValueError(f"seq_len must be non-negative, got {seq_len}")

    data = np.asarray(data)
    n_windows = len(data) - seq_len
    feature_shape = data.shape[1:]

    if n_windows <= 0:
        # Too short for a single (window, target) pair: return correctly
        # shaped empties instead of shape-(0,) arrays.
        empty_xs = np.empty((0, seq_len) + feature_shape, dtype=data.dtype)
        empty_ys = np.empty((0,) + feature_shape, dtype=data.dtype)
        return empty_xs, empty_ys

    # Row i of window_idx holds the time indices i, i+1, ..., i+seq_len-1.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_len)[None, :]
    xs = data[window_idx]
    # Copy so the targets do not alias the caller's array.
    ys = data[seq_len:].copy()
    return xs, ys

# Build one pooled set of (window, target) pairs from every (store, item)
# sales series, scaling each series by its own maximum.
all_X, all_y = [], []

for _, group in df.groupby(['store', 'item']):
    group = group.sort_values('date')

    sales = group['sales'].values.reshape(-1, 1)

    # A series needs at least sequence_length + 1 points to yield a single
    # (window, target) pair; shorter ones would contribute empty arrays whose
    # shape cannot be stacked with the rest.
    if len(sales) < sequence_length + 1:
        continue

    max_val = sales.max()
    if max_val == 0:
        # All-zero series: dividing by the maximum would be 0 / 0.
        continue
    sales = sales / max_val

    X_seq, y_seq = create_sequences(sales, sequence_length)
    all_X.append(X_seq)
    all_y.append(y_seq)

if not all_X:
    raise ValueError(
        "No (store, item) series is long enough to build a training sequence"
    )

X_all = np.vstack(all_X)  # shape: (total_samples, seq_len, 1)
y_all = np.concatenate(all_y)  # shape: (total_samples, 1)

x_train_tensor = torch.tensor(X_all, dtype=torch.float32)
y_train_tensor = torch.tensor(y_all, dtype=torch.float32).view(-1, 1)


In [10]:
class BiLSTM_CNN_Model(nn.Module):
    """Bidirectional LSTM, then a 1-D convolution, then a linear head.

    The network maps a batch of fixed-length sequences to one scalar per
    sequence. The linear layer is sized from ``sequence_length`` at
    construction time, so inputs passed to ``forward`` must have exactly that
    many time steps.

    Args:
        input_size: Number of features per time step.
        sequence_length: Number of time steps in every input sequence.
        hidden_size: Hidden units per LSTM direction.
        lstm_layers: Number of stacked LSTM layers.
        kernel_size: Width of the convolution kernel along the time axis.
        cnn_out_channels: Number of convolution output channels.
    """

    def __init__(self, input_size, sequence_length, hidden_size=64, lstm_layers=2, kernel_size=3, cnn_out_channels=32):
        super().__init__()
        self.sequence_length = sequence_length

        # Forward and backward directions are concatenated, so the LSTM emits
        # 2 * hidden_size features per time step.
        lstm_feature_dim = 2 * hidden_size

        self.bilstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.cnn = nn.Conv1d(
            in_channels=lstm_feature_dim,
            out_channels=cnn_out_channels,
            kernel_size=kernel_size,
        )
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()

        # The convolution has no padding and stride 1, so it shortens the
        # time axis by kernel_size - 1.
        conv_steps = sequence_length - (kernel_size - 1)
        self.fc = nn.Linear(cnn_out_channels * conv_steps, 1)

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Tensor of shape ``[B, T, input_size]`` with
                ``T == sequence_length``.

        Returns:
            Tensor of shape ``[B, 1]``.
        """
        features, _ = self.bilstm(x)          # [B, T, F]
        features = features.transpose(1, 2)   # [B, F, T], channels-first for Conv1d
        activated = self.relu(self.cnn(features))  # [B, C, T']
        return self.fc(self.flatten(activated))


In [None]:
class SalesSequenceDataset(Dataset):
    """Sliding-window dataset over per-(store, item) sales series.

    Each ``(store, item)`` group in ``df`` is sorted by ``'date'``, divided by
    its own maximum ``'sales'`` value, and cut into windows of
    ``sequence_length`` consecutive values, each paired with the value that
    immediately follows it. Groups with fewer than ``sequence_length + 1``
    rows, or whose maximum is 0, are skipped.

    Args:
        df: DataFrame with ``'store'``, ``'item'``, ``'date'`` and ``'sales'``
            columns.
        sequence_length: Number of time steps per input window.

    Attributes:
        sequence_length: The window length given at construction.
        samples: List of ``(x_seq, y)`` NumPy pairs with shapes
            ``(sequence_length, 1)`` and ``(1,)``.
        max_vals: List aligned with ``samples``; ``max_vals[i]`` is the factor
            sample ``i`` was divided by, so multiplying a prediction for
            sample ``i`` by it restores the original sales scale.
    """

    def __init__(self, df, sequence_length=14):
        self.sequence_length = sequence_length
        self.samples = []
        self.max_vals = []

        for _, group in df.groupby(['store', 'item']):
            group = group.sort_values('date')
            sales = group['sales'].values.reshape(-1, 1)

            # Need at least one full window plus its target.
            if len(sales) < sequence_length + 1:
                continue

            max_val = sales.max()
            if max_val == 0:
                # All-zero series: scaling would be 0 / 0.
                continue

            sales = sales / max_val  # normalize
            scale = float(max_val)

            for i in range(len(sales) - sequence_length):
                x_seq = sales[i:i + sequence_length]
                y = sales[i + sequence_length]
                self.samples.append((x_seq, y))
                # Keep the scale next to the sample so it can be undone later.
                self.max_vals.append(scale)

    def __len__(self):
        """Return the total number of (window, target) pairs."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return sample ``idx`` as float32 tensors ``(x, y)``."""
        x, y = self.samples[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


In [None]:
# Wrap the sales frame in the windowed dataset and serve it in shuffled
# mini-batches of 64 samples.
dataset = SalesSequenceDataset(df=df, sequence_length=14)
dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

In [14]:
# Fresh model, mean-squared-error objective and Adam optimiser.
model = BiLSTM_CNN_Model(input_size=1, sequence_length=14)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    # Sum of the per-batch losses for this epoch (not an average).
    total_loss = 0
    for x_batch, y_batch in dataloader:
        optimizer.zero_grad()
        output = model(x_batch)
        # Targets are reshaped to [B, 1] to match the model output.
        loss = criterion(output, y_batch.view(-1, 1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 110.6767
Epoch 2, Loss: 100.5794
Epoch 3, Loss: 99.5177
Epoch 4, Loss: 99.1150
Epoch 5, Loss: 98.8482
Epoch 6, Loss: 98.6670
Epoch 7, Loss: 98.5471
Epoch 8, Loss: 98.3768
Epoch 9, Loss: 98.2864
Epoch 10, Loss: 98.1845


In [15]:
# Persist only the learned parameters; the architecture has to be rebuilt in
# code before these weights can be loaded again.
torch.save(obj=model.state_dict(), f="bilstm_cnn_model.pth")

In [16]:
# Rebuild the architecture with the same constructor arguments used for
# training, then restore the saved weights into it.
model = BiLSTM_CNN_Model(input_size=1, sequence_length=14)

# NOTE: torch.load unpickles the file; only load checkpoints you trust.
saved_state = torch.load("bilstm_cnn_model.pth")
model.load_state_dict(saved_state)

# Switch to inference mode; as the last expression this also displays the model.
model.eval()


BiLSTM_CNN_Model(
  (bilstm): LSTM(1, 64, num_layers=2, batch_first=True, bidirectional=True)
  (cnn): Conv1d(128, 32, kernel_size=(3,), stride=(1,))
  (relu): ReLU()
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Linear(in_features=384, out_features=1, bias=True)
)

In [17]:
# Save the weights together with the constructor arguments needed to rebuild
# the network. The arguments are read from the model itself, not repeated as
# literals, so the checkpoint cannot disagree with the weights it contains.
torch.save({
    'model_state_dict': model.state_dict(),
    'input_size': model.bilstm.input_size,
    'sequence_length': model.sequence_length
}, "bilstm_cnn_full.pth")
