In [35]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt

In [39]:
import sys

# Location of the local xLSTM sources, relative to the notebook's working
# directory.
xlstm_src_dir = 'main/xLSTM/xlstm'

# Register the directory exactly once and at the FRONT of the search path:
#  * dropping earlier copies keeps the cell idempotent when it is re-run;
#  * entries appended at the end are searched after site-packages, so an
#    installed `xlstm` distribution would win over the local sources.
# NOTE(review): assumes the local sources (not the installed package) are the
# ones meant to be imported -- confirm.
sys.path[:] = [entry for entry in sys.path if entry != xlstm_src_dir]
sys.path.insert(0, xlstm_src_dir)

In [24]:
# Default to the CPU and upgrade to the GPU only when CUDA is usable.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
print(device)

cuda


In [25]:
# Environment diagnostics: CUDA availability first, then the PyTorch build
# string, one value per line.
print(torch.cuda.is_available(), torch.__version__, sep="\n")

True
2.0.1+cu117


In [8]:
# Load the customer contract history.
# `contract_start_date` is parsed into datetimes because it is used as the
# sort key when the per-customer sequences are built; left as raw strings the
# ordering would be lexicographic and only coincidentally chronological.
df = pd.read_csv(
    "../datasets/dummy_data/customer_phone_contracts.csv",
    parse_dates=["contract_start_date"],
)

In [26]:
# One array of item ids per customer, ordered by contract start date.
# Groups are visited in the order produced by `groupby("customer_id")`.
customer_sequences = [
    contracts.sort_values("contract_start_date")["item_id"].values
    for _, contracts in df.groupby("customer_id")
]

In [27]:
# Fit a single encoder on every item id that occurs in any customer sequence,
# then map each sequence to its integer class indices.
all_item_ids = np.concatenate(customer_sequences)
le = LabelEncoder().fit(all_item_ids)
encoded_sequences = list(map(le.transform, customer_sequences))

In [28]:
class PhoneSequenceDataset(Dataset):
    """Next-item prediction samples built from per-customer id sequences.

    Every sample is an ``(input, target)`` pair of 1-D ``torch.long`` tensors
    of length ``seq_len``; ``target`` is ``input`` shifted one step ahead
    (``input = seq[:-1]``, ``target = seq[1:]``), right-padded with
    ``pad_value``.

    Sequences with more than ``seq_len + 1`` items are truncated to their most
    recent ``seq_len + 1`` items so that both tensors always fit the fixed
    length (previously such sequences raised ``ValueError`` from ``np.pad``).

    Args:
        sequences: indexable collection of integer id sequences.
        seq_len: fixed length of the returned input/target tensors.
        pad_value: id used for right-padding. Defaults to 0, which is also a
            valid class index when the ids are zero-based (as produced by
            ``LabelEncoder``); pass a dedicated id to tell padding apart.
    """

    def __init__(self, sequences, seq_len, pad_value=0):
        self.sequences = sequences
        self.seq_len = seq_len
        self.pad_value = pad_value

    def __len__(self):
        """Number of sequences (one sample per sequence)."""
        return len(self.sequences)

    def _to_fixed_length(self, values):
        """Right-pad ``values`` (at most ``seq_len`` long) to ``seq_len``."""
        padded = np.full(self.seq_len, self.pad_value, dtype=np.int64)
        padded[:len(values)] = values
        return torch.tensor(padded, dtype=torch.long)

    def __getitem__(self, idx):
        """Return the padded ``(input, target)`` tensors for sequence ``idx``."""
        seq = np.asarray(self.sequences[idx])
        # Keep only the latest seq_len + 1 items: after the one-step shift both
        # halves are then at most seq_len long. Shorter sequences are untouched.
        window = seq[-(self.seq_len + 1):]
        input_seq = window[:-1]
        target_seq = window[1:]
        return self._to_fixed_length(input_seq), self._to_fixed_length(target_seq)

In [29]:
# --- Sequence / batching ---
seq_len = 10                # fixed length the dataset pads every sample to
context_length = seq_len    # handed to the xLSTM block stack configuration
batch_size = 32

# --- Model size ---
embedding_dim = 64
num_heads = 4
num_blocks = 7

# --- Vocabulary: one class per distinct item id known to the encoder ---
vocab_size = len(le.classes_)

In [30]:
dataset = PhoneSequenceDataset(encoded_sequences, seq_len)

# 80 / 10 / 10 split. The test split takes the remainder so the three sizes
# always add up to len(dataset) despite the integer truncation above it.
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seeded generator: without it the train/val/test membership changes on every
# run, so validation and test numbers are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [40]:
from xlstm import (
    xLSTMBlockStack,
    xLSTMBlockStackConfig,
    mLSTMBlockConfig,
    mLSTMLayerConfig,
    sLSTMBlockConfig,
    sLSTMLayerConfig,
    FeedForwardConfig,
)

In [41]:
# Choose the sLSTM backend from what the machine offers instead of hard-coding
# "cuda": the rest of the notebook already falls back to the CPU when CUDA is
# unavailable, and a "cuda" backend cannot be built or run there.
# NOTE(review): "vanilla" is taken to be xlstm's non-CUDA sLSTM backend --
# confirm against the installed xlstm version.
slstm_backend = "cuda" if torch.cuda.is_available() else "vanilla"

# Block stack: `num_blocks` blocks in total, an sLSTM block at index 1 and
# mLSTM blocks everywhere else.
cfg = xLSTMBlockStackConfig(
    mlstm_block=mLSTMBlockConfig(
        mlstm=mLSTMLayerConfig(
            conv1d_kernel_size=4, qkv_proj_blocksize=4, num_heads=num_heads
        )
    ),
    slstm_block=sLSTMBlockConfig(
        slstm=sLSTMLayerConfig(
            backend=slstm_backend,
            num_heads=num_heads,
            conv1d_kernel_size=4,
            bias_init="powerlaw_blockdependent",
        ),
        feedforward=FeedForwardConfig(proj_factor=1.3, act_fn="gelu"),
    ),
    context_length=context_length,
    num_blocks=num_blocks,
    embedding_dim=embedding_dim,
    slstm_at=[1],
)

In [42]:
# Token ids -> embeddings -> xLSTM block stack -> per-position logits over the
# item vocabulary.
xlstm_stack = xLSTMBlockStack(cfg)
model = nn.Sequential(
    nn.Embedding(vocab_size, embedding_dim),
    xlstm_stack,
    nn.Linear(embedding_dim, vocab_size)
).to(device)  # place the parameters on the device selected earlier in the notebook

{'verbose': True, 'with_cuda': True, 'extra_ldflags': ['-LC:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.8\\lib', '-lcublas'], 'extra_cflags': ['-DSLSTM_HIDDEN_SIZE=64', '-DSLSTM_BATCH_SIZE=8', '-DSLSTM_NUM_HEADS=4', '-DSLSTM_NUM_STATES=4', '-DSLSTM_DTYPE_B=float', '-DSLSTM_DTYPE_R=__nv_bfloat16', '-DSLSTM_DTYPE_W=__nv_bfloat16', '-DSLSTM_DTYPE_G=__nv_bfloat16', '-DSLSTM_DTYPE_S=__nv_bfloat16', '-DSLSTM_DTYPE_A=float', '-DSLSTM_NUM_GATES=4', '-DSLSTM_SIMPLE_AGG=true', '-DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false', '-DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0', '-DSLSTM_FORWARD_CLIPVAL_VALID=false', '-DSLSTM_FORWARD_CLIPVAL=0.0', '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_BFLOAT16_OPERATORS__', '-U__CUDA_NO_BFLOAT16_CONVERSIONS__', '-U__CUDA_NO_BFLOAT162_OPERATORS__', '-U__CUDA_NO_BFLOAT162_CONVERSIONS__'], 'extra_cuda_cflags': ['-Xptxas="-v"', '-gencode', 'arch=compute_80,code=compute_80', '-res-usage', '--use_fast_math', '-O3', '-Xptxas -O

Using C:\Users\agneg\AppData\Local\torch_extensions\torch_extensions\Cache\py310_cu117 as PyTorch extensions root...


FileNotFoundError: [Errno 2] No such file or directory: 'd:\\Miniconda\\lib\\site-packages\\xlstm\\blocks\\slstm\\src\\cuda\\slstm.cc'