<a href="https://colab.research.google.com/github/ZerXXX0/sales-prediction/blob/main/FTTransformer_MLQ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Fetch the training and test tables (in that order) from the project's GitHub repository.
train_df, test_df = (
    pd.read_csv(csv_url)
    for csv_url in (
        'https://raw.githubusercontent.com/ZerXXX0/sales-prediction/refs/heads/main/dataset/train_final.csv',
        'https://raw.githubusercontent.com/ZerXXX0/sales-prediction/refs/heads/main/dataset/test_final.csv',
    )
)

In [3]:
# === Preprocessing ===
# Columns that are not used as model inputs.
drop_cols = ['Unnamed: 0', 'TransactionID', 'MemberID']
train_df_clean = train_df.drop(columns=drop_cols)

# Separate the target column from the feature columns.
y = train_df_clean['next_buy']
X = train_df_clean.drop(columns=['next_buy'])

# Fill missing values with the column mean, then standardise every column.
# NOTE(review): both transformers are fitted on all training rows, i.e. before
# the train/validation split that is made further down the notebook.
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X_imputed = imputer.fit_transform(X)
X_scaled = scaler.fit_transform(X_imputed)

In [4]:
# Hold out 20% of the rows for validation, stratified on the target.
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Features become float32 tensors; labels become int64 (long) tensors.
X_train_tensor, X_val_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_val)
)
y_train_tensor, y_val_tensor = (
    torch.tensor(labels.values, dtype=torch.long) for labels in (y_train, y_val)
)

In [5]:
# Pair features with labels, then batch them; only the training batches are shuffled.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)

In [6]:
# === FT-Transformer Model ===
class FTTransformer(nn.Module):
    """Transformer-encoder classifier for a flat feature vector.

    The whole feature vector is projected by a single linear layer into one
    ``dim``-wide token. A learnable CLS token is placed in front of it and the
    resulting two-token sequence is passed through a ``TransformerEncoder``.
    The encoder output at the CLS position is layer-normalised and mapped to
    two class logits.

    Args:
        input_dim: Number of input features per row.
        dim: Width of the tokens / encoder model dimension.
        depth: Number of stacked encoder layers.
        heads: Number of attention heads in each encoder layer.
        dropout: Dropout probability handed to each encoder layer.
    """

    def __init__(self, input_dim, dim=64, depth=3, heads=4, dropout=0.1):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept as-is.
        self.embedding = nn.Linear(input_dim, dim)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim,
                nhead=heads,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=depth,
        )
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.head = nn.Sequential(nn.LayerNorm(dim), nn.Linear(dim, 2))

    def forward(self, x):
        """Return logits of shape ``(batch, 2)`` for ``x`` of shape ``(batch, input_dim)``."""
        feature_token = self.embedding(x).unsqueeze(1)      # (batch, 1, dim)
        cls = self.cls_token.expand(x.shape[0], -1, -1)     # (batch, 1, dim)
        encoded = self.transformer(torch.cat((cls, feature_token), dim=1))  # (batch, 2, dim)
        # Only the CLS position is used for classification.
        return self.head(encoded[:, 0])


In [7]:
# === Model, loss and optimizer ===
# Seed PyTorch's global RNG so that weight initialisation (and later draws from
# the same generator, such as dropout and DataLoader shuffling) repeat on every
# "Restart & Run All". 42 matches the random_state used for the train/val split.
SEED = 42
torch.manual_seed(SEED)

model = FTTransformer(input_dim=X_train.shape[1])  # one input per feature column
criterion = nn.CrossEntropyLoss()                  # takes raw logits and integer class labels
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

In [8]:
def train_model(model, train_loader, val_loader, epochs=50, save_path="best_ft_transformer.pth",
                optimizer=None, criterion=None):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Each epoch runs one pass over ``train_loader``, then measures accuracy on
    ``val_loader``. Whenever the accuracy beats the best value seen so far, the
    model's ``state_dict`` is written to ``save_path``. Progress is printed
    once per epoch; nothing is returned.

    Args:
        model: Module mapping a feature batch to class logits.
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        save_path: File that receives the best ``state_dict``.
        optimizer: Optimizer stepping ``model``'s parameters. Defaults to
            ``AdamW(lr=1e-3, weight_decay=1e-4)`` built from ``model``.
        criterion: Loss applied to ``(logits, labels)``. Defaults to
            ``nn.CrossEntropyLoss()``.
    """
    # Build the optimizer from the model that was actually passed in. Reading a
    # notebook-level ``optimizer`` instead would silently keep updating the
    # parameters of an older model once ``model`` is re-created in another cell.
    if optimizer is None:
        optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    # Start below any reachable accuracy so the first epoch is always checkpointed.
    best_acc = float("-inf")

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0  # sum of per-batch losses (not averaged)
        for xb, yb in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        model.eval()
        correct = total = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                predicted = torch.argmax(model(xb), dim=1)
                correct += (predicted == yb).sum().item()
                total += yb.size(0)
        # An empty validation loader reports 0% instead of raising ZeroDivisionError.
        acc = correct / total * 100 if total else 0.0

        # Save the best model
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), save_path)
            print(f"✅ Saved new best model (epoch {epoch+1}, val acc: {acc:.2f}%)")

        print(f"Epoch {epoch+1}: Train Loss={total_loss:.4f}, Val Acc={acc:.2f}%")

In [9]:
# Run 10 training epochs; the best-scoring weights go to train_model's default save_path.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=10,
)

✅ Saved new best model (epoch 1, val acc: 83.62%)
Epoch 1: Train Loss=75.3570, Val Acc=83.62%
✅ Saved new best model (epoch 2, val acc: 83.90%)
Epoch 2: Train Loss=70.6215, Val Acc=83.90%
Epoch 3: Train Loss=69.8157, Val Acc=83.73%
Epoch 4: Train Loss=69.3489, Val Acc=83.37%
Epoch 5: Train Loss=69.2305, Val Acc=83.75%
Epoch 6: Train Loss=68.8645, Val Acc=83.88%
✅ Saved new best model (epoch 7, val acc: 84.07%)
Epoch 7: Train Loss=68.8890, Val Acc=84.07%
Epoch 8: Train Loss=68.6198, Val Acc=83.45%
Epoch 9: Train Loss=68.5532, Val Acc=83.96%
Epoch 10: Train Loss=68.5617, Val Acc=83.99%


In [10]:
# Build a fresh network with the same input width, then restore the best
# checkpoint written during training.
model = FTTransformer(input_dim=X_train.shape[1])
best_state = torch.load("best_ft_transformer.pth")
model.load_state_dict(best_state)
# Switch to evaluation mode; as the cell's last expression this also displays the module.
model.eval()

FTTransformer(
  (embedding): Linear(in_features=8, out_features=64, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (head): Sequential(
    (0): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (1): Linear(in_features=64, out_features=2, bias=True)
  )
)

In [11]:
# === Preprocess test set ===
member_ids = test_df["MemberID"]  # Save MemberID before dropping

drop_cols = ['Unnamed: 0', 'TransactionID', 'MemberID']
test_df_clean = test_df.drop(columns=drop_cols)

# Reuse the imputer and scaler that were fitted on the training features.
# Fitting fresh ones on the test rows would fill and scale the test set with
# different statistics from the ones the model was trained on.
# Selecting X.columns also enforces the training feature set and column order.
X_test_imputed = imputer.transform(test_df_clean[X.columns])
X_test_scaled = scaler.transform(X_test_imputed)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

In [12]:
# === Predict ===
# One forward pass over the whole test tensor with gradient tracking switched off;
# the predicted class is the index of the larger logit.
with torch.no_grad():
    preds = model(X_test_tensor)
    predicted_classes = preds.argmax(dim=1).numpy()

# === Export predictions with MemberID ===
# One output row per test row: the saved MemberID next to its predicted class.
output = pd.DataFrame({"MemberID": member_ids}).assign(
    next_buy_predicted=predicted_classes
)

output.to_csv("ft_transformer_predictions.csv", index=False)
print("✅ Predictions saved to ft_transformer_predictions.csv")

✅ Predictions saved to ft_transformer_predictions.csv


In [14]:
# Read the predictions back from the same relative path they were written to.
# The former absolute "/content/..." path only exists on Colab.
submission_df = pd.read_csv("ft_transformer_predictions.csv")

In [16]:
# Rename the prediction column to "next_buy", the name used when the submission is built.
submission_df = submission_df.rename(
    {"next_buy_predicted": "next_buy"},
    axis="columns",
)
submission_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21098 entries, 0 to 21097
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   MemberID  21098 non-null  object
 1   next_buy  21098 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 329.8+ KB


In [17]:
# Sample submission file; its MemberID column determines the rows of the final output.
submit_df = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/sales-prediction/refs/heads/main/dataset/sample_submission.csv")

# A MemberID can occur on several prediction rows. Keep only the last row per
# member so the lookup series below has a unique index.
submission_df_unique = submission_df.drop_duplicates(subset='MemberID', keep='last')
next_buy_lookup = submission_df_unique.set_index('MemberID').loc[:, 'next_buy']

# Fill in each member's prediction by looking up its MemberID.
submit_df['next_buy'] = submit_df['MemberID'].map(next_buy_lookup)

In [18]:
# Show row count, dtypes and non-null counts of the filled-in submission frame
# (a non-null count below the row count would mean some MemberIDs had no prediction).
submit_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6381 entries, 0 to 6380
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   MemberID  6381 non-null   object
 1   next_buy  6381 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 99.8+ KB


In [19]:
# Write the final submission file, leaving out the DataFrame index.
submit_df.to_csv('submission_FTTransformer.csv', index=False)