# FT-Transformer の実装

In [26]:
from typing import Any, Dict

import numpy as np
import rtdl
import scipy.special
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
import zero
import pandas as pd
import numpy as np
import category_encoders as ce
from torch.autograd import Variable

In [27]:
df_train = pd.read_csv('../train.csv')
df_test = pd.read_csv('../test.csv')

In [28]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 617 entries, 0 to 616
Data columns (total 58 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Id      617 non-null    object 
 1   AB      617 non-null    float64
 2   AF      617 non-null    float64
 3   AH      617 non-null    float64
 4   AM      617 non-null    float64
 5   AR      617 non-null    float64
 6   AX      617 non-null    float64
 7   AY      617 non-null    float64
 8   AZ      617 non-null    float64
 9   BC      617 non-null    float64
 10  BD      617 non-null    float64
 11  BN      617 non-null    float64
 12  BP      617 non-null    float64
 13  BQ      557 non-null    float64
 14  BR      617 non-null    float64
 15  BZ      617 non-null    float64
 16  CB      615 non-null    float64
 17  CC      614 non-null    float64
 18  CD      617 non-null    float64
 19  CF      617 non-null    float64
 20  CH      617 non-null    float64
 21  CL      617 non-null    float64
 22  CR

In [29]:
device = torch.device("cpu")
seed = 112
zero.improve_reproducibility(seed=seed)

112

In [30]:
dataset = df_train.fillna(0)
task_type = "binclass"
catfeatures = ["EJ"]
numfeatures = dataset.drop(["Id","Class"]+catfeatures, axis=1).columns.tolist()
X_all = dataset[numfeatures].astype("double")
y_all = dataset["Class"].astype("int64")
y_all = sklearn.preprocessing.LabelEncoder().fit_transform(y_all).astype("int64")

X = {}
y = {}
X["train"], X["test"], y["train"], y["test"] = sklearn.model_selection.train_test_split(
    dataset.drop(["Id", "Class"], axis=1), dataset["Class"], train_size=0.8, random_state=seed, stratify=dataset["Class"]
)
X["train"], X["val"], y["train"], y["val"] = sklearn.model_selection.train_test_split(
    X["train"], y["train"], train_size=0.8, random_state=seed, stratify=y["train"]
)
n_features = dataset.drop(["Class"], axis=1).shape[1]

#Calculate cardinalities as number of unique category values for each categorical features
cardinalities = [df_train.iloc[:,0].nunique(), df_train.iloc[:,1].nunique(), df_train.iloc[:,2].nunique(), df_train.iloc[:,3].nunique()]

In [31]:
# not the best way to preprocess features, but for the demonstration purpose
def PreprocessNumFeatures(X, y, numfeatures):
    "normalize features"
    preprocess = sklearn.preprocessing.StandardScaler().fit(X["train"][numfeatures])
    X = {
        k: torch.tensor(preprocess.fit_transform(v[numfeatures]))
        for k, v in X.items()
    }
    y = {k: torch.tensor(v.tolist()) for k, v in y.items()}
    y_mean = y["train"].float().mean().item()
    y_std = y["train"].float().std().item()
    y = {k: (v - y_mean) / y_std for k, v in y.items()}
    y = {k: v.float() for k, v in y.items()}
    return X, y, y_mean, y_std, preprocess

def PreprocessCatFeatures(X, catfeatures):
    "Convert categorical(string format) to numerical category"
    preprocess = sklearn.preprocessing.OrdinalEncoder().fit(X["train"][catfeatures])
    X = {
        k: torch.tensor(preprocess.fit_transform(v[catfeatures])).to(torch.int64)
        for k, v in X.items()
    }
    return X, preprocess

X_num, y,y_mean, y_std, scale = PreprocessNumFeatures(X, y, numfeatures)
X_cat, ordinal = PreprocessCatFeatures(X, catfeatures)

In [32]:
def preprocess_test(df, scale, ordinal, numfeatures, catfeatures):
    X_cat = torch.from_numpy(ordinal.fit_transform(df[catfeatures]))
    X_num = torch.from_numpy(scale.fit_transform(df[numfeatures]))

    return X_cat,X_num

test_cat, test_num = preprocess_test(df_test.drop(["Id"], axis=1),scale, ordinal, numfeatures, catfeatures)

In [33]:
model = rtdl.FTTransformer.make_default(
    n_num_features=X_num["train"].shape[1],
    cat_cardinalities=cardinalities,
    last_layer_query_idx=[-1],  # it makes the model faster and does NOT affect its output
    d_out=1,
)

In [34]:
model.to(device)
optimizer = (
    model.make_default_optimizer()
    if isinstance(model, rtdl.FTTransformer)
    else torch.optim.AdamW(model.parameters())
)
loss_fn = F.cross_entropy

In [35]:
def apply_model(x_num, x_cat=None):
    if isinstance(model, rtdl.FTTransformer):
        return model(x_num.float(), x_cat)
    elif isinstance(model, (rtdl.MLP, rtdl.ResNet)):
        assert x_cat is None
        return model(x_num)
    else:
        raise NotImplementedError(
            f'Looks like you are using a custom model: {type(model)}.'
            ' Then you have to implement this branch first.'
        )

In [36]:
@torch.no_grad()
def evaluate(part):
    model.eval()
    prediction = []
    for batch in zero.iter_batches(torch.cat((X_num[part],X_cat[part]), 1) ,1024):
        prediction.append(apply_model(batch[:,:55], batch[:, -1:].to(torch.int64)))
    prediction = torch.cat(prediction).squeeze(1).cpu().numpy()
    target = y[part].cpu().numpy()
    prediction = np.round(scipy.special.expit(prediction))

    # Reverse the normalization of the target labels
    target = (target * y_std) + y_mean
    target = np.round(target)

    score = sklearn.metrics.accuracy_score(target, prediction)
    return score


batch_size = 64
train_loader = zero.data.IndexLoader(len(X["train"]), batch_size, device=device)

progress = zero.ProgressTracker(patience=100)

print(f'Test score before training: {evaluate("test"):.4f}')

Test score before training: 0.1774


In [44]:
n_epochs = 50 # can increase the epoch size
report_frequency = len(X['train']) // batch_size // 5
for epoch in range(1, n_epochs + 1):
    for iteration, batch_idx in enumerate(train_loader):
        model.train()
        optimizer.zero_grad()
        x_num_batch = X_num['train'][batch_idx]
        x_cat_batch = X_cat['train'][batch_idx].to(torch.int64)
        y_batch = y['train'][batch_idx]
        loss = loss_fn(apply_model(x_num_batch, x_cat_batch).squeeze(1), y_batch)
        loss.backward()
        optimizer.step()
        if iteration % report_frequency == 0:
            print(f'(epoch) {epoch} (batch) {iteration} (loss) {loss.item():.4f}')

    val_score = evaluate('val')
    test_score = evaluate('test')
    print(f'Epoch {epoch:03d} | Validation score: {val_score:.4f} | Test score: {test_score:.4f}', end='')
    progress.update((-1 if task_type == 'regression' else 1) * val_score)
    if progress.success:
        print(' <<< BEST VALIDATION EPOCH', end='')
    print()
    if progress.fail:
        break

(epoch) 1 (batch) 0 (loss) -141.4786
(epoch) 1 (batch) 1 (loss) -74.6255
(epoch) 1 (batch) 2 (loss) -121.8938
(epoch) 1 (batch) 3 (loss) -122.0004
(epoch) 1 (batch) 4 (loss) -71.0981
(epoch) 1 (batch) 5 (loss) -115.0926
(epoch) 1 (batch) 6 (loss) 1.5214
Epoch 001 | Validation score: 0.9192 | Test score: 0.9032
(epoch) 2 (batch) 0 (loss) -147.3284
(epoch) 2 (batch) 1 (loss) -77.9790
(epoch) 2 (batch) 2 (loss) -119.1450
(epoch) 2 (batch) 3 (loss) -119.7935
(epoch) 2 (batch) 4 (loss) -74.1565
(epoch) 2 (batch) 5 (loss) -123.9295
(epoch) 2 (batch) 6 (loss) -0.5170
Epoch 002 | Validation score: 0.9192 | Test score: 0.9032
(epoch) 3 (batch) 0 (loss) -143.4414
(epoch) 3 (batch) 1 (loss) -89.9877
(epoch) 3 (batch) 2 (loss) -123.9265
(epoch) 3 (batch) 3 (loss) -116.0043
(epoch) 3 (batch) 4 (loss) -65.7333
(epoch) 3 (batch) 5 (loss) -117.1241
(epoch) 3 (batch) 6 (loss) 1.7708
Epoch 003 | Validation score: 0.9192 | Test score: 0.9032
(epoch) 4 (batch) 0 (loss) -144.4074
(epoch) 4 (batch) 1 (loss)

In [38]:
torch.save(model, 'model_params.pt')

In [39]:
USE_GPU = True

if USE_GPU and torch.cuda.is_available():
    print('using device: cuda')
else:
    print('using device: cpu')

using device: cpu


In [40]:
model = torch.load('model_params.pt')

In [41]:
if USE_GPU and torch.cuda.is_available():
    test_num = test_num.float().cuda()
    test_cat = test_cat.to(torch.int64).cuda()
    dtype1 = torch.cuda.FloatTensor
    dtype2 = torch.cuda.IntTensor
    model.cuda()
else:
    test_num = test_num.float()
    test_cat = test_cat.to(torch.int64)
    dtype1 = torch.FloatTensor
    dtype2 = torch.IntTensor  # Change this line

test_num = Variable(test_num).type(dtype1)
test_cat = Variable(test_cat).type(dtype2)

In [42]:
model.eval()
with torch.no_grad():
    predict = model(test_num, test_cat)

In [43]:
Submission = pd.DataFrame()
Submission["Id"] = df_test["Id"]
Submission["Class"] = (predict.cpu().detach().numpy() * y_std) + y_mean
Submission.to_csv('submission.csv', index=False)