In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from dataclasses import dataclass

!pip install wandb
import wandb

import numpy as np
from pathlib import Path


torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False



In [3]:
datapath = "/content/drive/My Drive/datafiles/numerai/v4.1"
filters = [('era', '=', "0123")]
df = pd.read_parquet(datapath + "/era_123.parquet", engine='pyarrow', filters=filters).dropna(axis="columns")

features = [f for f in df.columns if f.startswith("feature_")]
targets  = [t for t in df.columns if t.startswith("target_")]
num_features = len(features)
TARGET = "target_nomi_v4_20"

print("Done!")


Done!


In [4]:
@dataclass
class ModelConfig:
    block_size: int = num_features            # num_features
    vocab_size: int = 5                       # 0..4
    n_layer: int = 6
    n_head: int = 8
    n_embed: int = 32
    dropout: float = 0.0


class Transformer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.seq_embed = nn.Embedding(config.vocab_size, config.n_embed)
        self.pos_embed = nn.Embedding(config.block_size, config.n_embed)
        self.transformer = nn.TransformerEncoder(
            encoder_layer   = nn.TransformerEncoderLayer(
                                    d_model         = config.n_embed,
                                    nhead           = config.n_head ,
                                    dim_feedforward = config.n_embed * 4 ,
                                    dropout         = config.dropout ,
                                    activation      = "relu" ,
                                    batch_first     = True ,
                                    bias            = True      # Debería ser false, pero hay un bug en torch 2.2.2
                                ) ,
            num_layers      = config.n_layer
        )
        self.fc = nn.Linear(config.n_embed, config.vocab_size, bias=False)

        n_params = sum(p.numel() for p in self.parameters())
        print("number of parameters: %.2fk" % (n_params/1e3,))


    def forward(self, src, tgt=None):
        device = src.device
        b, t = src.size()          # (tamaño del batch, longitud de secuencia de src)

        pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)   # shape (1, t)

        src_embed = self.seq_embed(src)                 # shape (b, t, n_embed)
        pos_embed = self.pos_embed(pos)                 # shape (1, t, n_embed)
        x = self.transformer(src_embed + pos_embed)     # shape (b, t, n_embed)
        x = x[:, -1, :]                                 # shape (b, n_embed)
        logits = self.fc(x)                             # shape (b, vocab_size-1)

        return logits

In [5]:
# Generar datos de train y test
split = int(len(df) * 0.8)
X_train = torch.from_numpy(df.loc[df.index[:split], features].to_numpy() * 4.0).to(torch.long)
X_test  = torch.from_numpy(df.loc[df.index[split:], features].to_numpy() * 4.0).to(torch.long)
y_train = torch.from_numpy(df.loc[df.index[:split], TARGET].to_numpy() * 4.0).to(torch.long)
y_test  = torch.from_numpy(df.loc[df.index[split:], TARGET].to_numpy() * 4.0).to(torch.long)

num_rows_train = len(X_train)
num_rows_test  = len(X_test)
print(num_rows_train, num_rows_test)

3128 783


In [6]:
filemodel = '/content/drive/My Drive/Colab Notebooks/numerai_era_01.model'

device      = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
config      = ModelConfig()
model       = Transformer(config).to(device)
optimizer   = torch.optim.AdamW(model.parameters(), lr=1e-3)

print("Device: ", device)


# Load model from disk
model_loaded_from_disk = False
if Path(filemodel+'--').is_file():
    print("Modelo cargado de disco")
    model_loaded_from_disk = True
    saved_data = torch.load(filemodel, map_location=torch.device('cpu'))
    model.load_state_dict(saved_data['model_state_dict'])
    optimizer.load_state_dict(saved_data['optimizer_state_dict'])
    print(f"Train loss: {saved_data['train_loss']:.3f}, Test loss: {saved_data['test_loss']:.3f}")

number of parameters: 123.46k
Device:  cuda


In [7]:
# Init WandB logging
wandb.init(
    project = "numerai_era_00.py",
    config = {
        "num_features":             config.block_size,
        "era":                      123,
        "filemodel":                filemodel,
        "model_loaded_from_disk":   model_loaded_from_disk,
        "transformer/num_layers":   config.n_layer,
        "transformer/num_heads":    config.n_head,
        "transformer/dim":          config.n_embed,
        "transformer/dropout":      config.dropout,
    }
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
batch_size = 512        # Optimizado para la instancia T4 de Colab
best_test_loss = 1e9

for epoch in range(1000):
    model.train()
    optimizer.zero_grad(set_to_none=True)
    train_loss = []

    for i in range((num_rows_train+1) // batch_size):
        ini = i * batch_size
        fin = min(num_rows_train, (i+1)*batch_size)
        X = X_train[ini:fin].to(device)
        y = y_train[ini:fin].to(device)

        out = model(X)
        loss = F.cross_entropy(out, y)
        loss.backward()
        train_loss.append(loss.item())

    if epoch % 1 == 0:
        with torch.no_grad():
            #model.eval()      # <--- por lo que sea, esto provoca errores extraños al ejecutar 'model(X)'
            test_loss = []
            for i in range((num_rows_test+1) // batch_size):
                ini = i * batch_size
                fin = min(num_rows_test, (i+1)*batch_size)
                X = X_test[ini:fin].to(device)
                y = y_test[ini:fin].to(device)
                out = model(X)
                test_loss.append(F.cross_entropy(out, y).item())

            test_loss_mean = np.mean(test_loss)
            msg = f"Epoch {epoch:4d} - Train Loss: {np.mean(train_loss):.6f} - Test Loss: {test_loss_mean:.6f}"
            wandb.log({"train_loss": np.mean(train_loss), "test_loss": test_loss_mean})

            if test_loss_mean < best_test_loss:
                best_test_loss = test_loss_mean
                torch.save({
                    'epoch'                 : epoch,
                    'model_state_dict'      : model.state_dict(),
                    'optimizer_state_dict'  : optimizer.state_dict(),
                    'train_loss'            : np.mean(train_loss),
                    'test_loss'             : test_loss_mean,
                    }, filemodel)    # Almacenar los weights porque son los mejores hasta el momento
                msg += " [*]"

            print(msg)


    # Actualizar weights
    optimizer.step()


Epoch    0 - Train Loss: 1.484213 - Test Loss: 1.480178 [*]
Epoch    1 - Train Loss: 1.350970 - Test Loss: 1.342418 [*]
Epoch    2 - Train Loss: 1.312004 - Test Loss: 1.301006 [*]
Epoch    3 - Train Loss: 1.301045 - Test Loss: 1.288991 [*]
Epoch    4 - Train Loss: 1.299235 - Test Loss: 1.286936 [*]
Epoch    5 - Train Loss: 1.299493 - Test Loss: 1.287742
Epoch    6 - Train Loss: 1.299463 - Test Loss: 1.288876
Epoch    7 - Train Loss: 1.298968 - Test Loss: 1.289944
Epoch    8 - Train Loss: 1.298488 - Test Loss: 1.291223
Epoch    9 - Train Loss: 1.298375 - Test Loss: 1.292826
Epoch   10 - Train Loss: 1.298564 - Test Loss: 1.294527
Epoch   11 - Train Loss: 1.298661 - Test Loss: 1.295766
Epoch   12 - Train Loss: 1.298313 - Test Loss: 1.296129
Epoch   13 - Train Loss: 1.297470 - Test Loss: 1.295567
Epoch   14 - Train Loss: 1.296351 - Test Loss: 1.294362
Epoch   15 - Train Loss: 1.295261 - Test Loss: 1.292886
Epoch   16 - Train Loss: 1.294438 - Test Loss: 1.291454
Epoch   17 - Train Loss: 1.2

In [None]:
wandb.finish()