In [1]:
import pandas as pd

## load data

In [2]:
df = pd.read_csv("../../../data/lichess_db_standard_rated_2013-01_cleaned.csv", nrows=100_000)

# split data

In [3]:
from sklearn.model_selection import train_test_split

target = df["selected_move"]
labels = df.drop(columns=["selected_move"])

train_x, val_x, train_y, val_y = train_test_split(labels, target, test_size=0.05, random_state=100)

## Scale the elos

In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(train_x[["white_elo", "black_elo"]])
train_x[["white_elo", "black_elo"]] = scaler.transform(train_x[["white_elo", "black_elo"]])
val_x[["white_elo", "black_elo"]] = scaler.transform(val_x[["white_elo", "black_elo"]])

## Define the network

In [5]:
import torch
from torch import nn

In [6]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()

        # this is identical to the stock fish - engineered features and class based output layer
        self.layers = nn.Sequential(
            nn.Linear(67, 512),
            nn.ReLU(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 4164),
            nn.Softmax(),
        )

    def forward(self, x):
        return self.layers(x)


model = NeuralNetwork()

In [7]:
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset


def train_model(model, train_x, train_y, val_x, val_y, epochs=10, batch_size=256, lr=0.001):
    # Convert data to tensors
    train_x_tensor = torch.tensor(train_x.values, dtype=torch.float32)
    train_y_tensor = torch.tensor(train_y.values, dtype=torch.long)
    val_x_tensor = torch.tensor(val_x.values, dtype=torch.float32)
    val_y_tensor = torch.tensor(val_y.values, dtype=torch.long)

    # Create data loaders
    train_dataset = TensorDataset(train_x_tensor, train_y_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    val_dataset = TensorDataset(val_x_tensor, val_y_tensor)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=lr)

    # Training loop
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0

        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = 100 * correct / total

        print(
            f"Epoch [{epoch + 1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.2f}%"
        )


# Train the model
train_model(model, train_x, train_y, val_x, val_y, epochs=5, batch_size=256)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.