# Evaluate Models on Puzzles

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Xmaster6y/lczerolens/blob/main/docs/source/notebooks/features/probe-concepts.ipynb)

## Setup

In [1]:
MODE = "local"  # "colab" | "colab-dev" | "local"

In [2]:
if MODE == "colab":
    !pip install -q lczerolens
elif MODE == "colab-dev":
    !rm -r lczerolens
    !git clone https://github.com/Xmaster6y/lczerolens -b main
    !pip install -q ./lczerolens

In [3]:
!gdown 15__7FHvIR5-JbJvDg2eGUhIPZpkYyM7X -O lc0-19-1876.onnx
!gdown 1CvMyX3KuYxCJUKz9kOb9VX8zIkfISALd -O lc0-19-4508.onnx

Downloading...
From: https://drive.google.com/uc?id=15__7FHvIR5-JbJvDg2eGUhIPZpkYyM7X
To: /Users/xmaster/Work/lczerolens/docs/source/notebooks/features/lc0-19-1876.onnx
100%|██████████████████████████████████████| 97.1M/97.1M [00:02<00:00, 48.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CvMyX3KuYxCJUKz9kOb9VX8zIkfISALd
To: /Users/xmaster/Work/lczerolens/docs/source/notebooks/features/lc0-19-4508.onnx
100%|██████████████████████████████████████| 97.1M/97.1M [00:05<00:00, 16.8MB/s]


## Load a Model

Load a leela network from file (already converted to `onnx`):

In [1]:
from lczerolens import LczeroModel

strong_model = LczeroModel.from_path("lc0-19-4508.onnx")
weak_model = LczeroModel.from_path("lc0-19-1876.onnx")

  from .autonotebook import tqdm as notebook_tqdm


## Import a Game Dataset

In [2]:
from datasets import load_dataset

dataset = load_dataset("lczerolens/tcec-games", split="train")
dataset

Dataset({
    features: ['gameid', 'moves'],
    num_rows: 23297
})

In [3]:
from lczerolens.play import Game


def boards_from_dict(batch):
    new_batch = []
    for game_tuple in zip(*batch.values()):
        game = Game.from_dict(dict(zip(batch.keys(), game_tuple)))
        new_batch.extend(game.to_boards(skip_book_exit=True, output_dict=True))
    return {k: [d[k] for d in new_batch] for k in new_batch[0].keys()}


board_dataset = dataset.select(range(1000)).map(boards_from_dict, batched=True, batch_size=100)
board_datasetdict = board_dataset.train_test_split(test_size=0.1, seed=42)
board_datasetdict

DatasetDict({
    train: Dataset({
        features: ['gameid', 'moves', 'fen'],
        num_rows: 115561
    })
    test: Dataset({
        features: ['gameid', 'moves', 'fen'],
        num_rows: 12841
    })
})

In [24]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(
    board_datasetdict["train"], batch_size=100, shuffle=True, collate_fn=Game.board_collate_fn
)
first_batch = next(iter(train_dataloader))
print(first_batch)

([LczeroBoard('1R6/q3r1k1/4p2p/3p1pp1/1Q1P1P2/4PK1P/5P2/8 b - - 5 104'), LczeroBoard('8/8/7R/p1k4P/rp6/6K1/6P1/8 w - - 0 51'), LczeroBoard('2r5/8/p2pQppk/1P2b2p/P2NP1qP/4PRP1/5RK1/2r5 w - - 2 39'), LczeroBoard('8/4k3/6P1/p2K1B2/P7/8/8/6b1 w - - 65 120'), LczeroBoard('8/8/4pp2/6k1/pp2P2p/3q4/P3R1PP/6RK w - - 22 49'), LczeroBoard('6Q1/1K6/1P6/5k2/1P6/3N4/7q/6b1 b - - 0 67'), LczeroBoard('rb2r1k1/1p3ppn/2p4p/2Qp4/p2P2PP/4PP2/PP3BBq/1R2RK2 w - - 8 28'), LczeroBoard('3n4/p2k1p2/P1p2R2/1p2P1Q1/1P2q3/2pr1NP1/5PK1/8 w - - 6 52'), LczeroBoard('3r2r1/pq2bk2/2p2p2/Pp1n1p1b/3PN2P/1B3P2/1P1B1QR1/3R3K w - - 0 29'), LczeroBoard('r1b1kbr1/pp1p3p/n2Pp1p1/1N3pn1/q1P4P/PpB1P3/3N1PP1/R2QKB1R b KQq - 0 15'), LczeroBoard('8/5p2/2bN1pk1/7p/3K3P/6P1/8/8 b - - 66 183'), LczeroBoard('8/4K3/3P2n1/8/3k4/8/8/7B w - - 1 224'), LczeroBoard('2r3k1/1p2ppbp/pn1p2p1/3P4/1q1NP3/1P2BP2/P1Q3PP/3R2K1 w - - 1 21'), LczeroBoard('3r1rk1/pb2q1pp/1pnpp3/1N6/2P1BP2/2P1Q1P1/P6P/R2R2K1 b - - 6 25'), LczeroBoard('1K6/P3kp1p/2R5/6p1/

## Create a Concept Dataset

In [5]:
from lczerolens.concepts.threat import HasThreat
from lczerolens.lenses import ActivationLens

concept = HasThreat(piece="Q", relative=True)

In [30]:
import numpy as np


def get_activations_and_labels(model, module_name, dataloader, concept, n_batches=3):
    lens = ActivationLens(pattern=module_name)
    activations_list = []
    labels_list = []
    i = 0
    for result in lens.analyse_batched(model, dataloader):
        activations_list.append(result[module_name + "_output"].detach().cpu().numpy())
        i += 1
        if i > n_batches:
            break
    i = 0
    for boards, _ in dataloader:
        for board in boards:
            labels_list.append(concept.compute_label(board))
        i += 1
        if i > n_batches:
            break
    return np.concatenate(activations_list), np.array(labels_list)

In [49]:
train_activations, train_labels = get_activations_and_labels(
    strong_model, "block18/conv2/relu", train_dataloader, concept, n_batches=10
)
train_activations.shape

(1100, 256, 8, 8)

In [57]:
X_train = train_activations.reshape(train_activations.shape[0], -1)
Y_train = train_labels
(X_train.shape, Y_train.shape)

((1100, 16384), (1100,))

## Train a Linear Probe


In [55]:
from sklearn.linear_model import LogisticRegression

probe = LogisticRegression(max_iter=10000)
probe.fit(X_train, Y_train)

In [56]:
print(HasThreat.compute_metrics(probe.predict(X_train), Y_train))

{'accuracy': 0.9563636363636364, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [35]:
test_dataloader = DataLoader(board_datasetdict["test"], batch_size=100, shuffle=True, collate_fn=Game.board_collate_fn)
test_activations, test_labels = get_activations_and_labels(
    strong_model, "block18/conv2/relu", test_dataloader, concept, n_batches=3
)

In [53]:
X_test = test_activations.reshape(test_activations.shape[0], -1)
Y_test = test_labels
print(HasThreat.compute_metrics(probe.predict(X_test), Y_test))

{'accuracy': 0.965, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [38]:
Y_test.sum()

np.int64(14)

## Evaluate the Probe

In [None]:
# Generic eval using ProbingLens

