### Notebook for concept detection in a neural network

In [4]:
import numpy as np
import os
import sys
from tqdm import tqdm
import json
import random
import string

sys.path.append(os.path.abspath(os.path.join(os.path.pardir, 'src')))

import concepts
import env
from policy import ActorCriticNet

# --- Experiment configuration ------------------------------------------------
model_name = "net"
session_name = "starship"

# Side length of the board. The board label and the checkpoint directory are
# derived from it so the three values cannot drift apart.
board_size = 5
board_name = f"{board_size}x{board_size}"

# Training epochs whose checkpoints are loaded as agents for self-play sampling.
agents_to_sample = [0, 150, 300, 450, 600, 750, 900, 1050]

full_model_path = f"../models/saved_sessions/board_size_{board_size}/{session_name}/"

# Concept under study. The name is taken from the function itself so that
# swapping CONCEPT_FUNC can never leave a stale CONCEPT_NAME behind.
CONCEPT_FUNC = concepts.concept_win_on_pass
CONCEPT_NAME = CONCEPT_FUNC.__name__


In [5]:
def load_model(full_name, model_name, epoch):
    """Build an ActorCriticNet from the checkpoint saved at `epoch`.

    The checkpoint file is `<full_name><model_name>_<epoch>.keras`. `full_name`
    is used as a plain string prefix, so it must already end with a path
    separator. The network is created for the module-level `board_size`.
    """
    checkpoint_file = f"{full_name}{model_name}_{epoch}.keras"
    return ActorCriticNet(board_size, checkpoint_file)

# One network per sampled checkpoint, in the same order as `agents_to_sample`.
agents = [
    load_model(full_model_path, model_name, checkpoint_epoch)
    for checkpoint_epoch in agents_to_sample
]



In [6]:
def play_match(agents: list[ActorCriticNet], board_size, concept_function, sample_ratio=0.2):
    """Play one game between two of `agents` and sample labelled positions.

    Two distinct agents are drawn at random from `agents` (in random order, so
    either may move first) and alternate moves until the environment reports
    the game as over. When only a single agent is supplied it plays itself.

    Before each move the current state is sampled with probability
    `sample_ratio`. A sampled state is labelled by calling
    `concept_function(state)` on the full state and is stored with planes 3
    and 5 removed along axis 0.

    Args:
        agents: Candidate players; each must provide `best_action(state)`.
        board_size: Board side length passed to `env.GoEnv`.
        concept_function: Callable taking the full state and returning a
            truthy value when the concept is present.
        sample_ratio: Probability of sampling any given position.

    Returns:
        A `(positive_cases, negative_cases)` tuple of lists of reduced states.

    Raises:
        ValueError: If `agents` is empty.
    """
    n_agents = len(agents)
    if n_agents == 0:
        raise ValueError("play_match needs at least one agent")

    # Draw the two players from the whole pool. Indexing `agents` directly with
    # the 0/1 turn counter would only ever let the first two agents play.
    if n_agents >= 2:
        first, second = np.random.choice(n_agents, size=2, replace=False)
    else:
        first = second = 0
    players = (agents[first], agents[second])

    go_env = env.GoEnv(board_size)
    state = go_env.reset()

    positive_cases = []
    negative_cases = []

    turn = 0
    game_over = False

    while not game_over:
        if np.random.random() < sample_ratio:
            # Only build the reduced copy for positions that are actually kept.
            # Dropping planes 3 and 5 leaves 4 planes for the network input
            # (assumes the environment state has 6 planes - TODO confirm).
            state_copy = np.delete(state, [3, 5], axis=0)
            if concept_function(state):
                positive_cases.append(state_copy)
            else:
                negative_cases.append(state_copy)

        action = players[turn].best_action(state)

        state, _, game_over, _ = go_env.step(action)

        turn = 1 - turn

    return positive_cases, negative_cases

CASES_TO_SAMPLE = 2500 # 25000

positive_cases = []
negative_cases = []

# Keep playing matches until enough positive examples have been collected.
# Negative examples are gathered along the way from the same games.
# The `with` block closes the bar when sampling ends, so it is not left open
# to be redrawn under a later cell.
with tqdm(total=CASES_TO_SAMPLE, desc="Positive cases") as positive_bar:
    while len(positive_cases) < CASES_TO_SAMPLE:
        pos, neg = play_match(agents, board_size, CONCEPT_FUNC)
        # The last match may yield more positives than are still needed;
        # clamp the increment so the bar never runs past its total.
        still_needed = CASES_TO_SAMPLE - len(positive_cases)
        positive_bar.update(min(len(pos), still_needed))
        positive_cases.extend(pos)
        negative_cases.extend(neg)

# Trim both classes to the requested size. A class with fewer samples than
# CASES_TO_SAMPLE is kept whole.
positive_cases = positive_cases[:CASES_TO_SAMPLE]
negative_cases = negative_cases[:CASES_TO_SAMPLE]

Positive cases: 2501it [08:20,  9.08it/s]                          

In [7]:
# Stack the sampled positions of each class into a single array.
positive_cases, negative_cases = np.array(positive_cases), np.array(negative_cases)

# Report the array shapes as a sanity check on the class sizes.
print("Positive cases: ", np.shape(positive_cases))
print("Negative cases: ", np.shape(negative_cases))

Positive cases:  (2500, 4, 5, 5)
Negative cases:  (2500, 4, 5, 5)


In [8]:
epochs_to_look_at = [0, 150, 300, 450, 600, 750, 900, 1050]

POSITIONS_TO_CONSIDER = 4000 #40000
VALIDATION_POSITIONS = 10000 #10000

# Build the labelled data set once, outside the epoch loop, so that every
# checkpoint is probed on exactly the same train/validation split.
# `all_labels` has one entry per row of `all_cases`: 1 for positive cases and
# 0 for negative cases. It is used as the regression target.
all_cases = np.concatenate([positive_cases, negative_cases])
all_labels = np.concatenate([
    np.ones(positive_cases.shape[0], dtype=int),
    np.zeros(negative_cases.shape[0], dtype=int),
])

shuffled_indices = np.random.permutation(all_labels.shape[0])
all_cases = all_cases[shuffled_indices]
all_labels = all_labels[shuffled_indices]

if all_labels.shape[0] <= POSITIONS_TO_CONSIDER:
    raise ValueError(
        f"Only {all_labels.shape[0]} positions were sampled, but POSITIONS_TO_CONSIDER="
        f"{POSITIONS_TO_CONSIDER} leaves none for validation"
    )

# The first POSITIONS_TO_CONSIDER rows are used for fitting; at most
# VALIDATION_POSITIONS of the rows after them are used for validation.
validation_end = POSITIONS_TO_CONSIDER + VALIDATION_POSITIONS
training_labels = all_labels[:POSITIONS_TO_CONSIDER]
validation_labels = all_labels[POSITIONS_TO_CONSIDER:validation_end]

for epoch in epochs_to_look_at:
    model = load_model(full_model_path, model_name, epoch)

    concept_presences = {}

    outputs = model.get_all_resblock_outputs(all_cases)

    # Merge outputs: each item of `outputs` is iterated as one entry per layer,
    # and the samples of every entry are appended to that layer's list.
    merged_outputs = []
    for output_batch in outputs:
        for i, output_layer in enumerate(output_batch):
            if len(merged_outputs) <= i:
                merged_outputs.append([])
            merged_outputs[i].extend(output_layer)

    outputs = [np.array(layer_samples) for layer_samples in merged_outputs]

    # Perform regression
    concept_presence_per_layer = []
    for (i, output) in enumerate(outputs):
        # Flatten each sample, giving (n, k): n positions, and k activation
        # values in layer i.
        points = output.reshape((output.shape[0], -1))
        print(f"Performing regression for layer {i}")
        # NOTE(review): the targets are 0/1 labels but is_binary is False -
        # confirm this is intended against concepts.perform_regression.
        score = concepts.perform_regression(
            points=points[:POSITIONS_TO_CONSIDER],
            targets=training_labels,
            validation_points=points[POSITIONS_TO_CONSIDER:validation_end],
            validation_targets=validation_labels,
            is_binary=False
        )
        concept_presence_per_layer.append(score)

        print(f"The presence of {CONCEPT_NAME} in resblock {i} is {score}")
    concept_presences[CONCEPT_NAME] = concept_presence_per_layer

    # os.makedirs creates every missing parent directory in a single call.
    epoch_dir = "../concept_presences/{}/{}/{}/{}".format(board_name, session_name, CONCEPT_NAME, epoch)
    os.makedirs(epoch_dir, exist_ok=True)

    # Remove results left in the epoch folder by a previous run
    for file in os.listdir(epoch_dir):
        os.remove("{}/{}".format(epoch_dir, file))

    random_suffix = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))

    # Save concept presences in json file
    with open("{}/{}.json".format(epoch_dir, random_suffix), "w") as f:
        json.dump(concept_presences[CONCEPT_NAME], f)




Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 246.29it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9164571075474393
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.922455791572831
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9224346859914258
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9295865936286436


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 199.88it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9215124496247492
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9232930653169809
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9224556395410389
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.925773051727351


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 274.55it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9254687346907917
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9300903185637749
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9227880082193735
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9297454119417949


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 261.48it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9158411544681635
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9238500476325349
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9215737387161805
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9228595142289753


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 275.86it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9341621251080776
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9354035429485894
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9300050598536933
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9389120683478436


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 255.70it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9242121226531156
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9269028252586018
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9258865830280107
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9319163497347751


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 282.69it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9249211351725677
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9282121313423874
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.926862253067785
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9287654112120789


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 274.60it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.8952136574803451
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.8974817630277223
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.8897018440497726
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.8937825462388166


Positive cases: 2501it [08:40,  9.08it/s]