### Notebook for concept detection in a neural network

In [1]:
import numpy as np
import os
import sys
from tqdm import tqdm
import json
import random
import string

sys.path.append(os.path.abspath(os.path.join(os.path.pardir, 'src')))

import concepts
import env
from policy import ActorCriticNet

# --- Experiment configuration ---------------------------------------------
# Everything a reader might tune lives here; later cells only read these names.

# Seed NumPy's global RNG, which the sampling and shuffling cells draw from
# (np.random.random / np.random.shuffle), so the NumPy-driven randomness is
# repeatable after a kernel restart.
SEED = 0
np.random.seed(SEED)

model_name = "net"

# Board side length. The board label and the checkpoint directory are derived
# from it so the three values cannot drift apart when the size is changed.
board_size = 5
board_name = f"{board_size}x{board_size}"
full_model_path = f"../models/saved_sessions/board_size_{board_size}/"

# Training epochs whose saved checkpoints are loaded as agents.
agents_to_sample = [0, 150, 300, 450, 600]

# Concept under study. CONCEPT_NAME is derived from CONCEPT_FUNC, so switching
# to another concept is a single-line edit.
CONCEPT_FUNC = concepts.concept_win_on_pass
CONCEPT_NAME = CONCEPT_FUNC.__name__


2023-10-18 11:24:45.472435: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def load_model(full_name, model_name, epoch):
    """Build an ActorCriticNet from the checkpoint saved at a given epoch.

    The checkpoint path is ``full_name`` immediately followed by
    ``<model_name>_<epoch>.keras``. No path separator is inserted, so
    ``full_name`` must already end with one. The board size is read from the
    module-level ``board_size``.
    """
    checkpoint_file = f"{full_name}{model_name}_{epoch}.keras"
    return ActorCriticNet(board_size, checkpoint_file)


# One loaded network per entry of agents_to_sample, in the same order.
agents = [
    load_model(full_model_path, model_name, checkpoint_epoch)
    for checkpoint_epoch in agents_to_sample
]



In [3]:
def play_match(agents: list[ActorCriticNet], board_size, concept_function):
    """Play one game between two agents and collect concept-labelled positions.

    Two agents are drawn at random from ``agents`` for every game, and the
    order of the draw decides who moves first. Before each move the current
    position is recorded with probability ``sample_ratio`` and filed as a
    positive or negative example according to ``concept_function``.

    Args:
        agents: Pool of players; each must provide ``best_action(state)``.
            With a single agent the game is played as self-play.
        board_size: Forwarded to ``env.GoEnv``.
        concept_function: Called with the full, unmodified state; a truthy
            result marks the position as a positive case.

    Returns:
        ``(positive_cases, negative_cases)``: two lists of state arrays with
        planes 3 and 5 removed along axis 0.

    Raises:
        ValueError: If ``agents`` is empty.
    """
    if len(agents) == 0:
        raise ValueError("play_match needs at least one agent")

    # Seats used to be agents[0] and agents[1] only, so any further agents in
    # the pool never played. Draw both seats from the whole pool instead; the
    # draw order is random, which also randomises who starts. Sampling is
    # without replacement unless the pool holds a single agent.
    seat_indices = np.random.choice(len(agents), size=2, replace=len(agents) < 2)
    players = [agents[index] for index in seat_indices]

    go_env = env.GoEnv(board_size)
    state = go_env.reset()

    positive_cases = []
    negative_cases = []

    # Fraction of visited positions that are recorded.
    sample_ratio = 0.2

    current_player = 0
    game_over = False

    while not game_over:
        if np.random.random() < sample_ratio:
            # Drop planes 3 and 5 of the state. np.delete returns a copy, so
            # the stored sample is independent of later environment updates.
            # Only done for positions that are actually kept.
            state_copy = np.delete(state, [3, 5], axis=0)

            # The concept is evaluated on the full state, not the reduced copy.
            if concept_function(state):
                positive_cases.append(state_copy)
            else:
                negative_cases.append(state_copy)

        action = players[current_player].best_action(state)

        # step() yields a 4-tuple; only the next state and the game-over flag
        # are used here.
        state, _, game_over, _ = go_env.step(action)

        current_player = 1 - current_player

    return positive_cases, negative_cases

positive_cases = []
negative_cases = []

CASES_TO_SAMPLE = 2500 # 25000

# Keep playing until BOTH classes hold CASES_TO_SAMPLE positions. Stopping on
# the positive count alone could leave the negative class short and the
# dataset unbalanced.
# NOTE: like the positive-only loop before it, this does not terminate if the
# concept never (or always) holds in sampled positions.
# The `with` block closes the progress bar even if a match raises.
with tqdm(total=CASES_TO_SAMPLE, desc="Positive cases") as positive_bar:
    while len(positive_cases) < CASES_TO_SAMPLE or len(negative_cases) < CASES_TO_SAMPLE:
        pos, neg = play_match(agents, board_size, CONCEPT_FUNC)

        # Keep only what is still missing for each class. This retains the
        # first CASES_TO_SAMPLE samples of each class without letting the
        # lists grow past the target, and keeps the bar within its total.
        pos = pos[:CASES_TO_SAMPLE - len(positive_cases)]
        neg = neg[:CASES_TO_SAMPLE - len(negative_cases)]

        positive_cases.extend(pos)
        negative_cases.extend(neg)
        positive_bar.update(len(pos))

Positive cases:   0%|          | 0/2500 [00:00<?, ?it/s]

Positive cases: 100%|██████████| 2500/2500 [05:23<00:00,  5.78it/s]

In [4]:
# Stack the sampled positions into arrays (one leading axis entry per sample)
# and print both shapes as a quick sanity check on the class balance.
positive_cases, negative_cases = (
    np.array(case_list) for case_list in (positive_cases, negative_cases)
)

print("Positive cases: ", positive_cases.shape)
print("Negative cases: ", negative_cases.shape)

Positive cases:  (2500, 4, 5, 5)
Negative cases:  (2500, 4, 5, 5)


In [7]:
epochs_to_look_at = [0, 150, 300, 450, 600]

POSITIONS_TO_CONSIDER = 4000 #40000
VALIDATION_POSITIONS = 10000 #10000

# The dataset and its labels do not depend on the epoch, so build them once.
# Labels are 1 for positive cases and 0 for negative cases, in the same order
# as the concatenated cases; they are the regression targets.
ordered_cases = np.concatenate([positive_cases, negative_cases])
ordered_labels = np.array([1] * positive_cases.shape[0] + [0] * negative_cases.shape[0])

# The first POSITIONS_TO_CONSIDER shuffled samples are used for fitting and the
# following (at most VALIDATION_POSITIONS) samples for validation. Fail early
# instead of handing an empty validation set to the regression.
if ordered_labels.shape[0] <= POSITIONS_TO_CONSIDER:
    raise ValueError(
        f"Need more than {POSITIONS_TO_CONSIDER} positions to leave a validation set, "
        f"got {ordered_labels.shape[0]}"
    )
validation_end = POSITIONS_TO_CONSIDER + VALIDATION_POSITIONS

for epoch in epochs_to_look_at:
    model = load_model(full_model_path, model_name, epoch)

    # Fresh random train/validation split for every epoch.
    shuffled_indices = np.arange(ordered_labels.shape[0])
    np.random.shuffle(shuffled_indices)

    all_cases = ordered_cases[shuffled_indices]
    all_labels = ordered_labels[shuffled_indices]

    concept_presences = {}

    outputs = model.get_all_resblock_outputs(all_cases)

    # `outputs` is iterated as batches, each batch as one entry per layer.
    # Regroup so that merged_outputs[i] holds the samples of layer i across
    # all batches, in batch order.
    merged_outputs = []
    for output_batch in outputs:
        for i, output_layer in enumerate(output_batch):
            if len(merged_outputs) <= i:
                merged_outputs.append([])
            merged_outputs[i].extend(output_layer)

    outputs = [np.array(layer_samples) for layer_samples in merged_outputs]

    # Perform regression
    concept_presence_per_layer = []
    for (i, output) in enumerate(outputs):
        # Flatten to (n, k): n positions, k activation values in layer i.
        points = output.reshape((output.shape[0], -1))
        print(f"Performing regression for layer {i}")
        # NOTE(review): the targets are 0/1 labels but is_binary=False is passed,
        # and the recorded run reports a score of ~1.0 for every layer and epoch,
        # including epoch 0. Confirm this is the intended regression mode.
        score = concepts.perform_regression(
            points=points[:POSITIONS_TO_CONSIDER],
            targets=all_labels[:POSITIONS_TO_CONSIDER],
            validation_points=points[POSITIONS_TO_CONSIDER:validation_end],
            validation_targets=all_labels[POSITIONS_TO_CONSIDER:validation_end],
            is_binary=False
        )
        concept_presence_per_layer.append(score)

        print(f"The presence of {CONCEPT_NAME} in resblock {i} is {score}")
    concept_presences[CONCEPT_NAME] = concept_presence_per_layer

    # Results of this epoch live in their own folder; makedirs creates every
    # missing parent directory in a single call.
    output_dir = "../concept_presences/{}/{}/{}/{}".format(board_name, model_name, CONCEPT_NAME, epoch)
    os.makedirs(output_dir, exist_ok=True)

    # Remove result files left over from a previous run of this epoch.
    # Sub-directories are skipped rather than crashing os.remove.
    for entry in os.listdir(output_dir):
        stale_path = "{}/{}".format(output_dir, entry)
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    random_suffix = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))

    # Save concept presences in json file
    with open("{}/{}.json".format(output_dir, random_suffix), "w") as f:
        json.dump(concept_presences[CONCEPT_NAME], f)




Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 245.45it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9999999995884075
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9999999999983891
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9999999999980971
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9999999993174643


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 277.95it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9999999999991089
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9999999999912036
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9999999999959196
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9999999999975416


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 291.74it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9999999999967163
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.999999999998882
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9999999999990492
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9999999999985395


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 295.31it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9999999998910021
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.999999999998402
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9999999999994916
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.9999999999867514


Getting resblock outputs: 100%|██████████| 157/157 [00:00<00:00, 293.81it/s]


Performing regression for layer 0
The presence of concept_win_on_pass in resblock 0 is 0.9999999999981765
Performing regression for layer 1
The presence of concept_win_on_pass in resblock 1 is 0.9999999999985242
Performing regression for layer 2
The presence of concept_win_on_pass in resblock 2 is 0.9999999999808473
Performing regression for layer 3
The presence of concept_win_on_pass in resblock 3 is 0.999999999997777
