### Notebook for concept detection in a neural network

In [1]:
import numpy as np
import os
import sys
from tqdm import tqdm
import json
import random
import string

# Make the sibling `src` directory importable so the project-local imports
# below resolve. Guarded so that re-running this cell does not keep appending
# the same entry to sys.path.
SRC_DIR = os.path.abspath(os.path.join(os.path.pardir, 'src'))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

import concepts
import env
from policy import ActorCriticNet

# --- Configuration -----------------------------------------------------------
# Identifies which saved training session / checkpoints are analysed.
model_name = "net"
session_name = "starship"
board_size = 5
# Derived from board_size so the results folder name can never disagree with
# the board size used to load the models (evaluates to "5x5" for board_size=5).
board_name = f"{board_size}x{board_size}"

# Training epochs whose checkpoints are loaded as agents for self-play sampling.
agents_to_sample = [0, 150, 300, 450, 600, 750, 900, 1050]

# Directory holding the saved checkpoints; keeps the trailing slash because
# file names are appended to it by plain string concatenation.
full_model_path = f"../models/saved_sessions/board_size_{board_size}/{session_name}/"

# Concept being probed for. Change only this line to probe another concept.
CONCEPT_FUNC = concepts.concept_two_eyes

# Derived from CONCEPT_FUNC so that the label used in log lines and output
# paths always matches the function that actually labelled the positions.
CONCEPT_NAME = CONCEPT_FUNC.__name__


2023-10-23 14:30:10.826971: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def load_model(full_name, model_name, epoch):
    """Build an ActorCriticNet from the checkpoint saved at the given epoch.

    The checkpoint path is `full_name` immediately followed by
    "<model_name>_<epoch>.keras"; no path separator is inserted, so
    `full_name` is expected to already end with one.

    Uses the notebook-level `board_size` when constructing the network.
    """
    checkpoint_file = f"{full_name}{model_name}_{epoch}.keras"
    return ActorCriticNet(board_size, checkpoint_file)

# One loaded network per sampled epoch, in the same order as agents_to_sample.
agents = []
for epoch in agents_to_sample:
    agents.append(load_model(full_model_path, model_name, epoch))



In [20]:
def play_match(agents: list[ActorCriticNet], board_size, concept_function, sample_ratio=0.2):
    """Play one game between two agents and sample concept-labelled positions.

    Before each move the current position is kept with probability
    `sample_ratio`. A kept position is labelled by calling
    `concept_function` on the full environment state and stored (with planes
    3 and 5 removed) in the positive or negative list accordingly.

    Args:
        agents: Two agents; `agents[0]` and `agents[1]` alternate moves, and a
            coin flip decides which of them moves first.
        board_size: Side length of the board passed to `env.GoEnv`.
        concept_function: Callable taking the full state and returning a
            truthy value when the concept is present.
        sample_ratio: Probability of sampling the position before each move.
            Defaults to 0.2, the value that was previously hard-coded.

    Returns:
        Tuple `(positive_cases, negative_cases)` of lists of reduced states.
    """
    go_env = env.GoEnv(board_size)
    state = go_env.reset()

    # Coin flip for which of the two agents makes the first move.
    current_player = 1 if np.random.random() > 0.5 else 0

    # Hard cap on game length so a match always terminates.
    max_moves = board_size * board_size * 4

    positive_cases = []
    negative_cases = []

    moves = 0
    game_over = False
    while not game_over and moves < max_moves:
        if np.random.random() < sample_ratio:
            # Drop planes 3 and 5 of the state; only build this copy for
            # positions that are actually sampled.
            # NOTE(review): presumably this leaves the 4 planes the network
            # takes as input (sampled shapes are (4, 5, 5)) -- confirm against
            # the env's state layout.
            reduced_state = np.delete(state, [3, 5], axis=0)
            if concept_function(state):
                positive_cases.append(reduced_state)
            else:
                negative_cases.append(reduced_state)

        # The agent picks its move from the full (unreduced) state.
        action = agents[current_player].best_action(state)
        state, _, game_over, _ = go_env.step(action)
        moves += 1

        # Alternate between agent 0 and agent 1.
        current_player = (current_player + 1) % 2

    return positive_cases, negative_cases

positive_cases = []
negative_cases = []

CASES_TO_SAMPLE = 1000 # 25000

# Every unordered pairing of two distinct agents; each pairing plays one match
# per round.
matchups = [(i, j) for i in range(len(agents)) for j in range(i + 1, len(agents))]
if not matchups:
    # Without any pairing no match is ever played, so the sampling loop below
    # would spin forever.
    raise ValueError("At least two agents are required to sample positions")

# The context manager closes the progress bar even if sampling is interrupted.
with tqdm(total=CASES_TO_SAMPLE, desc="Positive cases") as positive_bar:
    # Keep playing full rounds until enough positive cases were collected.
    while len(positive_cases) < CASES_TO_SAMPLE:
        for i, j in matchups:
            pos, neg = play_match([agents[i], agents[j]], board_size, CONCEPT_FUNC)
            positive_cases.extend(pos)
            negative_cases.extend(neg)
            # Clamp the increment so the bar never runs past its total.
            positive_bar.update(min(len(pos), CASES_TO_SAMPLE - positive_bar.n))

# Trim both classes to at most CASES_TO_SAMPLE entries. Only the positive
# count is guaranteed to reach the target; negatives may be fewer.
positive_cases = positive_cases[:CASES_TO_SAMPLE]
negative_cases = negative_cases[:CASES_TO_SAMPLE]

Positive cases:   1%|▏         | 13/1000 [00:14<18:07,  1.10s/it]
Positive cases:  28%|██▊       | 281/1000 [03:31<08:13,  1.46it/s]

In [14]:
# Convert the sampled lists of positions into numpy arrays and report their
# shapes (first axis is the number of cases).
positive_cases, negative_cases = np.array(positive_cases), np.array(negative_cases)

print("Positive cases: ", positive_cases.shape)
print("Negative cases: ", negative_cases.shape)

Positive cases:  (250, 4, 5, 5)
Negative cases:  (250, 4, 5, 5)


In [15]:
# Count how many *pairs* of positive cases are identical, alongside the total
# number of cases. Because pairs are counted, the result can exceed the total:
# a position occurring k times contributes k * (k - 1) / 2 pairs.
#
# Identical positions are grouped with np.unique rather than compared pairwise
# in a Python loop, which does not scale to the intended sample sizes.
num_cases = len(positive_cases)
if num_cases > 1:
    # One row per case so whole positions can be compared as rows.
    flat_cases = np.asarray(positive_cases).reshape(num_cases, -1)
    _, group_sizes = np.unique(flat_cases, axis=0, return_counts=True)
    identical_cases = int((group_sizes * (group_sizes - 1) // 2).sum())
    unique_cases = int(len(group_sizes))
else:
    identical_cases = 0
    unique_cases = num_cases

print(f"Total cases: {len(positive_cases)}")
print(f"Identical cases: {identical_cases}")
print(f"Unique cases: {unique_cases}")

Total cases: 250
Identical cases: 472


In [16]:
# Print all the positive cases
for i in range(len(positive_cases)):
    print(f"Positive case {i}:")
    print(positive_cases[i])
    print()

Positive case 0:
[[[0. 0. 0. 0. 0.]
  [1. 1. 0. 0. 0.]
  [0. 1. 1. 0. 0.]
  [1. 1. 1. 0. 0.]
  [0. 1. 1. 0. 0.]]

 [[1. 0. 1. 1. 0.]
  [0. 0. 1. 0. 1.]
  [0. 0. 0. 1. 1.]
  [0. 0. 0. 1. 1.]
  [0. 0. 0. 0. 0.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]]]

Positive case 1:
[[[0. 0. 0. 0. 0.]
  [1. 1. 0. 0. 0.]
  [0. 1. 1. 0. 0.]
  [1. 1. 1. 0. 0.]
  [0. 1. 1. 0. 0.]]

 [[1. 1. 1. 1. 0.]
  [0. 0. 1. 0. 1.]
  [0. 0. 0. 1. 1.]
  [0. 0. 0. 1. 1.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]]

Positive case 2:
[[[0. 0. 0. 0. 0.]
  [1. 1. 0. 0. 0.]
  [0. 1. 1. 0. 0.]
  [1. 1. 1. 0. 0.]
  [0. 1. 1. 0. 0.]]

 [[1. 1. 1. 1. 0.]
  [0. 0. 1. 0. 1.]
  [0. 0. 0. 1. 1.]
  [0. 0. 0. 1. 1.

In [18]:
epochs_to_look_at = [0, 150, 300, 450, 600, 750, 900, 1050]

POSITIONS_TO_CONSIDER = 700 #40000
VALIDATION_POSITIONS = 10000 #10000

# The stacked positions and their labels do not depend on the epoch, so they
# are built once. Labels are 1 for positive cases and 0 for negative cases and
# serve as the regression targets.
unshuffled_cases = np.concatenate([positive_cases, negative_cases])
unshuffled_labels = np.array([1] * len(positive_cases) + [0] * len(negative_cases))

# The first POSITIONS_TO_CONSIDER shuffled positions train the probe; up to
# VALIDATION_POSITIONS of the following ones validate it.
validation_end = POSITIONS_TO_CONSIDER + VALIDATION_POSITIONS
if unshuffled_labels.shape[0] <= POSITIONS_TO_CONSIDER:
    print(
        f"WARNING: only {unshuffled_labels.shape[0]} positions are available but "
        f"POSITIONS_TO_CONSIDER={POSITIONS_TO_CONSIDER}; the validation split will be empty."
    )

for epoch in epochs_to_look_at:
    model = load_model(full_model_path, model_name, epoch)

    # Fresh random ordering for every epoch, applied identically to positions
    # and labels so they stay aligned.
    shuffled_indices = np.arange(unshuffled_labels.shape[0])
    np.random.shuffle(shuffled_indices)

    all_cases = unshuffled_cases[shuffled_indices]
    all_labels = unshuffled_labels[shuffled_indices]

    concept_presences = {}

    batched_outputs = model.get_all_resblock_outputs(all_cases)

    # Merge outputs: the model output is iterated as batches, each holding one
    # entry per layer. Regroup into one array per layer covering all positions.
    layer_chunks = []
    for output_batch in batched_outputs:
        for i, output_layer in enumerate(output_batch):
            if len(layer_chunks) <= i:
                layer_chunks.append([])
            layer_chunks[i].append(np.asarray(output_layer))

    outputs = [np.concatenate(chunks) for chunks in layer_chunks]

    # Perform regression
    concept_presence_per_layer = []
    for (i, output) in enumerate(outputs):
        # Flatten each position's activations, giving (n, k) samples where n is
        # the number of positions and k the number of activation values in layer i.
        points = output.reshape((output.shape[0], np.prod(output.shape[1:])))
        print(f"Performing regression for layer {i}")
        # NOTE(review): targets are 0/1 labels yet is_binary=False is passed;
        # confirm this is the intended mode of concepts.perform_regression.
        score = concepts.perform_regression(
            points=points[:POSITIONS_TO_CONSIDER],
            targets=all_labels[:POSITIONS_TO_CONSIDER],
            validation_points=points[POSITIONS_TO_CONSIDER:validation_end],
            validation_targets=all_labels[POSITIONS_TO_CONSIDER:validation_end],
            is_binary=False
        )
        concept_presence_per_layer.append(score)

        print(f"The presence of {CONCEPT_NAME} in resblock {i} is {score}")
    concept_presences[CONCEPT_NAME] = concept_presence_per_layer

    # Results for this epoch live in their own folder.
    output_dir = "../concept_presences/{}/{}/{}/{}".format(board_name, session_name, CONCEPT_NAME, epoch)

    # Remove result files left over from a previous run of this epoch.
    if os.path.isdir(output_dir):
        for file in os.listdir(output_dir):
            stale_path = "{}/{}".format(output_dir, file)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

    # makedirs creates any missing parent folders as well.
    os.makedirs(output_dir, exist_ok=True)

    random_suffix = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))

    # Save concept presences (one score per layer) in a json file
    with open("{}/{}.json".format(output_dir, random_suffix), "w") as f:
        json.dump(concept_presences[CONCEPT_NAME], f)




Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 241.47it/s]


Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.868003113380433
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.8308408243378722
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.8042610414307192
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.769378667969815


Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 239.47it/s]


Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.8708748396585474
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.8998526178657347
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.8861264164123976
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.6449199736034256


Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 234.71it/s]


Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.8855133673486126
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.8541182319755268
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.788695957253686
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.6357739377085619


Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 220.19it/s]


Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.8666138503595755
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.8948729750055942
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.8451283978881965
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.6006319880645954


Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 241.02it/s]


Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.8382035532482892
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.8074608640838699
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.7658947936469765
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.7139801852160741


Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 246.50it/s]


Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.8412335769423424
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.7740383725069481
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.852138380133342
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.6939742817684085


Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 233.70it/s]


Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.9146295043303407
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.8501492480349165
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.846392902037627
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.805462194613944


Getting resblock outputs: 100%|██████████| 16/16 [00:00<00:00, 248.09it/s]

Performing regression for layer 0
The presence of concept_two_eyes in resblock 0 is 0.8391058942312353
Performing regression for layer 1
The presence of concept_two_eyes in resblock 1 is 0.8157457180754629
Performing regression for layer 2
The presence of concept_two_eyes in resblock 2 is 0.8354866245987705
Performing regression for layer 3
The presence of concept_two_eyes in resblock 3 is 0.6321682135585971



