### Notebook for concept detection in neural network

In [8]:
import numpy as np
import os
import sys
from tqdm import tqdm
import json
import random
import string

sys.path.append(os.path.abspath(os.path.join(os.path.pardir, 'src')))

import concepts
import env
from policy import ActorCriticNet

model_name = "net"
board_name = "5x5"

agents_to_sample = [0, 150, 300, 450, 600]

full_model_path = "../models/saved_sessions/board_size_5/"
board_size = 5
dims = (4, 5, 5)

CONCEPT_FUNC = concepts.concept_area_advantage

CONCEPT_NAME = concepts.concept_area_advantage.__name__


In [9]:
def load_model(full_name, model_name, epoch):
    model_path = full_name + model_name + "_" + str(epoch) + ".keras"
    model = ActorCriticNet(board_size, model_path)
    return model

agents = [load_model(full_model_path, model_name, epoch) for epoch in agents_to_sample]



In [10]:
def play_match(agents: list[ActorCriticNet], board_size, concept_function):
    go_env = env.GoEnv(board_size)
    state = go_env.reset()

    player_to_start = 1 if np.random.random() > 0.5 else 0
    current_player = player_to_start

    positive_cases = []
    negative_cases = []

    sample_ratio = 0.2

    game_over = False

    while not game_over:
        # Remove array index 3 and 5 from the current state making it an shape of (4, 5, 5)
        state_copy = np.delete(state, [3, 5], axis=0)
        
        if np.random.random() < sample_ratio:
            if concept_function(state):
                positive_cases.append(state_copy)
            else:
                negative_cases.append(state_copy)
        
        action = agents[current_player].best_action(state)

        state, _, game_over, _ = go_env.step(action)

        current_player = (current_player + 1) % 2
    
    return positive_cases, negative_cases

In [11]:
positive_cases = []
negative_cases = []

CASES_TO_SAMPLE = 100 # 25000

positive_bar = tqdm(total=CASES_TO_SAMPLE, desc="Positive cases")
while len(positive_cases) < CASES_TO_SAMPLE:
    pos, neg = play_match(agents, board_size, CONCEPT_FUNC)
    positive_cases.extend(pos)
    negative_cases.extend(neg)
    positive_bar.update(len(pos))

positive_cases = positive_cases[:CASES_TO_SAMPLE]
negative_cases = negative_cases[:CASES_TO_SAMPLE]

Positive cases: 101it [05:44,  3.41s/it]


In [12]:
positive_cases = np.array(positive_cases)
negative_cases = np.array(negative_cases)

print("Positive cases: ", positive_cases.shape)
print("Negative cases: ", negative_cases.shape)

Positive cases:  (100, 4, 5, 5)
Negative cases:  (100, 4, 5, 5)


In [13]:
epochs_to_look_at = [0, 150, 300, 450, 600]

for epoch in epochs_to_look_at:
    path = full_model_path + model_name + "_" + str(epoch) + ".keras"
    model = ActorCriticNet(board_size, path)
    all_cases = np.concatenate([positive_cases, negative_cases])
    all_labels = [1] * positive_cases.shape[0] + [0] * negative_cases.shape[0]
    all_labels = np.array(all_labels)
    shuffled_indices = np.arange(all_labels.shape[0])
    np.random.shuffle(shuffled_indices)
    all_cases = all_cases[shuffled_indices]
    all_labels = all_labels[shuffled_indices]
    POSITIONS_TO_CONSIDER = 40 #40000
    VALIDATION_POSITIONS = 10 #10000

    concept_presences = {}
    
    outputs = model.get_all_resblock_outputs(all_cases)

    # Merge outputs
    merged_outputs = []
    for output_batch in outputs:
        for i, output_layer in enumerate(output_batch):
            if len(merged_outputs) <= i:
                merged_outputs.append([])
            merged_outputs[i].extend(output_layer)

    for i, layer_output in enumerate(merged_outputs):
        merged_outputs[i] = np.array(merged_outputs[i])
    
    outputs = merged_outputs


    # Perform regression
    concept_presence_per_layer = []
    for (i, output) in enumerate(outputs):
        points = output.reshape((output.shape[0], np.prod(output.shape[1:])))
        # So one has (n, k) samples where n is the number of positions, and k is the total number of activation values in layer i.
        print("Performing regression for layer {}".format(i))
        # points = np.concatenate([points, actual_outputs], axis=1)
        score = concepts.perform_regression(
            points[:POSITIONS_TO_CONSIDER], 
            all_labels[:POSITIONS_TO_CONSIDER], 
            points[POSITIONS_TO_CONSIDER:], 
            all_labels[POSITIONS_TO_CONSIDER:], 
            False
        )
        concept_presence_per_layer.append(score)

        print("The presence of {} in resblock {} is {}".format(CONCEPT_NAME, i, score))
    concept_presences[CONCEPT_NAME] = concept_presence_per_layer

    # Save concept presences in json file
    os.makedirs("../concept_presences", exist_ok=True)
    os.makedirs("../concept_presences/{}".format(board_name), exist_ok=True)
    os.makedirs("../concept_presences/{}/{}".format(board_name, model_name), exist_ok=True)
    os.makedirs("../concept_presences/{}/{}/{}".format(board_name, model_name, CONCEPT_NAME), exist_ok=True)
    os.makedirs("../concept_presences/{}/{}/{}/{}".format(board_name, model_name, CONCEPT_NAME, epoch), exist_ok=True)

    random_suffix = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))

    with open("../concept_presences/{}/{}/{}/{}/{}.json".format(board_name, model_name, CONCEPT_NAME, epoch, random_suffix), "w") as f:
        json.dump(concept_presences[CONCEPT_NAME], f)




Getting resblock outputs: 100%|██████████| 7/7 [00:00<00:00, 251.23it/s]


Performing regression for layer 0
The presence of concept_area_advantage in resblock 0 is 0.8005235796681776
Performing regression for layer 1
The presence of concept_area_advantage in resblock 1 is 0.7856379398951262
Performing regression for layer 2
The presence of concept_area_advantage in resblock 2 is 0.7716334704738164
Performing regression for layer 3
The presence of concept_area_advantage in resblock 3 is 0.7976329786502842


Getting resblock outputs: 100%|██████████| 7/7 [00:00<00:00, 227.24it/s]


Performing regression for layer 0
The presence of concept_area_advantage in resblock 0 is 0.8304618113784658
Performing regression for layer 1
The presence of concept_area_advantage in resblock 1 is 0.909078987469455
Performing regression for layer 2
The presence of concept_area_advantage in resblock 2 is 0.9028865175648517
Performing regression for layer 3
The presence of concept_area_advantage in resblock 3 is 0.9185410783478843


Getting resblock outputs: 100%|██████████| 7/7 [00:00<00:00, 231.02it/s]


Performing regression for layer 0
The presence of concept_area_advantage in resblock 0 is 0.8357473093560657
Performing regression for layer 1
The presence of concept_area_advantage in resblock 1 is 0.8412329238620877
Performing regression for layer 2
The presence of concept_area_advantage in resblock 2 is 0.841712973309972
Performing regression for layer 3
The presence of concept_area_advantage in resblock 3 is 0.8505535895229004


Getting resblock outputs: 100%|██████████| 7/7 [00:00<00:00, 230.36it/s]


Performing regression for layer 0
The presence of concept_area_advantage in resblock 0 is 0.811951651105612
Performing regression for layer 1
The presence of concept_area_advantage in resblock 1 is 0.8815635557514147
Performing regression for layer 2
The presence of concept_area_advantage in resblock 2 is 0.8617497840836311
Performing regression for layer 3
The presence of concept_area_advantage in resblock 3 is 0.9370898831670431


Getting resblock outputs: 100%|██████████| 7/7 [00:00<00:00, 255.89it/s]

Performing regression for layer 0
The presence of concept_area_advantage in resblock 0 is 0.7744698727611571
Performing regression for layer 1
The presence of concept_area_advantage in resblock 1 is 0.8633717917346053
Performing regression for layer 2
The presence of concept_area_advantage in resblock 2 is 0.8565986912525883
Performing regression for layer 3
The presence of concept_area_advantage in resblock 3 is 0.9464436919059722





