### Notebook for concept detection in neural networks

In [15]:
%load_ext autoreload
%autoreload 2

In [1]:
import numpy as np
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.pardir, 'src')))

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # or any {'0', '1', '2'}

from concepts import static_concepts, linear_regression, generate_static_concept_datasets
from policy import ConvNet, ResNet
from utils import concept_folder_setup_and_score

import tensorflow as tf

# Set memory growth
# Enable on-demand allocation on every visible GPU. Iterating (instead of
# indexing element 0) keeps the notebook runnable on CPU-only machines, where
# the device list is empty, and applies the same setting to all GPUs.
physical_devices = tf.config.list_physical_devices('GPU')
for physical_device in physical_devices:
    tf.config.experimental.set_memory_growth(physical_device, True)

# --- Which saved session to probe ---
# reward_function is a component of the saved-session path below;
# accepted values noted by the author: "concept_fn", "zero_sum" or "jem".
reward_function = "zero_sum"
model_name = "net"
session_name = "probing"
board_size = 7
board_name = f'{board_size}x{board_size}'
resnet = True

# Architecture tag; also a component of the saved-session path.
if resnet:
    model_type = "resnet"
else:
    model_type = "convnet"

# Checkpoint numbers appended to the model file name when loading.
agents_to_sample = [0, 10, 50, 100, 300, 500, 600, 800, 1000]

full_model_path = f"../models/saved_sessions/{model_type}/{reward_function}/board_size_{board_size}/{session_name}/"

# --- Concept being probed ---
CONCEPT_FUNC = static_concepts.difference_in_stones
CONCEPT_NAME = CONCEPT_FUNC.__name__
BINARY = False

# --- Dataset sampling ---
CASES_TO_SAMPLE = 2500  # author's note: 25000 (presumably the full-size run; confirm)
SAMPLE_RATIO = 0.5

# Load the models
def load_model(full_name, model_name, epoch):
    """Build the network wrapper for one saved checkpoint.

    The checkpoint file is ``<full_name><model_name>_<epoch>.keras``; no path
    separator is inserted, so ``full_name`` must already end with one.

    Args:
        full_name: Directory prefix of the saved session.
        model_name: Base file name of the checkpoints.
        epoch: Checkpoint number appended to the file name.

    Returns:
        A ``ResNet`` when the notebook-level ``resnet`` flag is truthy,
        otherwise a ``ConvNet``; both are constructed with the notebook-level
        ``board_size`` and the checkpoint path.
    """
    model_path = f"{full_name}{model_name}_{str(epoch)}.keras"
    network_cls = ResNet if resnet else ConvNet
    return network_cls(board_size, model_path)

# One loaded network per entry of agents_to_sample, in the same order.
agents = []
for epoch in agents_to_sample:
    agents.append(load_model(full_model_path, model_name, epoch))

In [2]:
# Generate the probing dataset for CONCEPT_FUNC using the loaded agents.
# NOTE(review): when BINARY is False, later cells use the second returned
# array as the regression targets rather than as negative examples.
positive_cases, negative_cases = generate_static_concept_datasets(
    CASES_TO_SAMPLE,
    agents,
    board_size,
    CONCEPT_FUNC,
    sample_ratio=SAMPLE_RATIO,
    binary=BINARY,
)

# Convert both results to numpy arrays for slicing and indexing downstream.
positive_cases, negative_cases = np.array(positive_cases), np.array(negative_cases)

Continues cases for concept 'difference_in_stones':   0%|          | 0/2500 [00:00<?, ?it/s]

Continues cases for concept 'difference_in_stones': 3505it [04:24, 13.23it/s]                          


In [3]:
# Report the shape of each generated array as a quick sanity check.
for label, cases in (("Positive cases: ", positive_cases), ("Negative cases: ", negative_cases)):
    print(label, cases.shape)

Positive cases:  (2500, 5, 7, 7)
Negative cases:  (2500, 5, 7, 7)


In [4]:
# Test if there are any duplicates across the two sets.
# Each case is fingerprinted by (shape, raw bytes) in a dtype shared by both
# arrays, so the check is one pass over each set instead of comparing every
# positive case against every negative case.
# Caveat: byte equality treats NaN == NaN and 0.0 != -0.0, unlike
# np.array_equal; this does not matter for integer/0-1 board encodings.
common_dtype = np.result_type(positive_cases, negative_cases)


def _case_key(case):
    """Return a hashable (shape, bytes) fingerprint of one case."""
    case = np.asarray(case, dtype=common_dtype)
    return case.shape, case.tobytes()


# Remember the first negative index for every distinct negative case, so the
# reported pair matches what a first-match nested scan would report.
first_negative_index = {}
for j, negative_case in enumerate(negative_cases):
    first_negative_index.setdefault(_case_key(negative_case), j)

for i, positive_case in enumerate(positive_cases):
    j = first_negative_index.get(_case_key(positive_case))
    if j is not None:
        print("Duplicate found!")
        print(positive_cases[i])
        print(negative_cases[j])

In [3]:
def count_duplicate_cases(cases):
    """Count how many entries of ``cases`` repeat another entry.

    The result equals ``len(cases)`` minus the number of distinct cases, which
    is the same number a pairwise "does a later equal case exist" scan yields.
    Cases are compared by shape and raw bytes, so the count is a single pass.

    Args:
        cases: Sequence/array of cases; each element is converted with
            ``np.asarray`` before fingerprinting.

    Returns:
        int: number of redundant (repeated) cases.
    """
    seen = set()
    duplicates = 0
    for case in cases:
        case = np.asarray(case)
        key = (case.shape, case.tobytes())
        if key in seen:
            duplicates += 1
        else:
            seen.add(key)
    return duplicates


# Find the number of any duplicates within the positive cases
duplicate_count = count_duplicate_cases(positive_cases)
print("Duplicate count in positive cases: ", duplicate_count)

# Find the number of any duplicates within the negative cases
duplicate_count = count_duplicate_cases(negative_cases)
print("Duplicate count in negative: ", duplicate_count)

Duplicate count in positive cases:  1
Duplicate count in negative:  13


In [None]:
from env import gogame
# Print a sample of the positive cases (indices 100-119): the raw array
# followed by the board rendering produced by gogame.str.
# The upper bound is clamped so a small CASES_TO_SAMPLE does not raise IndexError.
for i in range(100, min(120, len(positive_cases))):
    print(f"Positive case {i}:")
    print(positive_cases[i])
    print(gogame.str(positive_cases[i], nn_format=True))
    print()

In [3]:
# Positions to consider (the training part of the split) are 80% of the total
# positions. In the binary setting the total counts positives and negatives,
# hence twice the number of positive cases.
total_positions = positive_cases.shape[0] * 2 if BINARY else positive_cases.shape[0]
POSITIONS_TO_CONSIDER = int(0.8 * total_positions)
print(f"Positions to consider: {POSITIONS_TO_CONSIDER}")

# Same value in both settings; not referenced by the cells visible here.
VALIDATION_POSITIONS = 10000

# Forwarded to perform_regression as its `epochs` argument.
epochs = 5

Positions to consider: 2000


In [11]:
# Baseline: first test whether the concept can be regressed from the raw inputs.
name = "input"

if BINARY:
    # Stack both sets; positives are labelled 1 and negatives 0.
    all_cases = np.concatenate([positive_cases, negative_cases])
    all_labels = np.array([1] * positive_cases.shape[0] + [0] * negative_cases.shape[0])
else:
    # Non-binary setting: the first array provides the inputs and the second
    # array provides the regression targets.
    all_cases = np.array(positive_cases)
    all_labels = np.array(negative_cases)

# Shuffle inputs and targets with one shared permutation so pairs stay aligned.
shuffled_indices = np.arange(all_labels.shape[0])
np.random.shuffle(shuffled_indices)
all_cases, all_labels = all_cases[shuffled_indices], all_labels[shuffled_indices]

# Flatten every case into a single feature vector.
points = all_cases.reshape(all_cases.shape[0], -1)

# The first POSITIONS_TO_CONSIDER rows are fitted on; the remainder validates.
train_rows = slice(None, POSITIONS_TO_CONSIDER)
validation_rows = slice(POSITIONS_TO_CONSIDER, None)

# Use the regression
score = linear_regression.perform_regression(
    points=points[train_rows],
    targets=all_labels[train_rows],
    validation_points=points[validation_rows],
    validation_targets=all_labels[validation_rows],
    is_binary=BINARY,
    epochs=epochs,
    verbose=1
)

# Negative scores are clamped to zero before reporting and saving.
if score < 0:
    score = 0

print("Regression score: ", score)

concept_folder_setup_and_score('static', model_type, board_name, session_name, CONCEPT_NAME, name, score)

Regression score:  1.0


In [12]:
from tqdm import tqdm
epochs_to_look_at = agents_to_sample

# Assemble the probing dataset once; it does not depend on the checkpoint.
# Binary setting: positives and negatives are stacked, and the labels are 1 for
# positive cases and 0 for negative cases (length = sum of both row counts).
# Otherwise: the first array provides the inputs and the second array provides
# the regression targets.
if BINARY:
    base_cases = np.concatenate([positive_cases, negative_cases])
    base_labels = np.array([1] * positive_cases.shape[0] + [0] * negative_cases.shape[0])
else:
    base_cases = np.array(positive_cases)
    base_labels = np.array(negative_cases)

# The context manager closes the progress bar even if an epoch raises.
with tqdm(total=len(epochs_to_look_at), desc="Epochs") as bar:
    for epoch in epochs_to_look_at:
        # Reuse the notebook's loader instead of repeating its path/class logic.
        model = load_model(full_model_path, model_name, epoch)

        # NOTE(review): a fresh, unseeded permutation is drawn for every
        # checkpoint, so the train/validation split differs between
        # checkpoints and between runs.
        shuffled_indices = np.arange(base_labels.shape[0])
        np.random.shuffle(shuffled_indices)

        all_cases = base_cases[shuffled_indices]
        all_labels = base_labels[shuffled_indices]

        outputs = model.get_all_activation_values(all_cases)

        # Merge outputs: the model returns one entry per batch, each holding
        # one item per layer; concatenate the batches layer by layer.
        merged_outputs = []
        for output_batch in outputs:
            for i, output_layer in enumerate(output_batch):
                if len(merged_outputs) <= i:
                    merged_outputs.append([])
                merged_outputs[i].extend(output_layer)

        outputs = [np.array(layer_values) for layer_values in merged_outputs]

        # Perform regression
        concept_presence_per_layer = []
        for (i, output) in enumerate(outputs):
            # So one has (n, k) samples where n is the number of positions, and
            # k is the total number of activation values in layer i.
            points = output.reshape((output.shape[0], np.prod(output.shape[1:])))
            print(f"Performing regression for layer {i}")
            score = linear_regression.perform_regression(
                points=points[:POSITIONS_TO_CONSIDER],
                targets=all_labels[:POSITIONS_TO_CONSIDER],
                validation_points=points[POSITIONS_TO_CONSIDER:],
                validation_targets=all_labels[POSITIONS_TO_CONSIDER:],
                is_binary=BINARY,
                epochs=epochs,
                verbose=1
            )
            # Negative scores are clamped to zero before being recorded.
            score = 0 if score < 0 else score
            concept_presence_per_layer.append(score)

            print(f"The presence of {CONCEPT_NAME} in resblock {i} is {score}")

        # Holds only the most recent checkpoint's per-layer scores.
        concept_presences = {CONCEPT_NAME: concept_presence_per_layer}

        concept_folder_setup_and_score('static', model_type, board_name, session_name, CONCEPT_NAME, epoch, concept_presence_per_layer)

        bar.update()

Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 29.77it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9990625451924315
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9969729568746891
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9988421090721993
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9964637082562439
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9983095670342893
Performing regression for layer 5


Epochs:  11%|█         | 1/9 [00:11<01:32, 11.59s/it]

The presence of difference_in_stones in resblock 5 is 0.995212551162165
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.38332185534311625
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.6588785378253244


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 35.27it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9991232576521979
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9974917825129106
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9988448345922717
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9970028304833823
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9986885324237283
Performing regression for layer 5


Epochs:  22%|██▏       | 2/9 [00:22<01:18, 11.19s/it]

The presence of difference_in_stones in resblock 5 is 0.9958142794554615
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.8322256904160974
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.841244636077634


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 35.73it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.998852699060349
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9968805420826016
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9984657868804802
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9960790679624513
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9981176943845582
Performing regression for layer 5


Epochs:  33%|███▎      | 3/9 [00:33<01:06, 11.08s/it]

The presence of difference_in_stones in resblock 5 is 0.9954372031073039
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.6956825831781093
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.896903575735313


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 35.22it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9980590792588053
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9935400673413503
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9949624839687394
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9882677894397832
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9934229571986651
Performing regression for layer 5


Epochs:  44%|████▍     | 4/9 [00:44<00:55, 11.12s/it]

The presence of difference_in_stones in resblock 5 is 0.9831357594118528
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.42777446590651325
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.7987193068804386


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 33.75it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9977146871454431
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9911339846221777
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9869084475886283
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9754977507520883
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9798523126771641
Performing regression for layer 5


Epochs:  56%|█████▌    | 5/9 [00:56<00:45, 11.31s/it]

The presence of difference_in_stones in resblock 5 is 0.9664759620303927
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.5077271380506362
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.6905368099306837


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 31.24it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9943303354260766
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9829310896242587
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9745498970742694
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.964083654521248
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9663857691707894
Performing regression for layer 5


Epochs:  67%|██████▋   | 6/9 [01:08<00:34, 11.66s/it]

The presence of difference_in_stones in resblock 5 is 0.9496577251537
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.42653771070130064
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.3319112892137006


Getting activation outputs: 100%|██████████| 79/79 [00:03<00:00, 26.00it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9919492762446744
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9825470903484311
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9797831702820406
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9644369030268105
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9692467812073357
Performing regression for layer 5


Epochs:  78%|███████▊  | 7/9 [01:28<00:28, 14.28s/it]

The presence of difference_in_stones in resblock 5 is 0.9435638284251274
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.7545249004444382
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.27674247139063535


Getting activation outputs: 100%|██████████| 79/79 [00:03<00:00, 26.28it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9931291548315642
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9818777594751765
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9839776948008256
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9681247321596894
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9720866551643618
Performing regression for layer 5


Epochs:  89%|████████▉ | 8/9 [01:47<00:15, 15.87s/it]

The presence of difference_in_stones in resblock 5 is 0.9452162907774605
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.7950242432044783
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.20578902491283269


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 26.63it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9896802806513596
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9800868775612626
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9792809397360243
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9579685513182179
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9577663338811706
Performing regression for layer 5


Epochs: 100%|██████████| 9/9 [02:07<00:00, 17.04s/it]

The presence of difference_in_stones in resblock 5 is 0.9276390430283309
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.734755771401223
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.16516636285194053
