### Notebook for concept detection in a neural network

In [15]:
# Load IPython's autoreload extension and use mode 2, which reloads imported
# modules before code is executed, so edits to the project modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
import numpy as np
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.pardir, 'src')))

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # or any {'0', '1', '2'}

from concepts import static_concepts, linear_regression, generate_static_concept_datasets
from policy import ConvNet, ResNet
from utils import concept_folder_setup_and_score

import tensorflow as tf

# Enable memory growth on every visible GPU so TensorFlow allocates device
# memory on demand instead of reserving it all up front.
# Iterating over the device list (rather than indexing element 0) keeps the
# notebook runnable on CPU-only machines, where the list is empty, and applies
# the same setting to every GPU when more than one is present.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# --- Experiment configuration ------------------------------------------------
# The first group of values only selects which saved session is loaded; they
# are combined into `full_model_path` below.
reward_function = "zero_sum" # "concept_fn", "zero_sum" or "jem"
model_name = "net"  # file-name prefix of the saved checkpoints
session_name = "probing"
board_size = 7
board_name = f'{board_size}x{board_size}'
resnet = True  # True -> ResNet wrapper, False -> ConvNet wrapper

model_type = "resnet" if resnet else "convnet"

# Epoch numbers of the checkpoints that are loaded and probed.
agents_to_sample = [0, 10, 50, 100, 300, 500, 600, 800, 1000]

# Directory that holds the checkpoints of the selected session.
full_model_path = f"../models/saved_sessions/{model_type}/{reward_function}/board_size_{board_size}/{session_name}/"

# Concept to probe for. The name is derived from the function itself so the
# label used when saving scores can never drift from the function that
# actually generated the dataset.
CONCEPT_FUNC = static_concepts.difference_in_stones

CONCEPT_NAME = CONCEPT_FUNC.__name__

# False -> continuous concept (regression on target values);
# True  -> binary concept (positive vs. negative cases).
BINARY = False

# Passed to generate_static_concept_datasets as the number of cases to sample.
CASES_TO_SAMPLE = 2500 # 25000

# Forwarded as `sample_ratio` to the dataset generator.
# NOTE(review): exact semantics live in generate_static_concept_datasets - confirm there.
SAMPLE_RATIO = 0.5

def load_model(full_name, model_name, epoch):
    """Instantiate the policy network for one saved checkpoint.

    The checkpoint file is ``<full_name><model_name>_<epoch>.keras``. The
    notebook-level ``resnet`` flag decides whether the ``ResNet`` or the
    ``ConvNet`` wrapper is constructed, and ``board_size`` is forwarded to it.
    """
    checkpoint_file = "".join([full_name, model_name, "_", str(epoch), ".keras"])
    network_cls = ResNet if resnet else ConvNet
    return network_cls(board_size, checkpoint_file)


# One loaded model per sampled checkpoint, in the order of `agents_to_sample`
agents = [
    load_model(full_model_path, model_name, checkpoint)
    for checkpoint in agents_to_sample
]

In [2]:
# Generate the concept dataset and convert both returned collections to numpy
# arrays in one step. In the continuous setting the second array
# (`negative_cases`) holds the target values rather than negative examples.
positive_cases, negative_cases = map(
    np.array,
    generate_static_concept_datasets(
        CASES_TO_SAMPLE,
        agents,
        board_size,
        CONCEPT_FUNC,
        sample_ratio=SAMPLE_RATIO,
        binary=BINARY,
    ),
)

Continuous cases for concept 'difference_in_stones': 3519it [05:04, 11.55it/s]                          


In [None]:
# Report the shape of each generated array
for caption, case_array in (
    ("Positive cases: ", positive_cases),
    ("Negative cases: ", negative_cases),
):
    print(caption, case_array.shape)

In [6]:
if BINARY:
    # Look for positions that appear in both sets: for every positive case,
    # report the first negative case that is element-wise identical to it.
    for positive in positive_cases:
        twin = next(
            (negative for negative in negative_cases if np.array_equal(positive, negative)),
            None,
        )
        if twin is not None:
            print("Duplicate found!")
            print(positive)
            print(twin)
else:
    print("Continuous case - skipping duplicates check")

In [None]:
def _count_duplicates(cases):
    """Count the entries of ``cases`` that occur again later in the array.

    Each entry that has an identical entry at a higher index is counted once,
    which equals ``len(cases)`` minus the number of distinct entries. The
    distinct entries are found with ``np.unique`` on the flattened cases
    instead of comparing every pair in Python.

    Returns 0 for an empty collection.
    """
    cases = np.asarray(cases)
    if len(cases) == 0:
        return 0
    # One row per case so that np.unique can compare whole cases at once
    flattened = cases.reshape(cases.shape[0], -1)
    return flattened.shape[0] - np.unique(flattened, axis=0).shape[0]


if not BINARY:
    print("Continuous case - skipping duplicates check")
else:
    # Number of duplicates within the positive cases
    print("Duplicate count in positive cases: ", _count_duplicates(positive_cases))

    # Number of duplicates within the negative cases
    print("Duplicate count in negative: ", _count_duplicates(negative_cases))

In [None]:
from env import gogame

# Show the first positive cases (at most 10): the raw array followed by the
# string that gogame.str produces for it. Slicing instead of indexing a fixed
# range avoids an IndexError when fewer than 10 cases were generated.
for i, case in enumerate(positive_cases[:10]):
    print(f"Positive case {i}:")
    print(case)
    print(gogame.str(case, nn_format=True))
    print()

In [None]:
from env import gogame

if not BINARY:
    # In the continuous setting `positive_cases` holds the board positions and
    # `negative_cases` holds the matching target values, so the position is
    # taken from `positive_cases` and only the target from `negative_cases`.
    # Shows the fixed window of indices 100..119, clipped to the data that is
    # actually available.
    last_index = min(120, len(positive_cases), len(negative_cases))
    for i in range(100, last_index):
        print(positive_cases[i])
        print(f'Target: {negative_cases[i]}')
        print(gogame.str(positive_cases[i], nn_format=True))
        print()

In [3]:
# Positions to consider (the training part) are 80% of the total positions;
# the regression cells use everything after that index for validation.
if BINARY:
    # Both sets are used in the binary setting. Counting them explicitly keeps
    # the 80% split correct even when the two sets differ in size.
    total_positions = positive_cases.shape[0] + negative_cases.shape[0]
else:
    # Continuous setting: `negative_cases` holds targets, not extra positions
    total_positions = positive_cases.shape[0]

POSITIONS_TO_CONSIDER = int(0.8 * total_positions)
print(f"Positions to consider: {POSITIONS_TO_CONSIDER}")

# Kept for compatibility; not referenced by the cells visible in this notebook.
VALIDATION_POSITIONS = 10000

# Forwarded as `epochs` to linear_regression.perform_regression
epochs = 5

Positions to consider: 2000


In [4]:
# Baseline probe: first test whether the concept can be regressed directly
# from the raw inputs, before looking at any network activations.
name = "input"

if not BINARY:
    # Continuous concept: `negative_cases` already carries the target values
    all_cases = np.array(positive_cases)
    all_labels = np.array(negative_cases)
else:
    # Binary concept: label positive cases 1 and negative cases 0
    all_cases = np.concatenate([positive_cases, negative_cases])
    all_labels = np.array(
        [1] * positive_cases.shape[0] + [0] * negative_cases.shape[0]
    )

# Shuffle cases and labels with one shared permutation
shuffled_indices = np.arange(all_labels.shape[0])
np.random.shuffle(shuffled_indices)
all_cases, all_labels = all_cases[shuffled_indices], all_labels[shuffled_indices]

# One flat feature vector per case
points = all_cases.reshape(all_cases.shape[0], -1)

# Fit on the first POSITIONS_TO_CONSIDER samples, validate on the rest
score = linear_regression.perform_regression(
    points=points[:POSITIONS_TO_CONSIDER],
    targets=all_labels[:POSITIONS_TO_CONSIDER],
    validation_points=points[POSITIONS_TO_CONSIDER:],
    validation_targets=all_labels[POSITIONS_TO_CONSIDER:],
    is_binary=BINARY,
    epochs=epochs,
    verbose=1,
)

# Negative scores are clamped to zero before reporting and saving
if score < 0:
    score = 0

print("Regression score: ", score)

concept_folder_setup_and_score('static', model_type, board_name, session_name, CONCEPT_NAME, name, score)

Regression score:  1.0


In [5]:
from tqdm import tqdm
epochs_to_look_at = agents_to_sample

# Assemble the probing dataset once, outside the checkpoint loop.
# In the binary setting `all_labels` has one entry per positive and negative
# case (1 for positive, 0 for negative); in the continuous setting the targets
# are the values stored in `negative_cases`.
if BINARY:
    all_cases = np.concatenate([positive_cases, negative_cases])
    all_labels = [1] * positive_cases.shape[0] + [0] * negative_cases.shape[0]
else:
    all_cases = np.array(positive_cases)
    all_labels = negative_cases

all_labels = np.array(all_labels)

# Shuffle once so that every checkpoint is scored on the same train/validation
# split; reshuffling per checkpoint would mix split noise into the comparison
# between checkpoints.
shuffled_indices = np.arange(all_labels.shape[0])
np.random.shuffle(shuffled_indices)

all_cases = all_cases[shuffled_indices]
all_labels = all_labels[shuffled_indices]

# Per-layer scores keyed by concept name; after the loop it holds the scores
# of the last checkpoint that was processed.
concept_presences = {}

# Wrapping the iterable lets tqdm advance and close the bar on its own
for epoch in tqdm(epochs_to_look_at, desc="Epochs"):
    # Reuse the notebook's loader instead of duplicating the ResNet/ConvNet branch
    model = load_model(full_model_path, model_name, epoch)

    outputs = model.get_all_activation_values(all_cases)

    # Merge outputs: `outputs` is iterated batch by batch, and each batch layer
    # by layer; collect the per-sample activations of every layer across all
    # batches, then turn each layer's collection into one array.
    merged_outputs = []
    for output_batch in outputs:
        for i, output_layer in enumerate(output_batch):
            if len(merged_outputs) <= i:
                merged_outputs.append([])
            merged_outputs[i].extend(output_layer)

    outputs = [np.array(layer_values) for layer_values in merged_outputs]

    # Perform regression
    concept_presence_per_layer = []
    for (i, output) in enumerate(outputs):
        # So one has (n, k) samples where n is the number of positions, and k
        # is the total number of activation values in layer i. Using -1 (as the
        # input-regression cell does) also handles layers with one value per
        # sample, where np.prod of an empty shape would yield a float.
        points = output.reshape(output.shape[0], -1)
        print(f"Performing regression for layer {i}")
        score = linear_regression.perform_regression(
            points=points[:POSITIONS_TO_CONSIDER],
            targets=all_labels[:POSITIONS_TO_CONSIDER],
            validation_points=points[POSITIONS_TO_CONSIDER:],
            validation_targets=all_labels[POSITIONS_TO_CONSIDER:],
            is_binary=BINARY,
            epochs=epochs,
            verbose=1
        )
        # Negative scores are clamped to zero
        score = 0 if score < 0 else score
        concept_presence_per_layer.append(score)

        print(f"The presence of {CONCEPT_NAME} in resblock {i} is {score}")

    concept_presences[CONCEPT_NAME] = concept_presence_per_layer

    # Persist the per-layer scores for this checkpoint
    concept_folder_setup_and_score('static', model_type, board_name, session_name, CONCEPT_NAME, epoch, concept_presence_per_layer)

Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 28.69it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9988720214318776
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9956920914021364
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9984781394303233
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9952664507929939
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9980416037401231
Performing regression for layer 5


Epochs:  11%|█         | 1/9 [00:12<01:36, 12.08s/it]

The presence of difference_in_stones in resblock 5 is 0.9938543009446112
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.4248891861648454
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.5548650167787619


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 32.73it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9984146089603839
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9954907257099095
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9981039142165937
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.993636416222921
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9977512162532962
Performing regression for layer 5


Epochs:  22%|██▏       | 2/9 [00:23<01:22, 11.76s/it]

The presence of difference_in_stones in resblock 5 is 0.993035740772833
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.7781201894744492
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.7736391963486167


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 32.47it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9988528450711136
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9960524298135736
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.998466126735921
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9946252791670271
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9979729126322778
Performing regression for layer 5


Epochs:  33%|███▎      | 3/9 [00:35<01:10, 11.67s/it]

The presence of difference_in_stones in resblock 5 is 0.9946838502457951
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.6869179042031477
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.8643041943954065


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 33.17it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9982554171934095
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9923798671748502
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9949013280844076
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9904849748806832
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9924838472485189
Performing regression for layer 5


Epochs:  44%|████▍     | 4/9 [00:46<00:58, 11.65s/it]

The presence of difference_in_stones in resblock 5 is 0.9836313399700415
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.3620453163989912
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.7609115400287976


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 33.46it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9972284302290596
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9914519718352478
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9845822318073667
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9763849548643112
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9779345175846151
Performing regression for layer 5


Epochs:  56%|█████▌    | 5/9 [00:58<00:46, 11.66s/it]

The presence of difference_in_stones in resblock 5 is 0.9592506596610932
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.4691942464214506
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.6934863627269162


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 33.68it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9925496139983698
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9814508608870064
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9639496042800275
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9451449222428385
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9533314566013488
Performing regression for layer 5


Epochs:  67%|██████▋   | 6/9 [01:10<00:35, 11.73s/it]

The presence of difference_in_stones in resblock 5 is 0.9227247212830348
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.4969554013141282
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.3076537812826845


Getting activation outputs: 100%|██████████| 79/79 [00:02<00:00, 26.69it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9909294233367105
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9813806638025661
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9791227723848356
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9624038344041701
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9680744028882156
Performing regression for layer 5


Epochs:  78%|███████▊  | 7/9 [01:30<00:28, 14.46s/it]

The presence of difference_in_stones in resblock 5 is 0.9478175334557489
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.788715376761382
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.2558723390407809


Getting activation outputs: 100%|██████████| 79/79 [00:03<00:00, 24.72it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9921267845471928
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9838192064173246
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.980522994722465
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9581642942812386
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9687378054687441
Performing regression for layer 5


Epochs:  89%|████████▉ | 8/9 [01:50<00:16, 16.39s/it]

The presence of difference_in_stones in resblock 5 is 0.9379759438814652
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.7780874126460997
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.22364389781686145


Getting activation outputs: 100%|██████████| 79/79 [00:03<00:00, 25.21it/s]


Performing regression for layer 0
The presence of difference_in_stones in resblock 0 is 0.9920084772379316
Performing regression for layer 1
The presence of difference_in_stones in resblock 1 is 0.9812171712829092
Performing regression for layer 2
The presence of difference_in_stones in resblock 2 is 0.9812164440412986
Performing regression for layer 3
The presence of difference_in_stones in resblock 3 is 0.9701758151383912
Performing regression for layer 4
The presence of difference_in_stones in resblock 4 is 0.9716616726375378
Performing regression for layer 5


Epochs: 100%|██████████| 9/9 [02:10<00:00, 17.46s/it]

The presence of difference_in_stones in resblock 5 is 0.9463226780828125
Performing regression for layer 6
The presence of difference_in_stones in resblock 6 is 0.7906450590092879
Performing regression for layer 7
The presence of difference_in_stones in resblock 7 is 0.2750066804423481
