In [1]:
import os
import numpy as np

from data_utils import get_tvsd_dataset, load_data, THINGSDataset
from moment_utils import *

# Everything is resolved relative to DATAROOT; both derived folders live
# under its 'TVSD' sub-directory.
DATAROOT = './'
ACTIVATIONS_ROOT, RESULTS_ROOT = (
    os.path.join(DATAROOT, 'TVSD', leaf)
    for leaf in ('TVSD_activations', 'results')
)

In [2]:
data = load_data(DATAROOT)

sub_names = ['F', 'N']
roi_names = ['V1', 'V4', 'IT']

# roi_data[subject][roi] is the array obtained by stacking the entries of
# that ROI's column in the subject's dataframe (data[subject]['df']).
roi_data = {
    sub_name: {
        roi_name: np.asarray(data[sub_name]['df'][roi_name].tolist())
        for roi_name in roi_names
    }
    for sub_name in sub_names
}


# Store the local paths of images
# all_image_paths[subject][category_folder] -> list of file names, kept in the
# order in which they appear in that subject's `things_path` column.
all_image_paths = {}
for name in sub_names:

    image_paths = {}
    for path in data[name]['df'].things_path:
        # Each entry is split on a backslash into (category folder, file name);
        # the unpacking raises ValueError if there is not exactly one separator.
        cn, fn = path.split('\\')
        image_paths.setdefault(cn, []).append(fn)

    all_image_paths[name] = image_paths

# All subjects must reference exactly the same images. Comparing the whole
# dictionaries checks the category keys in both directions as well as the
# per-category file lists, so a category present for only one subject is
# reported instead of being silently skipped.
reference_paths = all_image_paths[sub_names[0]]
for name in sub_names[1:]:
    assert all_image_paths[name] == reference_paths, \
        f"Image paths of subject {name} differ from subject {sub_names[0]}."

# Shared image listing used to build the model input dataset.
IMAGE_PATHS = all_image_paths[sub_names[-1]]

In [3]:
from pathlib import Path

# On-disk layout of the THINGS images below DATAROOT:
#   THINGS/                    -> THINGS_ROOT
#   THINGS/images/             -> THINGS_DATASET
#   THINGS/images/imgs_train/  -> TRAIN_FOLDER
THINGS_ROOT = os.path.join(DATAROOT, 'THINGS')
THINGS_DATASET = os.path.join(DATAROOT, 'THINGS', 'images')
TRAIN_FOLDER = os.path.join(DATAROOT, 'THINGS', 'images', 'imgs_train')

# ResNet-18 module names, in order: 'conv1', 'maxpool', then
# 'layer{S}.{B}.conv{C}' for S in 1..4, B in 0..1, C in 1..2, then
# 'avgpool' and 'fc'. Other cells index into this list by position, so the
# order must not change.
layer_names_resnet18 = (
    ['conv1', 'maxpool']
    + [f'layer{stage}.{block}.conv{conv}'
       for stage in range(1, 5)
       for block in range(2)
       for conv in range(1, 3)]
    + ['avgpool', 'fc']
)

# CORnet-S module names, in order: two convs for V1, three each for V2, V4
# and IT, followed by the two decoder modules.
layer_names_cornet_s = (
    [f'{area}.conv{conv}'
     for area, n_convs in (('V1', 2), ('V2', 3), ('V4', 3), ('IT', 3))
     for conv in range(1, n_convs + 1)]
    + ['decoder.avgpool', 'decoder.linear']
)

# Per-model configuration: the layers that may be extracted and the
# `source` value handed to the feature extractor.
model_dict = {
    'resnet18': dict(layers=layer_names_resnet18, source='torchvision'),
    'cornet-s': dict(layers=layer_names_cornet_s, source='custom'),
}

In [4]:
def extract_things_activations(model_name, layer_name, batch_size=64, num_workers=8):
    """Extract one layer's activations for the THINGS images and save them to disk.

    A pretrained extractor for ``model_name`` is created with thingsvision,
    run over a ``THINGSDataset`` built from ``TRAIN_FOLDER`` and
    ``IMAGE_PATHS``, and the flattened activations of ``layer_name`` are
    written (format ``'npy'``) to
    ``ACTIVATIONS_ROOT/models/<model_name>/<layer_name>``.

    Parameters
    ----------
    model_name : str
        Key of ``model_dict``; a ``KeyError`` is raised for unknown models.
    layer_name : str
        Module to read out; must be listed in ``model_dict[model_name]['layers']``.
    batch_size : int, optional
        Number of images per forward pass (default 64).
    num_workers : int, optional
        Number of ``DataLoader`` worker processes (default 8).

    Returns
    -------
    numpy.ndarray
        The extracted features converted with ``Tensor.numpy()``; presumably
        one flattened row per image -- TODO confirm against thingsvision.

    Raises
    ------
    AssertionError
        If ``layer_name`` is not one of the layers registered for the model.
    """
    model_info = model_dict[model_name]
    layer_names = model_info['layers']
    source = model_info['source']

    assert layer_name in layer_names, f"Layer {layer_name} not in model {model_name}."

    # Imported here rather than at module level so that cells which only load
    # cached activations do not need torch / thingsvision.
    import torch
    from torch.utils.data import DataLoader

    from thingsvision import get_extractor
    from thingsvision.utils.storing import save_features

    use_cuda = torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'

    extractor = get_extractor(model_name=model_name,
                              source=source,
                              device=device,
                              pretrained=True
                              )
    extractor.show_model()

    dataset = THINGSDataset(root=TRAIN_FOLDER,
                            path_dict=IMAGE_PATHS,
                            transform=extractor.get_transformations()
                            )

    # Pinned (page-locked) host memory only serves host-to-GPU copies. On a
    # CPU-only machine the DataLoader defaults are used instead of requesting
    # pinning against the 'cpu' device.
    pin_kwargs = {'pin_memory': True, 'pin_memory_device': device} if use_cuda else {}

    # shuffle=False keeps the feature rows in dataset order.
    batches = DataLoader(dataset=dataset,
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=num_workers,
                         **pin_kwargs
                         )

    print(f"Processing: {model_name} {layer_name}")
    save_folder = Path(ACTIVATIONS_ROOT) / 'models' / model_name
    save_path = save_folder / layer_name
    features = extractor.extract_features(batches=batches,
                                          module_name=layer_name,
                                          flatten_acts=True,
                                          output_type='tensor'
                                          )
    save_features(features, out_path=save_path, file_format='npy')

    return features.numpy()


def get_things_activations(model_name, layer_name):
    """Return the activations of one model layer, using the on-disk cache when present.

    The cache file is looked up at
    ``ACTIVATIONS_ROOT/models/<model_name>/<layer_name>/features.npy``. If it
    exists it is loaded with ``np.load``; otherwise the activations are
    computed (and saved) by ``extract_things_activations``.

    Parameters
    ----------
    model_name : str
        Key of ``model_dict``.
    layer_name : str
        Name of the layer whose activations are requested.

    Returns
    -------
    numpy.ndarray
        The cached or freshly extracted features.

    Raises
    ------
    KeyError
        If ``model_name`` is not a key of ``model_dict``.
    """
    # Reject unknown models up front, even when a cache file happens to exist.
    if model_name not in model_dict:
        raise KeyError(model_name)

    # Presumably 'features.npy' is the file name written by thingsvision's
    # save_features for file_format='npy' -- verify if the cache is never hit.
    features_file = Path(ACTIVATIONS_ROOT) / 'models' / model_name / layer_name / 'features.npy'

    if features_file.is_file():
        print('Loading activations for layer:', model_name, layer_name)
        return np.load(features_file)

    return extract_things_activations(model_name, layer_name)


# Fetch (or compute and cache) the activations of a single example layer.
model_name, layer_name = 'cornet-s', 'IT.conv2'
acts = get_things_activations(model_name=model_name, layer_name=layer_name)

In [10]:
# Experiment: for every subject, model, layer and ROI, repeatedly draw a
# random subset of rows and columns from the neural data (Phi) and the model
# activations (Psi), and evaluate the estimators on the sub-sampled matrices.
P = 2000           # rows drawn per trial
Q = 100            # columns drawn per trial from each of Phi and Psi
num_repeat = 200   # trials per (subject, model, layer, ROI)

filename = f'./all_ckas_P_{P}_Q_{Q}_rep_{num_repeat}.npz'

# The whole sweep is skipped when a results file from a previous run exists.
if not os.path.isfile(filename):

    # Fixed seed so the results file can be regenerated with the same draws.
    np.random.seed(0)

    all_data = {}
    for sub_name in sub_names:
        subject_data = {}
        for model_name in model_dict.keys():

            layers = model_dict[model_name]['layers']

            # One list per ROI; entry k holds the trials for layers[k].
            model_subject_ckas = {key: [] for key in roi_names}
            for layer_name in layers:

                Psi = get_things_activations(model_name, layer_name)

                for roi_name in roi_names:

                    Phi = roi_data[sub_name][roi_name]

                    print('\r{} {}, {}, {}'.format(sub_name, roi_name, model_name, layer_name))

                    trials = []
                    for _ in range(num_repeat):

                        # Row indices are drawn from Phi and reused for Psi, so
                        # both matrices are assumed to share the same row order.
                        idx_P = np.random.choice(Phi.shape[0], P, replace=False)
                        idx_Qa = np.random.choice(Phi.shape[1], Q, replace=False)
                        idx_Qb = np.random.choice(Psi.shape[1], Q, replace=False)

                        # np.ix_ gathers just the selected rows x columns,
                        # avoiding an intermediate copy of every row.
                        Phi_a = Phi[np.ix_(idx_P, idx_Qa)]
                        Phi_b = Psi[np.ix_(idx_P, idx_Qb)]

                        # Calculate estimated numerator using Naive, Song and ours
                        num = list(getest_all(Phi_a, Phi_b, indep_cols=True))

                        # Calculate estimated denominators using Naive, Song and ours
                        denom1 = list(getest_all(Phi_a, Phi_a, indep_cols=False))
                        denom2 = list(getest_all(Phi_b, Phi_b, indep_cols=False))

                        trials.append([num, denom1, denom2])

                    model_subject_ckas[roi_name].append(trials)

            # Stored once per model / subject, after the inner loops finish.
            subject_data[model_name] = model_subject_ckas
        all_data[sub_name] = subject_data

    # all_data is a nested dict and is therefore stored as a Python object.
    np.savez(filename, all_data=all_data)

In [None]:
# Experiment: same sweep over subjects, models, layers and ROIs, but with one
# fixed set of P rows shared by all trials, a fixed fraction of the ROI's
# columns, and Q model columns per trial.
P_total = 22248    # size of the row pool idx_P is drawn from; assumed to match the number of rows of Phi / Psi -- TODO confirm
P = 2000           # rows used in every trial
Q = 5000           # model columns drawn per trial
num_repeat = 100   # trials per (subject, model, layer, ROI)

Q_n_ratio = 1/16   # fraction of each ROI's columns drawn per trial

filename = f"./all_models_Pl_{P}_Qb_{Q}_inv_Qratio_{int(1/Q_n_ratio)}_rep_{num_repeat}.npz"

# The row subset is drawn once, right after seeding, and reused everywhere.
np.random.seed(0)
idx_P = np.random.choice(P_total, P, replace=False)

# The whole sweep is skipped when a results file from a previous run exists.
if not os.path.isfile(filename):

    all_data = {}
    for sub_name in sub_names:
        subject_data = {}
        for model_name in model_dict.keys():

            layers = model_dict[model_name]['layers']

            # One list per ROI; entry k holds the trials for layers[k].
            model_subject_ckas = {key: [] for key in roi_names}
            for layer_name in layers:

                Psi = get_things_activations(model_name, layer_name)

                for roi_name in roi_names:

                    Phi = roi_data[sub_name][roi_name]
                    Qa = Phi.shape[1]
                    # Number of ROI columns per trial; constant across trials.
                    num_Qa = int(Qa*Q_n_ratio)

                    print('\r{} {}, {}, {}'.format(sub_name, roi_name, model_name, layer_name))

                    trials = []
                    for _ in range(num_repeat):

                        # Only the column subsets change from trial to trial.
                        idx_Qa = np.random.choice(Qa, num_Qa, replace=False)
                        idx_Qb = np.random.choice(Psi.shape[1], Q, replace=False)

                        # np.ix_ gathers just the selected rows x columns,
                        # avoiding an intermediate copy of every row.
                        Phi_a = Phi[np.ix_(idx_P, idx_Qa)]
                        Phi_b = Psi[np.ix_(idx_P, idx_Qb)]

                        # Calculate estimated numerator using Naive, Song and ours
                        num = list(getest_all(Phi_a, Phi_b, indep_cols=True))

                        # Calculate estimated denominators using Naive, Song and ours
                        denom1 = list(getest_all(Phi_a, Phi_a, indep_cols=False))
                        denom2 = list(getest_all(Phi_b, Phi_b, indep_cols=False))

                        trials.append([num, denom1, denom2])

                    model_subject_ckas[roi_name].append(trials)

            # Stored once per model / subject, after the inner loops finish.
            subject_data[model_name] = model_subject_ckas
        all_data[sub_name] = subject_data

    # all_data is a nested dict and is therefore stored as a Python object.
    np.savez(filename, all_data=all_data)

In [None]:
# Model layer compared against each ROI, given as a position in the model's
# layer-name list. With the lists defined in this notebook these resolve to
#   resnet18: V1 -> 'maxpool',  V4 -> 'layer3.0.conv1', IT -> 'layer4.0.conv1'
#   cornet-s: V1 -> 'V1.conv2', V4 -> 'V2.conv1',       IT -> 'V4.conv1'
# NOTE(review): the cornet-s V4 / IT entries point at the V2 / V4 modules --
# confirm that this pairing is intentional.
layer_for_roi = {
    net: {roi: names[position] for roi, position in positions.items()}
    for net, names, positions in (
        ('resnet18', layer_names_resnet18, {'V1': 1, 'V4': 10, 'IT': 14}),
        ('cornet-s', layer_names_cornet_s, {'V1': 1, 'V4': 2, 'IT': 5}),
    )
}

# Experiment: sweep the fraction of ROI columns drawn per trial, comparing each
# ROI with the single model layer assigned to it in layer_for_roi.
P_total = 22248    # size of the row pool idx_P is drawn from; assumed to match the number of rows of Phi / Psi -- TODO confirm
P = 2000           # rows used in every trial
Q = 5000           # model columns drawn per trial
num_repeat = 100   # trials per (subject, model, ratio, ROI)

# Six fractions 2**-4 ... 2**-1, evenly spaced in the exponent.
Q_neuron_ratios = np.power(1/2, np.linspace(4, 1, 6))

filename = f"./all_models_Pl_{P}_Qb_{Q}_sweep_Qnratio_rep_{num_repeat}.npz"

# The row subset is drawn once, right after seeding, and reused everywhere.
np.random.seed(0)
idx_P = np.random.choice(P_total, P, replace=False)

# The whole sweep is skipped when a results file from a previous run exists.
if not os.path.isfile(filename):

    all_data = {}
    for sub_name in sub_names:
        subject_data = {}
        for model_name in model_dict.keys():

            # One list per ROI; entry k holds the trials for Q_neuron_ratios[k].
            model_subject_ckas = {key: [] for key in roi_names}
            for Q_n_ratio in Q_neuron_ratios:

                for roi_name in roi_names:

                    layer_name = layer_for_roi[model_name][roi_name]
                    # Fetched again for every ratio; only one layer's
                    # activations are referenced at a time.
                    Psi = get_things_activations(model_name, layer_name)
                    Phi = roi_data[sub_name][roi_name]
                    Qa = Phi.shape[1]
                    # Number of ROI columns per trial; constant across trials.
                    num_Qa = int(Qa*Q_n_ratio)

                    print('\r{} {}, {}, {:.3f}'.format(sub_name, roi_name, model_name, Q_n_ratio))

                    trials = []
                    for _ in range(num_repeat):

                        # Only the column subsets change from trial to trial.
                        idx_Qa = np.random.choice(Qa, num_Qa, replace=False)
                        idx_Qb = np.random.choice(Psi.shape[1], Q, replace=False)

                        # np.ix_ gathers just the selected rows x columns,
                        # avoiding an intermediate copy of every row.
                        Phi_a = Phi[np.ix_(idx_P, idx_Qa)]
                        Phi_b = Psi[np.ix_(idx_P, idx_Qb)]

                        # Calculate estimated numerator using Naive, Song and ours
                        num = list(getest_all(Phi_a, Phi_b, indep_cols=True))

                        # Calculate estimated denominators using Naive, Song and ours
                        denom1 = list(getest_all(Phi_a, Phi_a, indep_cols=False))
                        denom2 = list(getest_all(Phi_b, Phi_b, indep_cols=False))

                        trials.append([num, denom1, denom2])

                    model_subject_ckas[roi_name].append(trials)

            # Stored once per model / subject, after the inner loops finish.
            subject_data[model_name] = model_subject_ckas
        all_data[sub_name] = subject_data

    # Record, per (subject, ROI), how many ROI columns each ratio maps to.
    Q_lists = {}
    for sub_name in sub_names:
        for roi_name in roi_names:
            Phi = roi_data[sub_name][roi_name]
            Qa = Phi.shape[1]
            Q_lists[(sub_name, roi_name)] = [int(Qa*Q_n_ratio) for Q_n_ratio in Q_neuron_ratios]

    # Both results are Python dicts and are therefore stored as objects.
    np.savez(filename, all_data=all_data, Q_lists=Q_lists)