# Generate Activations of ImageNet Subset - for ALL feature maps
# ---all feature maps and target class saved in one file---

## Install lucid

In [None]:
# !sudo pip3 install lucid==0.3.8

## Imports

In [None]:
# general imports
import numpy as np
import random
import scipy.ndimage as nd
import tensorflow as tf
import os
import csv
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# lucid imports
import lucid.modelzoo.vision_models as models
from render import import_model

In [None]:
# custom imports
import occlusion_utils as ut

In [None]:
# for debugging
import time

## Load experiment specification

In [None]:
# Experiment specification (values are hard-coded for testing and debugging)
# objectives to process; the "neuron" objective is currently commented out
objective_list = ["channel"]#, "neuron"]
# index of the batch at which the main loop stops (that batch itself is not processed)
n_batches_stop = 1171
# root directory that holds the layer/folder mapping csv and receives all outputs
# NOTE(review): "$DATAPATH" looks like a placeholder - os.path.join does not expand it, so put a real path here
stimuli_dir = "$DATAPATH/all_activations_imagenet_train" 
# selects which layer/folder mapping csv is read (layer_folder_mapping_<trial_type>.csv)
trial_type = "sampled_trials"

## Load model

In [None]:
# import InceptionV1 from the Lucid modelzoo
model = models.InceptionV1()
# load the graph definition of the model so that it can be imported into a TF graph later on
model.load_graphdef()

## Parameters

In [None]:
# kind of images that are processed
# NOTE(review): not referenced anywhere else in the visible part of this notebook - confirm it is still needed
image_type = "natural"

In [None]:
# set the graph-level random seed of TensorFlow
# NOTE(review): this seeds the current default graph, whereas the forward pass is built inside a
# fresh tf.Graph() - confirm whether the seed is expected to have an effect there
tf.set_random_seed(1234)

In [None]:
# setting seeds of Python's and NumPy's global random number generators
random.seed(0)
np.random.seed(0)

In [None]:
# choose parameters for data
# ImageNet split; it is also the name of the sub-directory that is appended to the dataset path
val_or_train = 'train'

## Load data with pytorch

In [None]:
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from tqdm import tqdm

In [None]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the path of the image file.

    Each item is the tuple returned by ``torchvision.datasets.ImageFolder``
    with the file path of the image appended as its last element.
    """

    def __getitem__(self, index):
        """Return the regular ImageFolder item for ``index`` plus its file path."""
        # regular ImageFolder result; this method is what the dataloader calls
        sample = super().__getitem__(index)
        # first entry of the dataset's bookkeeping record is the file path
        image_path = self.imgs[index][0]
        return sample + (image_path,)

In [None]:
# number of images per batch; used by the DataLoader and as the fixed batch dimension of the TF input placeholder
batch_size = 512

In [None]:
# root directory of ImageNet
datapath = '$PATHTOIMAEGNET/'

# directory of the selected split
data_dir = os.path.join(datapath, val_or_train)

# seed torch's global RNG before the dataset and the (shuffling) loader are created
torch.manual_seed(1234)

# preprocessing (same resize / center-crop scheme as commonly used for ResNet)
preprocessing = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# dataset that yields (image, target, path) items
this_dataset = ImageFolderWithPaths(data_dir, preprocessing)

# TODO: determine shuffle! If images are determined over whole dataset, False is ok. Otherwise True.
data_loader = torch.utils.data.DataLoader(
    this_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=32,
    pin_memory=True,
)

In [None]:
# torch is only used for data loading here, no gradients are needed:
# switch autograd off to save memory and computation cost
torch.set_grad_enabled(False)

# Run it!

In [None]:
# read in unit specifications from csv into pandas dataframe
# the csv lives directly in stimuli_dir and its name depends on trial_type
path_to_csv_file = os.path.join(stimuli_dir, f"layer_folder_mapping_{trial_type}.csv")
# header=1: the second line of the file provides the column names, the first line is skipped
unit_specs_df = pd.read_csv(path_to_csv_file, header=1)

In [None]:
# for ALL feature maps
# make directories until layer-level: <stimuli_dir>/<objective>/sampled_trials/layer_<number>
for objective_i in objective_list:
    for layer_number in range(10):
        cur_dir = os.path.join(stimuli_dir, objective_i, "sampled_trials", f"layer_{layer_number}")
        # exist_ok avoids the check-then-create race and makes re-running this cell a no-op
        os.makedirs(cur_dir, exist_ok=True)

In [None]:
# close a session that is still around from an earlier run, if there is one
if locals().get('session') is not None:
    print('Close interactive session')
    session.close()

In [None]:

# Forward pass over the dataset - for ALL feature maps, for all but the last batch.
# For every batch, the activations of every layer are reduced according to the
# objective (neuron or channel) and collected as rows in layer_dfs (one list of
# rows per layer). Nothing is written to disk here; that happens afterwards.
with tf.Graph().as_default() as graph, tf.Session() as sess:

    # the input has a fixed batch dimension, i.e. only complete batches can be fed
    image = tf.placeholder(tf.float32, shape=(batch_size, 224, 224, 3))
    model_instance = import_model(model, image)
    tf_activations_list, unique_layer_str_list = ut.get_tf_activations_list_whole_net(model_instance, unit_specs_df)

    # one growing list of rows per layer
    layer_dfs = [[] for _ in unique_layer_str_list]

    # the DataLoader reports its true number of batches (including a possibly smaller
    # last one); use it for the progress bar and for detecting the last batch
    n_batches = len(data_loader)
    last_batch_number = n_batches - 1

    # loop through batches
    for batch_number, (images, targets, paths) in enumerate(tqdm(data_loader, total=n_batches)):
        # stop at the requested batch or at the last batch; that batch is NOT processed
        if batch_number == n_batches_stop or batch_number == last_batch_number:
            last_batchs_batch_size = images.shape[0]
            print(f"breaking at {batch_number}")
            break

        # forward pass; move the channel axis to the end to match the placeholder layout
        images_np_transformed = images.numpy().transpose(0, 2, 3, 1)
        activations_list = sess.run(tf_activations_list, {image: images_np_transformed})  # batch_size, x, y, number_feature_maps

        # collect it!
        # loop through layers
        for layer_idx, cur_layer_str in enumerate(unique_layer_str_list):
            activations_np = activations_list[layer_idx]

            # loop through objectives # TODO: remove after decision which objective to use
            # NOTE(review): the rows of all objectives end up in the same per-layer list;
            # this is only unambiguous as long as objective_list holds a single objective
            for objective_i in objective_list:
                # all feature maps of the layer are requested (indices 0 .. number_feature_maps - 1)
                unit_activations = ut.get_activation_according_to_objective(objective_i, activations_np, np.arange(0, activations_np.shape[-1]))
                # half precision keeps the collected data small
                unit_activations = unit_activations.astype(np.float16)
                # append the rows of this batch to the rows of the layer
                layer_dfs[layer_idx] += ut.create_unit_activations_dataset_rows(unit_activations, paths, targets)

In [None]:
# Persist the collected rows: one table per layer, stored as csv and as pickle
# in the "sampled_trials" directory of the respective layer.
# NOTE(review): objective_i is the loop variable left over from the preceding
# loops, i.e. everything is stored below the directory of the last objective.
for layer_idx, layer_rows in enumerate(layer_dfs):
    layer_dir = os.path.join(stimuli_dir, objective_i, "sampled_trials", f"layer_{layer_idx}")
    layer_table = pd.DataFrame(layer_rows)
    layer_table.to_csv(os.path.join(layer_dir, "activations_whole_dataset.csv"))
    layer_table.to_pickle(os.path.join(layer_dir, "activations_whole_dataset.pkl"))

print('Done!!!')