In [None]:
# filename: mcap_demo.ipynb
# source activate mcap_demo

# set path to MindCaptioning directory
# NOTE: write the directory with a trailing '/' (as in the default below);
# sub-paths such as 'res/decoding/' are appended to this value further down.
rootPath = './'

import os
import sys
import torch
import time

# add path to code
# os.path.join supplies the separator when rootPath is written without a trailing '/'
# and produces the same string as plain concatenation when the slash is present.
sys.path.append(os.path.join(rootPath, 'code/python/util/'))
# NOTE(review): none of the thutil4 helpers are referenced in the visible cells;
# the import is kept unchanged because later or external code may rely on it.
from thutil4 import getFN, getDN, setdir, fix_seed,randsample
import mcap_utils_demo as mu

# Device selection.
# gpu_use: 1 -> expose the GPU named by gpu_id to this process; 0 -> blank out CUDA_VISIBLE_DEVICES.
gpu_use = 1
if not gpu_use:
    # empty value: intended to keep every CUDA device hidden from this process
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
else:
    gpu_id = '0'
    print('Start script: gpu device:%s' %(gpu_id))
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id

# Run on CUDA only when it was requested (gpu_use == 1) and torch reports it as available.
cuda_available = torch.cuda.is_available()
if cuda_available and gpu_use == 1:
    device = "cuda"
else:
    device = "cpu"
print('gpu availability:%d'%(cuda_available))

# Project directories, all located under rootPath.
# os.path.join inserts the separator if rootPath was given without a trailing '/',
# so a value like '/data/MindCaptioning' no longer yields '/data/MindCaptioningres/...'.
# For the default rootPath the resulting strings are unchanged, including the
# trailing '/' on every directory.
savdir_general    = os.path.join(rootPath, 'res/text_generation/')
decfeat_dir       = os.path.join(rootPath, 'res/decoding/')
LMmodeldir        = os.path.join(rootPath, 'data/model/')
normparam_dat_dir = os.path.join(rootPath, 'data/feature/norm_param/')
capdata_dir       = os.path.join(rootPath, 'data/caption/')

# Proxy settings passed to the model loaders.
# Fill in the URLs if your network needs a proxy; otherwise leave both entries empty.
proxies = dict(http="", https="")


In [None]:
# model, normalization, caption_data and parameter preparations

# setting model
# select MLM (masked language model) from:
#   'bert-base-cased', 'bert-base-uncased', 'bert-large-cased', 'bert-large-uncased',
#   'bert-large-uncased-whole-word-masking', 'bert-large-cased-whole-word-masking',
#   'roberta-base', 'roberta-large', 'deberta-large-feedback'
# you can test an untrained model by adding "_untrained" (e.g., 'roberta-large_untrained')
MLMType = 'roberta-large' 

# select LM for feature extraction from:
#   'bert-base-uncased', 'bert-large-uncased', 'bert-base-cased', 'bert-large-cased',
#   'bert-large-uncased-whole-word-masking', 'bert-large-cased-whole-word-masking',
#   'openai-gpt', 'gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl',
#   'xlnet-base-cased', 'xlnet-large-cased', 'roberta-base', 'roberta-large',
#   'distilbert-base-uncased', 'distilbert-base-cased', 'distilgpt2',
#   'albert-base-v1', 'albert-large-v1', 'albert-xlarge-v1', 'albert-xxlarge-v1',
#   'albert-base-v2', 'albert-large-v2', 'albert-xlarge-v2', 'albert-xxlarge-v2',
#   't5-small', 't5-base', 't5-large', 'bart-base', 'bart-large', 'ctrl',
#   'xlm-mlm-17-1280', 'xlm-mlm-100-1280', 'electra',
#   'xlm-roberta-base', 'xlm-roberta-large', 'clip_l', 'sgpt',
#   'deberta-base', 'deberta-large', 'deberta-xlarge'
# LMType also names the normalization-parameter and decoded-feature sub-directories used later.
LMType = 'deberta-large'

# initialize
# timestamp taken before the models are loaded (not read again in the visible cells)
start = time.time()

# pre-trained masked language model together with its tokenizer
tokenizer, model = mu.load_mlm_model(LMmodeldir, MLMType, proxies, device)

# feature-computation model, its tokenizer, and nlayers
# (nlayers is used below to build the layer-index range and to load normalization parameters)
tokenizer_lm, model_lm, nlayers = mu.load_lm_model(LMmodeldir, LMType, proxies, device)

# switch both models to evaluation mode
model.eval()
model_lm.eval()

# prepare skip tokens, if any
# One call per tokenizer: the MLM tokenizer first, then the feature-extraction LM tokenizer.
# An empty list is passed as the user-specified tokens and include_special_token is enabled.
# NOTE(review): the keyword is spelled 'speficied_skip_tokens'; it presumably mirrors the
# parameter name declared in mcap_utils_demo.set_skip_token_ids — confirm before renaming.
skip_token_ids_mlm = mu.set_skip_token_ids(tokenizer, speficied_skip_tokens=[], include_special_token=True)
skip_token_ids_lm = mu.set_skip_token_ids(tokenizer_lm, speficied_skip_tokens=[], include_special_token=True)

# set parameters
# This dict is handed as a whole to the mcap_utils_demo routines in the cells below, and
# 'do_norm', 'layerIdx' and 'max_batch_samp' are also read directly from it in this notebook.
# NOTE(review): the section labels are inferred from the key names only — confirm the exact
# semantics in mcap_utils_demo. Keys are runtime strings (including the 'reflesh' spelling)
# and must not be renamed here.
params = {
    # --- iterations / scoring ---
    'nItr': 100,
    'metricType': 'corr',
    'do_norm': 1,
    'beamwidth': 5,

    # --- masking ---
    'nMaskCands': 5,
    'nMaskPerSentence': 2,
    'nGram4Mask': 3,
    'multiMaskType': 'forward_seq',
    'maskingUnitType': 'token',
    'add_insert_mask': 1,

    # --- features / candidate handling ---
    'mLayerType': 'vstack',
    'optimal_th': 0.001,
    'topk': 5,
    'max_batch_samp': 200,

    # --- length penalty ---
    'length_penalty_type': 'token',
    'length_penalty_w': 0.10,

    # --- MLM scoring / sampling ---
    'mlmscoreType': 'modified',
    'mlm_sampling_type': 'sampling',
    'mlms_fix_weight': 0,
    'nMax_MLMs_cands': 5000,

    # --- refresh / mask removal ---
    'do_reflesh': 1,
    'reflesh_th': [10, 0.1, 5, 0.00],
    'add_mask_removal': False,

    # --- layers and device ---
    'layerIdx': range(nlayers),  # every layer index, 0 .. nlayers-1
    'device': device,
}

# load normalization parameters
# they are read from the sub-directory named after the feature-extraction LM
# NOTE(review): feat_mu_all / feat_sd_all are presumably per-layer mean and SD — confirm in mcap_utils_demo
normparam_path = normparam_dat_dir + "/" + LMType + "/"
feat_mu_all, feat_sd_all = mu.prepare_norm_params(normparam_path, nlayers, device=device)

# load caption data
# nCapEach is the slice width used later to pick the reference captions of one video
nCapEach = 20
caps_data = mu.load_caption_data(capdata_dir, ['ck20'])



In [None]:
# Text generation from an arbitrary word sequence
# You can test arbitrary word sequences to examine the effectiveness of our method
# Example targets: change example_idx to pick another one, or append your own sentence.
example_sentences = (
    'Five apples are on the table.',
    'In the beginning God created the heavens and the earth.',
    'Imagination is more important than knowledge.',
    'To be, or not to be, that is the question.',
    'May the Force be with you.',
)
example_idx = -1  # -1 selects the last entry
target_sentence = example_sentences[example_idx]

# extract semantic features
# The wrapper receives a one-sentence list; the result is indexed twice with [0].
# NOTE(review): presumably this selects the feature pattern of that single sentence — confirm.
feat_target = mu.compute_sentence_feature_patterns_wrapper(
    [target_sentence],
    model_lm,
    tokenizer_lm,
    skip_token_ids=skip_token_ids_lm,
    do_norm=params['do_norm'],
    feat_mu_all=feat_mu_all,
    feat_sd_all=feat_sd_all,
    device=device,
    layerIdx=params['layerIdx'],
    max_batch_samp=params['max_batch_samp'],
)[0][0]

# Start optimization
# uses the MLM (model/tokenizer) and the feature LM (model_lm/tokenizer_lm) loaded above
best_cands, scores_all, scores_eval_all = mu.text_optimization_steps(
    feat_target,
    feat_mu_all,
    feat_sd_all,
    model,
    tokenizer,
    skip_token_ids_mlm,
    model_lm,
    tokenizer_lm,
    skip_token_ids_lm,
    params,
    device,
)



In [None]:
# Text generation from brain-decoded features
# if you have finished creating/downloading decoded features, you can use brain-decoded features for text generation

# parameters [CHANGE here]
dataType = 'testPerception'  # one of: 'testPerception', 'testImagery', 'trainPerception'
sbj = 'S1'
roiType = 'WB'
# sample index: 0-71 for 'testPerception' and 'testImagery'; 0-2107 for 'trainPerception'
decsampidx = 6
# decoded features are read from <decfeat_dir>/<dataType>/<LMType>/<sbj>/<roiType>/
# (the empty last element produces the trailing '/')
decfeat_path = "/".join([decfeat_dir, dataType, LMType, sbj, roiType, ""])

# look up the video index of the selected decoded sample ([0] = first item of the result)
videoidx = mu.prepare_label_parameters(decsampidx, decfeat_path, device)[0]

# extract semantic features: brain-decoded features of the same sample
feat_target = mu.prepare_feature_data(decsampidx, decfeat_path, params)[0]

# this ID corresponds to video file names from Cowen & Keltner (2017)
print(f"VideoID:{videoidx+1}\nCorrect reference:")

# caps_data holds nCapEach consecutive captions per video; slice out the ones for this video
ref_start = videoidx * nCapEach
ref_stop = (videoidx + 1) * nCapEach
for i, cap in enumerate(caps_data[ref_start:ref_stop]):
    print(f"[{i+1}]:{cap}")

# Start optimization
# Prints a heading and then runs the same optimization routine as the sentence demo,
# this time with the brain-decoded feat_target.
# Fix: the heading was misspelled as 'Generatd description:'.
print('\nGenerated description:')
best_cands, scores_all, scores_eval_all = mu.text_optimization_steps(
    feat_target,
    feat_mu_all,
    feat_sd_all,
    model,
    tokenizer,
    skip_token_ids_mlm,
    model_lm,
    tokenizer_lm,
    skip_token_ids_lm,
    params,
    device,
)

