In [1]:
# filename: mcap_demo.ipynb
# source activate mcap_demo

import os
import sys
import torch
import time

# Root of the mcap project tree. Defaults to the original author's location;
# set the MCAP_ROOT environment variable to run this notebook on another machine
# without editing the cell. os.path.join(..., '') guarantees a trailing path
# separator, so sub-directories can be appended to rootPath directly.
rootPath = os.path.join(
    os.environ.get('MCAP_ROOT') or '/home/psi/horikawa-t/toolbox/public/mcap/',
    '',
)

# add path to language project directory
# (guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path)
for _lib_dir in (
    '/home/psi/horikawa-t/toolbox/user/python/user/thtoolbox/language/scripts/libraries/mcap/',
    '/home/psi/horikawa-t/toolbox/user/python/user/thtoolbox/util/code/',
):
    if _lib_dir not in sys.path:
        sys.path.append(_lib_dir)
from thutil4 import getFN, getDN, setdir, fix_seed,randsample
import mcap_utils_demo as mu

# GPU selection: gpu_use acts as an on/off flag (1 = use the GPU named below).
gpu_use = 1
if not gpu_use:
    # Hide every CUDA device from this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
else:
    # Expose only this device (a MIG instance UUID) to the process.
    gpu_id = 'MIG-593efb6c-3714-5470-8214-f4771927764b'
    print('Start script: gpu device:%s' % (gpu_id))
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id

# Use CUDA only when it is both requested and actually available.
if torch.cuda.is_available() and gpu_use == 1:
    device = "cuda"
else:
    device = "cpu"
print('gpu availability:%d' % (torch.cuda.is_available()))

# Project sub-directories, all derived from rootPath.
# os.path.join is used instead of `+` so the results are well-formed whether or
# not rootPath ends with a path separator. Each directory keeps its trailing '/'
# exactly as before, since later code builds paths from these strings.
savdir_general = os.path.join(rootPath, 'res/text_generation/')
brain_dat_dir_general = os.path.join(rootPath, 'res/decoding/')
# passed to the model loaders as the model directory
LMmodeldir = os.path.join(rootPath, 'data/model/')
# base directory from which the normalization-parameter path is built
normparam_dat_dir = os.path.join(rootPath, 'data/feature/')
capdata_dir = os.path.join(rootPath, 'data/caption/')
# Proxy settings passed to the model-loading helpers.
# Only one assignment is kept: a preceding empty-proxy dict would be overwritten
# immediately and therefore have no effect. To run without a proxy, use instead:
#     proxies = {"http": "", "https": ""}
proxies = {
    "http": "http://proxy-u.ecl.ntt.co.jp:8080/",
    "https": "http://proxy-u.ecl.ntt.co.jp:8080/",
}


Start script: gpu device:MIG-593efb6c-3714-5470-8214-f4771927764b
gpu availability:1


[]

In [44]:
# model, normalization, and parameter settings

# perform normalization? (1 = yes, 0 = no)
do_norm = 1

# Masked language model (MLM). Select from:
#   'bert-base-cased', 'bert-base-uncased', 'bert-large-cased', 'bert-large-uncased',
#   'bert-large-uncased-whole-word-masking', 'bert-large-cased-whole-word-masking',
#   'roberta-base', 'roberta-large', 'deberta-large-feedback'
# You can test an untrained model by adding "_untrained"
# (e.g., 'roberta-large_untrained').
MLMType = "roberta-large"

# Language model (LM) for feature extraction. Select from:
#   'bert-base-uncased', 'bert-large-uncased', 'bert-base-cased', 'bert-large-cased',
#   'bert-large-uncased-whole-word-masking', 'bert-large-cased-whole-word-masking',
#   'openai-gpt', 'gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl',
#   'xlnet-base-cased', 'xlnet-large-cased', 'roberta-base', 'roberta-large',
#   'distilbert-base-uncased', 'distilbert-base-cased', 'distilgpt2',
#   'albert-base-v1', 'albert-large-v1', 'albert-xlarge-v1', 'albert-xxlarge-v1',
#   'albert-base-v2', 'albert-large-v2', 'albert-xlarge-v2', 'albert-xxlarge-v2',
#   't5-small', 't5-base', 't5-large', 'bart-base', 'bart-large', 'ctrl',
#   'xlm-mlm-17-1280', 'xlm-mlm-100-1280', 'electra',
#   'xlm-roberta-base', 'xlm-roberta-large', 'clip_l', 'sgpt',
#   'deberta-base', 'deberta-large', 'deberta-xlarge'
LMType = "deberta-large"

# initialize: remember when setup started
start = time.time()

# masked language model and its tokenizer
tokenizer, model = mu.load_mlm_model(LMmodeldir, MLMType, proxies, device)
# feature-computation language model, its tokenizer, and its layer count
tokenizer_lm, model_lm, nlayers = mu.load_lm_model(LMmodeldir, LMType, proxies, device)

# put both networks into evaluation mode
model.eval()
model_lm.eval()

# prepare skip tokens, if any
# NOTE(review): the keyword `speficied_skip_tokens` is spelled this way at the
# call site; presumably it matches the parameter name in mcap_utils_demo, so do
# not "correct" it without checking that module.
skip_token_ids_mlm = mu.set_skip_token_ids(
    tokenizer,
    speficied_skip_tokens=[],
    include_special_token=True,
)
skip_token_ids_lm = mu.set_skip_token_ids(
    tokenizer_lm,
    speficied_skip_tokens=[],
    include_special_token=True,
)

# normalization parameters: loaded only when normalization is enabled,
# otherwise both are left as empty lists
normparam_path = f"{normparam_dat_dir}/{LMType}/norm_param/"
if do_norm:
    feat_mu_all, feat_sd_all = mu.prepare_norm_params(normparam_path, nlayers, device=device)
else:
    feat_mu_all, feat_sd_all = [], []

# Parameters handed to mu.text_optimization_steps (and partly to the feature
# extraction call). Key names, including their spelling, are what the library
# receives, so they must stay exactly as written.
# NOTE(review): the grouping below is inferred from the key names only —
# confirm the meaning of each entry against mcap_utils_demo.
params = {
    # -- search schedule / scoring --
    'nItr': 100,
    'metricType': 'corr',
    'do_norm': do_norm,
    'beamwidth': 5,

    # -- masking --
    'nMaskCands': 5,
    'nMaskPerSentence': 2,
    'nGram4Mask': 3,
    'multiMaskType': 'forward_seq',
    'maskingUnitType': 'token',
    'add_insert_mask': 1,

    # -- feature layers / candidate selection --
    'mLayerType': 'vstack',
    'optimal_th': 0.001,
    'topk': 5,
    'max_batch_samp': 200,

    # -- length penalty --
    'length_penalty_type': 'token',
    'length_penalty_w': 0.1,

    # -- MLM candidate scoring / sampling --
    'mlmscoreType': 'modified',
    'mlm_sampling_type': 'sampling',
    'mlms_fix_weight': 0,
    'nMax_MLMs_cands': 5000,

    # -- refresh --
    'do_reflesh': 1,
    'reflesh_th': [10, 0.1, 5, 0.0],
    'add_mask_removal': False,

    # every layer index reported by the LM loader: 0 .. nlayers-1
    'layerIdx': range(nlayers),
}



Loading mlm model...
Loading model and tokenizer from cache...
Load roberta-large model done
Loading lm model...
Loading model and tokenizer from cache...
Load roberta-large model done


In [45]:
# You can test arbitrary word sequences to examine the effectiveness of our method.
# The example targets are kept in one list and a single entry is selected, so
# switching the target means changing one index rather than reordering lines.
example_sentences = [
    'Five apples are on the table.',
    'In the beginning God created the heavens and the earth.',
    'Imagination is more important than knowledge.',
    'To be, or not to be, that is the question.',
    'May the Force be with you.',
]
# Index of the sentence to reconstruct (-1 selects the last example).
target_sentence = example_sentences[-1]

# extract semantic features of the target sentence (passed as a one-element list)
feat_target, inputs = mu.compute_sentence_feature_patterns_wrapper(
    [target_sentence],
    model_lm,
    tokenizer_lm,
    skip_token_ids=skip_token_ids_lm,
    do_norm=params['do_norm'],
    feat_mu_all=feat_mu_all,
    feat_sd_all=feat_sd_all,
    device=device,
    layerIdx=params['layerIdx'],
    max_batch_samp=params['max_batch_samp'],
)

# Start optimization against the features of the first (and only) sentence.
# NOTE(review): no random seed is set in the visible cells although `fix_seed`
# is imported and params['mlm_sampling_type'] is 'sampling'; if the search
# samples randomly, the printed trajectory may differ between runs — confirm.
best_cands, scores_all, scores_eval_all = mu.text_optimization_steps(
    feat_target[0],
    feat_mu_all,
    feat_sd_all,
    model,
    tokenizer,
    skip_token_ids_mlm,
    model_lm,
    tokenizer_lm,
    skip_token_ids_lm,
    params,
    device,
)



[0]:<unk>:[score=0.1522, score_reg=0.1522][t=0.03924]
[1]:3.:[score=0.2069, score_reg=0.1930][t=0.29230]
[2]:5.:[score=0.2272, score_reg=0.2120][t=1.16132]
[3]:5.:[score=0.2272, score_reg=0.2120][t=2.65721]
[4]:5 Go ahead.:[score=0.2951, score_reg=0.2569][t=4.65430]
[5]:Flash the Matrix.:[score=0.3433, score_reg=0.2988][t=6.74760]
[6]:I am. Believe me.:[score=0.3964, score_reg=0.3314][t=9.04068]
[7]:I Believe You.:[score=0.4066, score_reg=0.3540][t=11.26445]
[8]:3 Believe in peace for me.:[score=0.4580, score_reg=0.3770][t=13.45309]
[9]:Join me with you.:[score=0.4910, score_reg=0.4180][t=16.11843]
[10]:Keep me with you.:[score=0.5288, score_reg=0.4502][t=18.76866]
[11]:Rest in your peace with us.:[score=0.5776, score_reg=0.4755][t=21.60732]
[12]:Rest in his peace be with you.:[score=0.6348, score_reg=0.5156][t=24.55984]
[13]:May peace be with you.:[score=0.8231, score_reg=0.6881][t=27.92304]
[14]:May peace be with you.:[score=0.8231, score_reg=0.6881][t=30.39247]
[15]:May be with you.