**create-wsd-environment.sh**

In [None]:
# Clone MetaWSD from scratch and patch it for backward compatibility: the sed
# call rewrites every `diffopt.step(loss)` in models/seq_meta.py so that it
# also passes `retain_graph=True`.
! rm -rf MetaWSD/
! git clone https://github.com/avyavkumar/MetaWSD.git
! sed -i -e 's/diffopt.step(loss)/diffopt.step(loss, retain_graph=True)/g' MetaWSD/models/seq_meta.py

# Install dependencies. %pip (rather than `! pip`) installs into the
# environment of the running kernel.
%pip install -r MetaWSD/requirements.txt
%pip install transformers==4.15.0

# Install the `higher` fork from a fresh checkout. Removing a previous checkout
# first keeps this cell re-runnable (git refuses to clone into an existing,
# non-empty directory), mirroring what is done for MetaWSD above.
! rm -rf higher/
! git clone https://github.com/Nithin-Holla/higher.git
! cd higher && python setup.py install

# Force overrides to version 3.1.0; -y answers the uninstall confirmation.
%pip uninstall -y overrides
%pip install overrides==3.1.0

**setup-data.sh**

*Run this cell only for word-sense disambiguation (WSD) tasks.*

In [None]:
# Set up the data directory from scratch.
! rm -rf data/
! mkdir data
# Every `!` line runs in its own subshell, so a bare `! cd data` does not carry
# over to the following line. Clone straight into data/ instead of relying on
# a directory change.
! git clone https://github.com/google-research-datasets/word_sense_disambigation_corpora.git data/word_sense_disambigation_corpora

# Build the dataset; both scripts are launched from the notebook's working
# directory (the one that contains MetaWSD/ and data/).
! python MetaWSD/scripts/wsd_gen_sense_inventory.py
! python MetaWSD/scripts/generate_wsd_data.py --n_support_examples 16 --n_query_examples 16 --n_train_episodes 10000

**create_encodings.py**

*Run this cell only for word-sense disambiguation (WSD) tasks.*

In [None]:
import os
import json
import torch
import numpy as np

UNDEF = -10000

# Token ids dropped when a text is tokenized on its own; presumably the
# [CLS]/[SEP] ids of the BERT vocabulary in use -- TODO confirm for the
# tokenizer that is actually passed in.
_SPECIAL_TOKEN_IDS = (101, 102)


def _token_ids(tokenizer, text):
  """Tokenize `text` and return its ids as a flat list without ids 101/102."""
  ids = tokenizer(text, return_tensors="pt")['input_ids'].tolist()[0]
  return [token_id for token_id in ids if token_id not in _SPECIAL_TOKEN_IDS]


def _target_positions(tokenizer, words, target_index, sentence_ids):
  """Return the positions in `sentence_ids` that belong to the target word.

  The word at `words[target_index]` is tokenized on its own. Its pieces are
  first looked up at the offset obtained by tokenizing the words that precede
  it, so that a word occurring several times in the sentence is encoded at the
  labelled occurrence. If the ids found at that offset do not match the word's
  pieces, the search falls back to taking the first occurrence of each piece
  anywhere in the sentence.
  """
  word_ids = _token_ids(tokenizer, words[target_index])

  # Skip the special ids that open the encoded sentence.
  lead = 0
  while lead < len(sentence_ids) and sentence_ids[lead] in _SPECIAL_TOKEN_IDS:
    lead += 1
  prefix_len = 0
  if target_index > 0:
    prefix_len = len(_token_ids(tokenizer, ' '.join(words[:target_index])))
  start = lead + prefix_len
  if word_ids and sentence_ids[start:start + len(word_ids)] == word_ids:
    return list(range(start, start + len(word_ids)))

  # Fallback: greedy first-occurrence match of each piece.
  remaining = list(word_ids)
  positions = []
  for position, token_id in enumerate(sentence_ids):
    if token_id in remaining:
      remaining.remove(token_id)
      positions.append(position)
  return positions


def _encode_target_word(model, tokenizer, example):
  """Return `(label, encoding)` for the labelled word of one example.

  `example['sentence']` is the list of words and `example['label']` the
  parallel list of labels; the target is the first entry whose label is not -1.
  The encoding is the mean of `last_hidden_state` over the target's token
  positions, returned as a NumPy array.

  Raises:
    IndexError: if every label of the example is -1.
  """
  words, labels = example['sentence'], example['label']
  target_index = next((i for i, label in enumerate(labels) if label != -1), None)
  if target_index is None:
    raise IndexError("example has no labelled target word (every label is -1)")

  inputs = tokenizer(' '.join(words), return_tensors="pt")
  # Only the values are needed (they are detached below), so skip autograd.
  with torch.no_grad():
    outputs = model(**inputs)
  sentence_ids = inputs['input_ids'].tolist()[0]
  positions = _target_positions(tokenizer, words, target_index, sentence_ids)

  # assumes last_hidden_state is (1, seq_len, hidden), so each slice and the
  # resulting mean are (1, hidden) -- TODO confirm against the model used
  pieces = [outputs.last_hidden_state[:, position, :] for position in positions]
  encoding = torch.stack(pieces).mean(dim=0).detach().numpy()
  return labels[target_index], encoding


def _append_if_new(path, encoding):
  """Append `encoding` to the array stored at `path` unless an equal row exists.

  Returns True when the file was rewritten with the extra row and False when an
  identical row was already stored. Rows are compared as a whole; a single
  coinciding component does not count as a duplicate.
  """
  with open(path, 'rb') as f_enc:
    stored = np.load(f_enc)
  if (stored == encoding).all(axis=1).any():
    return False
  # Save through a file object: np.save would append ".npy" to a plain path.
  with open(path, 'wb') as f_enc:
    np.save(f_enc, np.concatenate((stored, encoding), axis=0))
  return True


def generate(model, tokenizer, SHOT):
  """Encode the target word of every example and store the results per label.

  Reads every JSON file in `data/semcor_meta/meta_train_<SHOT>-<SHOT>` and
  `data/semcor_meta/meta_test_<SHOT>-<SHOT>`. Each file holds a list of
  examples with a 'sentence' (list of words) and a 'label' list.

  For training examples, the distinct encodings of a label are stacked in
  `data/encodings/<label>` and their running mean is kept in
  `data/centroids/<label>`. For test examples, the distinct encodings are
  stacked in `data/test_encodings/<label>`. All files are written with
  `np.save` and carry no extension. The output directories are created when
  missing.

  Args:
    model: callable invoked as `model(**inputs)`; its result must expose
      `last_hidden_state`.
    tokenizer: callable invoked as `tokenizer(text, return_tensors="pt")`; its
      result must provide 'input_ids'.
    SHOT: value used to build the names of the input directories.

  Raises:
    IndexError: if an example has no label different from -1.
  """
  TRAINING_DATA_PATH = "data/semcor_meta/meta_train_" + str(SHOT) + "-" + str(SHOT)
  TEST_DATA_PATH = "data/semcor_meta/meta_test_" + str(SHOT) + "-" + str(SHOT)
  CENTROIDS_OUTPUT_PATH = "data/centroids/"
  TRAINING_ENCODINGS_PATH = "data/encodings/"
  TEST_ENCODINGS_PATH = "data/test_encodings/"

  for output_dir in (CENTROIDS_OUTPUT_PATH, TRAINING_ENCODINGS_PATH, TEST_ENCODINGS_PATH):
    os.makedirs(output_dir, exist_ok=True)

  # Number of distinct encodings stored so far for each training label.
  centroid_sizes = {}
  for total_files, file_name in enumerate(os.listdir(TRAINING_DATA_PATH), start=1):
    print("Processing file", total_files, "with name", file_name)
    with open(os.path.join(TRAINING_DATA_PATH, file_name)) as f_json:
      data = json.load(f_json)
    for example in data:
      label, encoding = _encode_target_word(model, tokenizer, example)
      if label in centroid_sizes:
        if _append_if_new(TRAINING_ENCODINGS_PATH + label, encoding):
          centroid_sizes[label] += 1
          with open(CENTROIDS_OUTPUT_PATH + label, 'rb') as f:
            old_average = np.load(f)
          # Incremental mean: fold the new point into the stored centroid.
          new_average = old_average + (encoding - old_average) / centroid_sizes[label]
          with open(CENTROIDS_OUTPUT_PATH + label, 'wb') as f:
            np.save(f, new_average)
      else:
        # First encoding of this label in this run: it starts both files.
        centroid_sizes[label] = 1
        for output_dir in (CENTROIDS_OUTPUT_PATH, TRAINING_ENCODINGS_PATH):
          with open(output_dir + label, 'wb') as f:
            np.save(f, encoding)

  # Test labels already written during this run.
  seen_test_labels = set()
  for total_files, file_name in enumerate(os.listdir(TEST_DATA_PATH), start=1):
    print("Processing file", total_files)
    with open(os.path.join(TEST_DATA_PATH, file_name)) as f_json:
      data = json.load(f_json)
    for example in data:
      label, encoding = _encode_target_word(model, tokenizer, example)
      if label in seen_test_labels:
        _append_if_new(TEST_ENCODINGS_PATH + label, encoding)
      else:
        seen_test_labels.add(label)
        with open(TEST_ENCODINGS_PATH + label, 'wb') as f:
          np.save(f, encoding)

**create-leopard-environment.sh**

In [None]:
# Install sentence-transformers into the environment of the running kernel
# (%pip targets the kernel, `! pip` targets whatever pip is first on PATH).
%pip install sentence-transformers
# Fetch a fresh copy of the leopard repository; removing an earlier checkout
# keeps the cell re-runnable, since git refuses to clone into an existing,
# non-empty directory.
! rm -rf leopard/
! git clone https://github.com/iesl/leopard.git

**create-lo-shot-environment.sh**

In [None]:
# install the required dependencies
! pip install mathutils
! pip install poisson-disc
! pip install transformers
! pip install mosek
! pip install nbimporter

**install-mosek.sh**

In [None]:
import os

# Unpack the MOSEK tools into $HOME. Every `!` line runs in its own subshell,
# so a `! cd $HOME/` would not affect the tar call that follows it; -C names
# the extraction directory explicitly. Extracting straight from the local
# archive also leaves no copy of the tarball behind in $HOME.
! tar -xf mosektoolslinux64x86.tar.bz2 -C $HOME
# Put the licence file next to the unpacked tools.
! cp mosek.lic $HOME/mosek/

# `! export PATH=...` would only change a throwaway subshell. Update the
# kernel's own environment instead so that later cells, and the `!` commands
# they launch, can find the MOSEK binaries. The directory is derived from
# $HOME, where the archive was unpacked above (this is /root/mosek/... when
# $HOME is /root).
mosek_bin = os.path.join(os.environ["HOME"], "mosek/9.3/tools/platform/linux64x86/bin")
if mosek_bin not in os.environ["PATH"].split(os.pathsep):
  os.environ["PATH"] = mosek_bin + os.pathsep + os.environ["PATH"]