In [1]:
from transformers import BertModel, BertTokenizer
from utils import devdf_generator
import pandas as pd
import torch
import vsm
import os
%load_ext autoreload
%autoreload 2

VSM_HOME = os.path.join('data', 'vsmdata')

DATA_HOME = os.path.join('data', 'wordrelatedness')

def evaluate_pooled_bert(rel_df, layer, pool_func):
    
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
        
    bert_weights_name = 'bert-base-uncased'

    # Initialize a BERT tokenizer and BERT model based on
    # `bert_weights_name`:
    tokenizer = BertTokenizer.from_pretrained(bert_weights_name)
    model = BertModel.from_pretrained(bert_weights_name)
    model = model.to(device)
    print(f'Model is on {model.device}')

    # Get the vocabulary from `rel_df`:
    ##### YOUR CODE HERE
    vocab = set(rel_df.word1.values) | set(rel_df.word2.values)
    
    # Use `vsm.create_subword_pooling_vsm` with the user's arguments:
    pooled_df = vsm.create_subword_pooling_vsm(vocab, tokenizer, model, layer=layer, pool_func=pool_func)
    
    # Return the results of the relatedness evalution:
    return vsm.word_relatedness_evaluation(rel_df, pooled_df)

In [2]:
pooling_function = vsm.mean_pooling
dev = pd.read_csv(os.path.join(DATA_HOME, "cs224u-wordrelatedness-dev.csv"))
highest = devdf_generator(dev, scoring='highest')
lowest = devdf_generator(dev, scoring='lowest')
average = devdf_generator(dev, scoring='mean')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Series([], Name: score variance, dtype: float64)
Series([], Name: score variance, dtype: float64)
Series([], Name: score variance, dtype: float64)


### Series of Experiments using differnt "hyperparameters" for BERT pooling model

In [9]:
# 1. Same hypers, different dev datasets
scores = {}

dev_eval, dev_rho = evaluate_pooled_bert(dev, -1, pooling_function)
scores['dev'] = dev_rho

highest, highest_rho = evaluate_pooled_bert(highest, -1, pooling_function)
scores['highest'] = highest_rho

lowest, lowest_rho = evaluate_pooled_bert(lowest, -1, pooling_function)
scores['lowest'] = lowest_rho

mean, mean_rho = evaluate_pooled_bert(average, -1, pooling_function)
scores['mean'] = mean_rho

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


In [10]:
scores

{'dev': 0.23311499452307924,
 'highest': 0.24422440155634575,
 'lowest': 0.2359034072168557,
 'mean': 0.24182501097666576}

In [12]:
#2 same dev set (highest) different pooling functions
pooling_scores = {}

min_eval, min_rho = evaluate_pooled_bert(highest, -1, vsm.min_pooling)
pooling_scores['min'] = min_rho

max_eval, max_rho = evaluate_pooled_bert(highest, -1, vsm.max_pooling)
pooling_scores['max'] = max_rho

mean_eval, mean_rho = evaluate_pooled_bert(highest, -1, vsm.mean_pooling)
pooling_scores['mean'] = mean_rho

last_eval, last_rho = evaluate_pooled_bert(average, -1, vsm.last_pooling)
pooling_scores['last'] = last_rho

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model is on cuda:0


In [13]:
pooling_scores

{'min': 0.24454069661746727,
 'max': 0.24005252002267974,
 'mean': 0.24422440155634575,
 'last': 0.23881398655354832}