In [7]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import torch
import torch.nn as nn
import pickle
import sys
import imp
# Put the parent directory on the module search path; the project-local modules
# imported in the next cell (cvar_helpers, rl_learner, ...) are presumably
# located there -- TODO confirm.
sys.path.append('../')
%matplotlib inline

In [8]:
from transformers import (
    GPT2Config,
    GPT2Tokenizer,
    GPT2LMHeadModel,
    MinLengthLogitsProcessor,
    LogitsProcessorList,
    AutoTokenizer,
    AutoModelForSequenceClassification,
)
from cvar_helpers import calc_cvar_from_quantiles
from cvar_sampler import distort_probabilities
from rl_learner import TD_Learner
from generate_sentences_w_cvar import score_sentiment

# `imp` has been deprecated since Python 3.4 and was removed in 3.12;
# `importlib.reload` is the supported replacement with the same semantics.
import importlib

import manual_examples
# Re-execute the module so edits made to manual_examples.py since the kernel
# started are picked up before the names are (re)bound below.
importlib.reload(manual_examples)
from manual_examples import get_probabilities, get_distributions, plot_distributions, get_prompt_distribution

### Loading GPT-2

In [9]:
# Local directory holding the pretrained GPT-2 large checkpoint; the config,
# tokenizer and LM-head model below are all loaded from this one path.
modelname = '../models/pretrained/gpt2-large/'
# `config` is read as a global by load_rl_model (it uses config.n_embd).
config = GPT2Config.from_pretrained(modelname)
tokenizer = GPT2Tokenizer.from_pretrained(modelname)
model = GPT2LMHeadModel.from_pretrained(modelname)
# Device string passed to the helper calls in later cells. Note that `model`
# itself is not moved with .to(device) in this cell.
device = 'cpu'

In [13]:
# Sentiment classifier and its tokenizer, both loaded from the same local
# checkpoint directory. Relies on `device` defined in the GPT-2 loading cell.
sentiment_tokenizer = AutoTokenizer.from_pretrained('../models/pretrained/cardiffnlp-twitter-roberta-base-sentiment')
sentiment_model = AutoModelForSequenceClassification.from_pretrained('../models/pretrained/cardiffnlp-twitter-roberta-base-sentiment')
# The trailing ';' suppresses the notebook's echo of the value returned by .to().
sentiment_model.to(device);

### Loading RL Code

In [10]:
def load_rl_model(epoch=75, hidden_dim =104, n_quantiles = 10, extra = '_prompt_enc',
                  folder = 'single_sentences_IYou_3',huber=0.1):
    """Load a saved TD_Learner checkpoint together with its training log.

    Both files are read from
    ``../data/results/{folder}/quantile_learner_{hidden_dim}_{huber}{extra}/``:
    the weights from ``quantile_learner_epoch{epoch}.pkl`` and the log from
    ``log_quantile_learner_epoch{epoch}.pkl``.

    Depends on the notebook-level global ``config`` (its ``n_embd`` attribute
    is passed to ``TD_Learner`` as the first argument).

    Parameters
    ----------
    epoch : int
        Epoch number embedded in the checkpoint/log file names.
    hidden_dim : int
        Passed to ``TD_Learner`` and embedded in the run directory name.
    n_quantiles : int
        Passed to ``TD_Learner``; also the number of entries in ``taus``.
    extra : str
        Suffix appended to the run directory name.
    folder : str
        Sub-directory of ``../data/results/`` containing the run.
    huber : float
        Value embedded in the run directory name.

    Returns
    -------
    dict
        ``'Z_network'``: the ``TD_Learner`` with the loaded state dict;
        ``'loss'``: ``np.array(log['loss'])``;
        ``'epoch'``: ``np.array(log['epoch'])``;
        ``'log'``: the raw unpickled log object;
        ``'taus'``: midpoints of ``n_quantiles`` equal-width intervals on [0, 1];
        ``'alphas'``: ``taus`` with 0 prepended and 1 appended.

    Raises
    ------
    FileNotFoundError
        If the weights file or the log file does not exist.
    """
    run_dir = f'../data/results/{folder}/quantile_learner_{hidden_dim}_{huber}{extra}/'
    # Build the two paths independently. Deriving the weights path by stripping
    # 'log_' from the log path would also strip any 'log_' that happens to
    # occur inside `folder` or `extra`.
    weights_path = f'{run_dir}quantile_learner_epoch{epoch}.pkl'
    log_path = f'{run_dir}log_quantile_learner_epoch{epoch}.pkl'

    Z_network = TD_Learner(config.n_embd, n_quantiles, hidden_dim=hidden_dim)
    state_dict = torch.load(weights_path, map_location=torch.device('cpu'))
    Z_network.load_state_dict(state_dict)

    # NOTE(security): pickle can execute arbitrary code while loading; only
    # point this at log files produced by trusted training runs.
    with open(log_path, 'rb') as log_file:
        log = pickle.load(log_file)

    # (2i + 1) / (2n) for i = 0..n-1: the centre of each of n equal bins.
    taus = (2 * np.arange(n_quantiles) + 1) / (2.0 * n_quantiles)
    alphas = np.append(np.insert(taus, 0, 0), 1) # add zero, one

    return {
        'Z_network': Z_network,
        'loss': np.array(log['loss']),
        'taus': taus,
        'log': log,
        'epoch': np.array(log['epoch']),
        'alphas': alphas,
    }

### V3 with and without prompt

In [11]:
# Load the epoch-75 checkpoint of the '_prompt_enc' run (the arguments repeat
# load_rl_model's defaults) and pull the network out of the returned dict for
# use in the cells below.
out = load_rl_model(epoch=75, hidden_dim =104, n_quantiles = 10, extra = '_prompt_enc',
                  folder = 'single_sentences_IYou_3',huber=0.1)
Z_network = out['Z_network']

In [15]:
# First-person prompts about feeling unwell, phrased at different intensities.
sickness_prompts = [
    'I felt really sick.',
    'I felt sick.',
    'I felt somewhat sick.',
    'I did not feel that sick.',
    'I felt ok.',
    'I felt like I had a cold.',
]

for prompt in sickness_prompts:
    thetas, cvars = get_prompt_distribution(prompt, tokenizer, model, device, Z_network)
    # One block per prompt: the prompt, thetas rounded to 2 decimals, the
    # sentiment score of the prompt, then a blank separator line.
    print(
        prompt,
        np.round(thetas,2),
        score_sentiment(prompt, sentiment_tokenizer, sentiment_model, device),
        '',
        sep='\n',
    )

I felt really sick.
[-0.67 -0.72 -0.76 -0.78 -0.8  -0.81 -0.82 -0.83 -0.83 -0.72]
[-0.9379235]

I felt sick.
[-0.56 -0.64 -0.69 -0.72 -0.75 -0.77 -0.79 -0.8  -0.81 -0.73]
[-0.87500717]

I felt somewhat sick.
[-0.55 -0.62 -0.66 -0.7  -0.72 -0.75 -0.77 -0.78 -0.8  -0.71]
[-0.89648049]

I did not feel that sick.
[-0.31 -0.26 -0.24 -0.23 -0.22 -0.2  -0.19 -0.17 -0.16 -0.14]
[-0.58306157]

I felt ok.
[0.35 0.47 0.5  0.51 0.51 0.51 0.5  0.48 0.46 0.41]
[0.85541132]

I felt like I had a cold.
[-0.52 -0.56 -0.59 -0.61 -0.62 -0.63 -0.64 -0.64 -0.64 -0.56]
[-0.84604912]



In [16]:
# Past-tense prompts: three adjectives, each plain / "very" / "extremely".
past_tense_prompts = [
    'It was boring.',
    'It was very boring.',
    'It was extremely boring.',
    'It was strenuous.',
    'It was very strenuous.',
    'It was extremely strenuous.',
    'It was hard.',
    'It was very hard.',
    'It was extremely hard.',
]

for prompt in past_tense_prompts:
    thetas, cvars = get_prompt_distribution(prompt, tokenizer, model, device, Z_network)
    # One block per prompt: the prompt, thetas rounded to 2 decimals, the
    # sentiment score of the prompt, then a blank separator line.
    print(
        prompt,
        np.round(thetas,2),
        score_sentiment(prompt, sentiment_tokenizer, sentiment_model, device),
        '',
        sep='\n',
    )

It was boring.
[-0.3  -0.37 -0.41 -0.44 -0.47 -0.49 -0.51 -0.52 -0.54 -0.48]
[-0.93666359]

It was very boring.
[-0.35 -0.39 -0.42 -0.45 -0.47 -0.48 -0.5  -0.51 -0.52 -0.47]
[-0.96904829]

It was extremely boring.
[-0.48 -0.55 -0.6  -0.63 -0.66 -0.69 -0.71 -0.73 -0.75 -0.69]
[-0.9732028]

It was strenuous.
[0.01 0.04 0.05 0.06 0.07 0.08 0.08 0.09 0.11 0.13]
[-0.23800157]

It was very strenuous.
[-0.05 -0.01  0.01  0.02  0.03  0.05  0.06  0.07  0.1   0.14]
[-0.3932954]

It was extremely strenuous.
[-0.02 -0.    0.01  0.01  0.02  0.02  0.03  0.04  0.05  0.07]
[-0.5088403]

It was hard.
[-0.09 -0.07 -0.07 -0.06 -0.06 -0.06 -0.05 -0.04 -0.03  0.  ]
[-0.44828964]

It was very hard.
[-0.13 -0.09 -0.08 -0.07 -0.06 -0.05 -0.04 -0.03 -0.01  0.04]
[-0.68532158]

It was extremely hard.
[-0.1  -0.09 -0.09 -0.09 -0.09 -0.09 -0.09 -0.08 -0.08 -0.06]
[-0.74546438]



In [17]:
# Future-tense prompts: five adjectives, each plain / "very" / "extremely".
future_tense_prompts = [
    'It will be bad.',
    'It will be very bad.',
    'It will be extremely bad.',
    'It will be boring.',
    'It will be very boring.',
    'It will be extremely boring.',
    'It will be strenuous.',
    'It will be very strenuous.',
    'It will be extremely strenuous.',
    'It will be hard.',
    'It will be very hard.',
    'It will be extremely hard.',
    'It will be difficult.',
    'It will be very difficult.',
    'It will be extremely difficult.',
]

for prompt in future_tense_prompts:
    thetas, cvars = get_prompt_distribution(prompt, tokenizer, model, device, Z_network)
    # One block per prompt: the prompt, thetas rounded to 2 decimals, the
    # sentiment score of the prompt, then a blank separator line.
    print(
        prompt,
        np.round(thetas,2),
        score_sentiment(prompt, sentiment_tokenizer, sentiment_model, device),
        '',
        sep='\n',
    )

It will be bad.
[-0.55 -0.6  -0.62 -0.64 -0.65 -0.66 -0.67 -0.67 -0.66 -0.55]
[-0.86431855]

It will be very bad.
[-0.63 -0.62 -0.62 -0.62 -0.62 -0.62 -0.61 -0.6  -0.58 -0.5 ]
[-0.92837798]

It will be extremely bad.
[-0.74 -0.72 -0.73 -0.73 -0.73 -0.72 -0.72 -0.71 -0.7  -0.62]
[-0.94130034]

It will be boring.
[-0.08 -0.09 -0.1  -0.11 -0.12 -0.12 -0.13 -0.14 -0.14 -0.14]
[-0.84792384]

It will be very boring.
[-0.22 -0.22 -0.22 -0.23 -0.23 -0.24 -0.24 -0.24 -0.24 -0.24]
[-0.93431965]

It will be extremely boring.
[-0.39 -0.4  -0.42 -0.43 -0.43 -0.44 -0.45 -0.45 -0.45 -0.43]
[-0.95319607]

It will be strenuous.
[-0.1  -0.05 -0.03 -0.02 -0.    0.01  0.02  0.03  0.05  0.08]
[-0.24446443]

It will be very strenuous.
[-0.14 -0.1  -0.08 -0.06 -0.05 -0.04 -0.02 -0.01  0.01  0.05]
[-0.41001865]

It will be extremely strenuous.
[-0.16 -0.12 -0.1  -0.09 -0.08 -0.07 -0.05 -0.04 -0.02  0.01]
[-0.51264539]

It will be hard.
[-0.13 -0.09 -0.07 -0.06 -0.05 -0.04 -0.02 -0.01  0.01  0.04]
[-0.54603691