# **Empathetic Generative Model**

If training, change the MODEL_PREFIX variable value under training section

Hyperparameters are also listed in case you want to modify them

## Fasttext based emotion classifier

In [None]:
%%script false --no-raise-error
!pip install fasttext
!wget https://dl.fbaipublicfiles.com/parlai/empatheticdialogues/models/fasttext_empathetic_dialogues.mdl

In [None]:
%%script false --no-raise-error
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, StratifiedKFold, KFold
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.pipeline import Pipeline

# models
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [None]:
%%script false --no-raise-error
import pandas as pd
import fasttext as fasttext_module

ftmodel = fasttext_module.FastText.load_model('fasttext_empathetic_dialogues.mdl')


def get_data(mode='train'):
  row = []
  count = 0
  for i in range(len(data[mode])):
    text = data['train'][i]['utterance']
    text = text.replace('_comma_',',')
    text = text.split('\n')
    if len(text)>1:
      count += 1
      continue
    labels, scores = ftmodel.predict(text[0],k=32)
    row.append(dict(zip(labels,scores)))
    row[-1]['Class'] = int(data[mode][i]['utterance_idx']%2==0)
    row[-1]['conv_id'] = data[mode][i]['conv_id']
    row[-1]['utterance_id'] = data[mode][i]['utterance_idx']

  train_df = pd.DataFrame(row)
  return train_df

train_df = get_data('train')
test_df = get_data('test')
valid_df = get_data('validation')

In [None]:
%%script false --no-raise-error
import matplotlib.pyplot as plt
import seaborn as sns

numerical_cols = [cname for cname in train_df.columns if train_df[cname].dtype in [ 'float64'] and cname != 'Class']
target_col = 'Class'

fig, ax = plt.subplots(8, 4, figsize = (10, 20), dpi = 300)
ax = ax.flatten()

for i, column in enumerate(numerical_cols):
    sns.histplot(data = train_df.sample(n=2000), x = column, ax=ax[i], kde=True,hue=target_col, fill = True)

    ax[i].set_title(f'{column} Distribution', size = 7)
    ax[i].set_xlabel(None)
    ax[i].set_ylabel(None)
#     ax[i].set_yticklabels([])
#     ax[i].set_xticklabels([])
    #ax[i].legend(None)

fig.suptitle('Distribution of Features\n', fontsize = 24, fontweight = 'bold')
#fig.set_legend()
plt.tight_layout()

In [None]:
%%script false --no-raise-error
def train():
    X_train = train_df[numerical_cols]
    X_valid = valid_df[numerical_cols]
    X_test = test_df[numerical_cols]

    y_test = test_df[target_col]

    xg = xgb.XGBRegressor(learning_rate=0.05,#0.1
                          max_depth=4,#3
                          n_estimators=5000,
                            colsample_bytree = 0.50,
                            subsample = 0.80,
                            eta = 0.03,
                            gamma = 0.2, #0.2
                           early_stopping_rounds=500,
                          missing=1
                          )
    xg.fit(X_train, train_df[target_col], eval_set=[(X_valid, valid_df[target_col])])
    y_pred = xg.predict(X_test)
    metrics_dict = {
        'Accuracy': accuracy_score(y_test, y_pred),   # Calculates the accuracy score using y_test (true values) and y_pred (predicted values) and adds it to the dictionary
        'Precision': precision_score(y_test, y_pred), # Calculates the precision score using y_test and y_pred and adds it to the dictionary
        'Recall': recall_score(y_test, y_pred),       # Calculates the recall score using y_test and y_pred and adds it to the dictionary
        'F1': f1_score(y_test, y_pred),               # Calculates the F1 score using y_test and y_pred and adds it to the dictionary
        'ROC AUC': roc_auc_score(y_test, y_pred)      # Calculates the ROC AUC score using y_test and y_pred and adds it to the dictionary
    }
    return metrics_dict, xg
metric, model = train()

In [None]:
%%script false --no-raise-error
def infer_empathy(texts, model):
  row = []
  for text in texts:
    label,score = ftmodel.predict(text,k=32)
    row.append(dict(zip(labels,scores)))

  df = pd.DataFrame(row)
  X = df[numerical_cols]
  return model.predict_proba(X)

In [None]:
%%script false --no-raise-error
infer_empathy(["a","What is this?","Are all scores the same?","Why is this happening to me?"],model)

In [None]:
# import fasttext as fasttext_module


# query_txt = ["Two roads diverged in a wood, and I took the one less traveled by, And that has made all the difference.",
#             "I don't feel so good Mr. Stark...",
#              "It was the best of times, it was the worst of times",
#              "So much to do, so little time",
#              "Can you walk your fish if it rains hard enough?",
#              ]

# ftmodel = fasttext_module.FastText.load_model('fasttext_empathetic_dialogues.mdl')
# # get top-k results with labels and score
# ftmodel.predict(query_txt, k=11)

## **Setup**

If running on local device, ignore cell below:

In [2]:
import os
import shutil

try:

    from google.colab import drive
    drive.mount('/content/gdrive')

    DRIVE_PATH = '/content/gdrive/My\ Drive/CS247-Empathy-Mental-Health'
    DRIVE_PYTHON_PATH = DRIVE_PATH.replace('\\', '')
    if not os.path.exists(DRIVE_PYTHON_PATH):
      DRIVE_PYTHON_PATH = DRIVE_PATH.replace(' ', '')
      if not os.path.exists(DRIVE_PYTHON_PATH):
        %mkdir $DRIVE_PATH

    ## the space in `My Drive` causes some issues,
    ## make a symlink to avoid this
    # Solved -> symlink for convenience
    SYM_PATH = '/content/CS247-Empathy-Mental-Health'
    if not os.path.exists(SYM_PATH):
      !ln -s $DRIVE_PATH $SYM_PATH

    running_in_colab = True

    # We already mounted in our google drive.
    # Enter the foler where you put files in:
    %cd '/content/CS247-Empathy-Mental-Health'

    # What files are there:
    !ls


except ModuleNotFoundError:
    running_in_colab = False
    print(
        "I guess you are running locally. If you get this message in Colab, check the files."
    )

Mounted at /content/gdrive
/content/gdrive/.shortcut-targets-by-id/1qwurxfG3wTYT_VY1AQ0AaMBt4LPZf23w/CS247-Empathy-Mental-Health
 best_emotion.pt	   NaiveBaselineModel.ipynb	   rlhf_q_2
 checkpoint_other_131.pt   output			   rlhf_q_3
 checkpoint_other_79.pt    PretrainedModelQuerying.ipynb   rlhf_question_0_100
 Empathy-Mental-Health	   rlhf_default_0_100		   rlhf_question_0_200
 EmpDialogue_RecEC	   rlhf_default_0_200		   rlhf_question_0_300
 ER-reddit-test.csv	   rlhf_default_logs.json	   rlhf_question_logs.json
'Generative Model.ipynb'   rlhf_length_0		   rlhf_therapist_length_0_100
 glove.6B.100d.txt	   rlhf_length_0_100		   rlhf_therapist_length_0_200
 glove.6B.200d.txt	   rlhf_length_0_200		   rlhf_therapist_length_0_300
 glove.6B.300d.txt	   rlhf_length_0_300		   rlhf_therapist_length_logs.json
 glove.6B.50d.txt	   rlhf_length_logs.json	  'RL Training.ipynb'
 hard-gate-test.gdoc	  'RLHF on SFT'			   roberta-large.tsv
 hard-gate-test.txt	   rlhf_q_1			   SFT_GPT2


Clone/update local repo

In [None]:
# if os.path.isdir(SYM_PATH + "/EmpDialogue_RecEC"):
#     !git pull
# elif not os.path.exists('./EmpDialogue_RecEC'):
#     !git clone https://github.com/saunack/EmpDialogue_RecEC.git
#     %cd './EmpDialogue_RecEC'
# else:
#     %cd './EmpDialogue_RecEC'
#     !git pull
# !pwd

In [None]:
# %cd EmpDialogue_RecEC

Ensure you're in "CS247-Empathy-Mental-Health/EmpDialogue_RecEC"

In [None]:
# !pip install -r requirements.txt

In [None]:
# %cd /content

In [3]:
!pip install trl

Collecting trl
  Downloading trl-0.7.11-py3-none-any.whl (155 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/155.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate (from trl)
  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets (from trl)
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.7.3-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Collecting docstring-parser>=0.14.1 (from tyro>=0.5.11-

**File Structure (Colab)**

Full Path:
/content/gdrive/MyDrive/CS247-Empathy-Mental-Health/EmpDialogue_RecEC

Symlink: /content/CS247-Empathy-Mental-Health/EmpDialogue_RecEC

Output Folder (for all models):
/content/CS247-Empathy-Mental-Health/EmpDialogue_RecEC/outputs

Roberta: /content/CS247-Empathy-Mental-Health/resources/roberta-large.tsv

Glove: /content/CS247-Empathy-Mental-Health/resources/glove.6B.300d.txt

In [4]:
# https://github.com/huggingface/transformers/issues/3050#issuecomment-682167272
import logging
import re
def set_global_logging_level(level=logging.ERROR, prefices=[""]):
    """
    Override logging levels of different modules based on their name as a prefix.
    It needs to be invoked after the modules have been loaded so that their loggers have been initialized.

    Args:
        - level: desired level. e.g. logging.INFO. Optional. Default is logging.ERROR
        - prefices: list of one or more str prefices to match (e.g. ["transformers", "torch"]). Optional.
          Default is `[""]` to match all active loggers.
          The match is a case-sensitive `module_name.startswith(prefix)`
    """
    prefix_re = re.compile(fr'^(?:{ "|".join(prefices) })')
    for name in logging.root.manager.loggerDict:
        if re.match(prefix_re, name):
            logging.getLogger(name).setLevel(level)

In [5]:
import torch
import transformers
set_global_logging_level(logging.ERROR, ["transformers",  "torch"])

## Emotion/label prediction model

In [6]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

class Evaluator():
  def __init__(self, device='cpu',length_reward=False):
    self.device = device

    self.toxicity_tokenizer = AutoTokenizer.from_pretrained("citizenlab/distilbert-base-multilingual-cased-toxicity")
    self.toxicity_model = AutoModelForSequenceClassification.from_pretrained("citizenlab/distilbert-base-multilingual-cased-toxicity").to(self.device)

    self.sst_tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")
    self.sst_model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english").to(self.device)

    self.therapy_tokenizer = AutoTokenizer.from_pretrained("goblinmode2700/therapist-behavior-classification")
    self.therapy_model = AutoModelForSequenceClassification.from_pretrained("goblinmode2700/therapist-behavior-classification").to(self.device)

    self.w_toxic = 1.3
    self.w_question = 2.4
    self.w_therapist_input = 1.8
    self.w_sst = 8.8
    self.w_length = 0.8

    self.w = self.w_question + self.w_therapist_input + self.w_sst + self.w_toxic
    self.length_reward = length_reward

  @torch.no_grad()
  def score(self, texts):
    tokens = self.toxicity_tokenizer(texts, padding=True,truncation=True,return_tensors="pt").to(self.device)
    toxic_score = self.toxicity_model(**tokens).logits
    toxic_score = F.softmax(toxic_score,dim=-1)

    tokens = self.sst_tokenizer(texts,padding=True,truncation=True, return_tensors="pt").to(self.device)
    sst_score = self.sst_model(**tokens).logits
    sst_score = F.softmax(sst_score,dim=-1)
    # print(sst_score)

    tokens = self.therapy_tokenizer(texts,padding=True,truncation=True, return_tensors="pt").to(self.device)
    therapy_score = self.therapy_model(**tokens).logits
    therapy_score = F.softmax(therapy_score,dim=-1)
    # reflection is bad
    question_score, therapist_input_score, reflection_score, other_score = therapy_score[:,0], therapy_score[:,1], therapy_score[:,2], therapy_score[:,3]

    # score = (self.w*toxic_score[:,1]**2+sst_score[:,0])/(self.w+1)
    score = (self.w*toxic_score[:,1]**2\
             +self.w_question*question_score\
             +self.w_therapist_input*therapist_input_score\
             -self.w_sst*(sst_score[:,0]-1/2*sst_score[:,1])\
             )/(self.w+1)
    if self.length_reward:
      # change this to the last tokenizer
      length = (tokens['input_ids']!=evaluator.sst_tokenizer.pad_token_id).sum(dim=-1)
      length_normalized = length/50
      score += self.w_length*length_normalized

    return score

In [None]:
#  e = Evaluator()
# e.score(["You don't deserve to live.","Where am I?","This is what we call ","What we need is relaxation"])
# # e.therapy_model.id

# Initialize models and tokenizers

In [7]:
from datasets import load_dataset

In [8]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [9]:
SFT_PATH  = '/content/CS247-Empathy-Mental-Health/SFT_GPT2'
import os
if not os.path.exists(SFT_PATH):
  SFT_PATH = '/content/gdrive/MyDrive/SFT_copy'
  %cd /content/gdrive/MyDrive/

In [10]:
# from trl.core import respond_to_batch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, GPT2Tokenizer
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer, create_reference_model

def get_models_tokenizer(base_model='gpt2', sft_path=None, fine_tuned=False):
  if fine_tuned and sft_path is None:
    raise ValueError("If fine-tuned, provide path to fine tuned model via sft_path")
  elif not fine_tuned and sft_path is not None:
    raise ValueError("sft_path provided. Please set fine_tuned=True")

  tokenizer_base_model = base_model
  if fine_tuned:
    base_model = sft_path

  tokenizer_base_model = 'gpt2'
  tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_base_model)
  tokenizer.pad_token = tokenizer.eos_token
  if fine_tuned:
    tokenizer.add_special_tokens({'bos_token':'<SPEAKER>','sep_token':'<RESPONSE>'})

  lm = AutoModelForCausalLM.from_pretrained(base_model).to(device)
  model = AutoModelForCausalLMWithValueHead(lm).to(device)
  model.is_peft_model = False
  model.pretrained_model.resize_token_embeddings(len(tokenizer))

  model_ref = create_reference_model(model)
  model_ref.is_peft_model = False

  return model, model_ref, tokenizer

# no need for model_ref during testing. Only for training
model, model_ref, tokenizer = get_models_tokenizer(base_model='gpt2',
                                                   sft_path=SFT_PATH,
                                                   fine_tuned=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

## Define generation function

In [11]:
sample_query = ["I don't feel so good Mr. Stark. What's happening?","I had the worst day yesterday.","I didn't know what just happened. It's so confusing."]

@torch.no_grad
def generate(model, tokenizer, texts, fine_tuned=False):
    if fine_tuned:
      texts = [tokenizer.bos_token+x+tokenizer.sep_token for x in texts]
    tokens = tokenizer(texts, padding=True,truncation=True,return_tensors="pt").to(device)
    outputs = model.generate(tokens.input_ids, attention_mask=tokens.attention_mask, max_new_tokens=80, do_sample=True, top_k=50, top_p=0.95)
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return response

generate(model, tokenizer, sample_query)

["I don't feel so good Mr. Stark. What's happening?",
 'I had the worst day yesterday.',
 "I didn't know what just happened. It's so confusing."]

# RL training via PPO

Hyperparameters are:
1. batch size, mini batch size
2. learning rate
3. weight_scale for scaling the reward score overall
4. relative weights for each loss term in the evaluator
5. max_new_tokens in generate for PPO generation?

Notes:

- Currently using the validation dataset as the training set here. This is because the training set has already been encountered during fine tuning and the model was not training well on it as a result (mis-alignment of reward and overfitting on trained examples)
- KL divergence starts becoming negative. Probably due to reward function
- Return_prompt in PPO trainer generate() is set to False so that rewards are calculated only on the basis of response

In [3]:
MODEL_PREFIX = "rlhf_q"
MODEL_PATH = MODEL_PREFIX
logfile = f'{MODEL_PATH}_logs.json'
logs = []

In [4]:
# 2. initialize trainer
# original params: 16, 16
ppo_config = {"batch_size": 32,"mini_batch_size":32,"learning_rate":1.41e-6,"ppo_epochs": 3}
config = PPOConfig(**ppo_config)
ppo_trainer = PPOTrainer(config,
                         model,
                         model_ref,
                         tokenizer,
                         )

# 4. generate model response
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    "max_new_tokens": 40,
    "return_prompt": False,
}


NameError: name 'PPOConfig' is not defined

In [None]:
class Response(torch.utils.data.Dataset):
  def __init__(self,mode='train', tokenizer=None):
    self.dataset = load_dataset("empathetic_dialogues",split=mode)
    self.tokenizer=tokenizer
    self.texts = []
    self.prepare_convs()

  def __len__(self):
    return len(self.texts)#sum([x%2==0 for x in self.dataset['utterance_idx']])

  def prepare_convs(self):
    self.texts = [self.dataset[i]['utterance'] for i in range(len(self.dataset)) if self.dataset[i]['utterance_idx']%2==0]
    self.texts = [x.replace('_comma_',',') for x in self.texts]
    if tokenizer is not None:
      self.texts = [tokenizer.bos_token+x+tokenizer.sep_token for x in self.texts]
    # for i in range(len(self.dataset)):
    #   if self.dataset['utterance_idx'][i]%2==0:
    #     self.texts.append(self.dataset['utterance'][i])

  def __getitem__(self, idx):
    return self.texts[idx]


In [None]:
from torch.utils.data import DataLoader

# need to use a different set from fine-tuning
train_dataset = Response('validation', tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=ppo_config['batch_size'], shuffle=True)

# valid_dataset = Response('validation', tokenizer)
# valid_dataloader = DataLoader(valid_dataset, batch_size=ppo_config['batch_size'], shuffle=True)

# test_dataset = Response('test', tokenizer)
# test_dataloader = DataLoader(test_dataset, batch_size=ppo_config['batch_size'], shuffle=True)


In [None]:
evaluator = Evaluator(device=device, length_reward=True)

In [None]:
# import numpy as np
# to_remove = []
# for key in l[0]:
#   if type(l[0][key]) == np.ndarray:
#     to_remove.append(key)
# to_remove
# # l
to_remove = ['objective/kl_dist',
 'objective/logprobs',
 'objective/ref_logprobs',
 'tokens/queries_dist',
 'tokens/responses_dist',
 'ppo/policy/advantages',
 'ppo/policy/ratio']

In [None]:
generate(model, tokenizer, sample_query, fine_tuned=True)

In [None]:
from tqdm import tqdm
import json
logs = []

weight_scale = 0.5
log_every_k_steps = 200

for epoch in tqdm(range(ppo_trainer.config.ppo_epochs), "epoch: "):
    for i, batch in enumerate(tqdm(train_dataloader)):
        #### Get inputs
        # needs a list
        tokens = tokenizer(batch, padding=True,truncation=True,return_tensors="pt").to(device)
        query_tensors = [item for item in tokens['input_ids']]

        #### Get response from SFTModel
        # needs a list
        response_tensors = ppo_trainer.generate(query_tensors, **generation_kwargs)
        response = [tokenizer.decode(r.squeeze()) for r in response_tensors]

        #### Compute reward score
        # needs a list
        rewards = [x*weight_scale for x in evaluator.score(response)]

        #### Run PPO step
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)

        #### Logging
        logs.append({k:v for k,v in stats.items() if k not in to_remove})
        logs[-1]['epoch'] = epoch
        logs[-1]['rewards'] = (sum(rewards)/len(rewards)).detach().cpu().item()
        ppo_trainer.log_stats(stats, {'query':batch,'response':response}, rewards)

        #### Save model
        if (i+1) % log_every_k_steps == 0:
          print(generate(model, tokenizer, sample_query, fine_tuned=True))

          model.save_pretrained(f"{MODEL_PATH}_{epoch}_{i+1}")
          with open(logfile,'w') as f:
              json.dump(logs,f)
    print(generate(model, tokenizer, sample_query, fine_tuned=True))
    model.save_pretrained(f"{MODEL_PATH}_{epoch}")
    with open(logfile,'w') as f:
        json.dump(logs,f)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

if len(logs)==0:
  with open(logfile,'r') as f:
    import json
    logs = json.load(f)

cols = logs[0].keys()
log_df = pd.DataFrame(logs).reset_index()

fig, ax = plt.subplots(len(cols)//5+1, 5, figsize = (10, 15), dpi = 300)
ax = ax.flatten()

for i, column in enumerate(cols):
    sns.lineplot(data = log_df, x = 'index', y=column, ax=ax[i], size=0.7, legend=False) #, kde=True,fill = True)

    ax[i].set_title(f'{column}', size = 7)
    ax[i].set_xlabel(None)
    ax[i].set_ylabel(None)

fig.suptitle('Distribution of Features\n', fontsize = 24, fontweight = 'bold')
#fig.set_legend()
plt.tight_layout()

# Testing outputs

Set fine_tuned=True if using RLHF models/models fine tuned on ED dataset

Note: Might need to cut out the prompt from the responses for passing to evaluator

Use these models
- SFT_GPT2
- rlhf_length_0_300
- rlhf_therapist_length_0_200
- rlhf_q_2

In [17]:
model, _, tokenizer = get_models_tokenizer(base_model='gpt2',
                                                   sft_path='/content/CS247-Empathy-Mental-Health/rlhf_default_0_100',
                                                   fine_tuned=True)

In [18]:
query = ["I can't get enough sleep."]
generate(model, tokenizer, query, fine_tuned=True) # False only if using models directly from huggingface

["I can't get enough sleep.Oh, I know what you mean. Sometimes being awake can make you feel a bit tired. Do you want to go to bed?"]

For ppt

In [19]:
query = ["I am so upset and mad. I cant believe what has happened!","A recent job interview that I had made me feel very anxious because I felt like I didn't come prepared."]
evaluator = Evaluator(device=device, length_reward=True)

tokenizer_config.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/774 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/541M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/360 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/827 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
model_dir = f"gpt2"
model, _, tokenizer = get_models_tokenizer(base_model='gpt2', fine_tuned=False)
print(model_dir)
print('\n'.join(generate(model, tokenizer, query, fine_tuned=False)))

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

gpt2
I am so upset and mad. I cant believe what has happened!

Just how much money will I be making when I have my house turned over to the public? (and how much money can I get when I have no property)

It's all just about saving money for myself. How many houses will I have in my life if I don't buy another house?

When my husband and I had a dream come true to give birth to
A recent job interview that I had made me feel very anxious because I felt like I didn't come prepared. I didn't think I had that type of confidence as I didn't feel comfortable in my physical body and that was really disappointing for me and I was struggling, because I thought I had got stronger by doing that. I tried to focus but it just wasn't working, I was worried about not working at all but I wasn't that ready in the way I really wanted to be.

There


In [None]:
model_dir = f"SFT_GPT2"
model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path=model_dir, fine_tuned=True)
print(model_dir)
print('\n'.join(generate(model, tokenizer, query, fine_tuned=True)))

SFT_GPT2
I am so upset and mad. I cant believe what has happened!Who? Why? What are you feeling like?
A recent job interview that I had made me feel very anxious because I felt like I didn't come prepared.I understand! What are you going to do about it?


In [None]:
reward = evaluator.score(['I understand! What are you going to do about it?'])
reward

tensor([1.5920])

In [None]:
model_dirs = ["rlhf_therapist_length_0_200"]#,"rlhf_q_2"
for model_dir in model_dirs:
  model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path=model_dir, fine_tuned=True)
  print(model_dir)
  print('\n'.join(generate(model, tokenizer, query, fine_tuned=True)))

rlhf_therapist_length_0_200
I am so upset and mad. I cant believe what has happened!It's okay! It is so frustrating! Are you gonna see him again?
A recent job interview that I had made me feel very anxious because I felt like I didn't come prepared.What kind of interview did you get? I'm not sure.. Was your nervous about it.


#### Sample outputs from all trained models

In [None]:
query = ["I have too much on my plate right now.", "I lost my friend the other day.","I have too much on my plate right now, literally. How will I finish all of this food?", "Why do I even need to go to this stupid event?", "You know, I was in this car the other day and I suddenly heard a large crash. Up ahead, two cars were crumpled and smoking."]

GPT2

In [None]:
model_dir = f"gpt2"
model, _, tokenizer = get_models_tokenizer(base_model='gpt2', fine_tuned=False)
print(model_dir)
print('\n'.join(generate(model, tokenizer, query, fine_tuned=False)))

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

gpt2
I have too much on my plate right now."

With a wave of his hand, they approached each other outside. As they moved through the doorway, a pair of maidens stood on the steps behind the wall. They seemed as though they could not quite stand together anymore.

"It'll be great if we don't go home together anymore," she said.

"I'll tell you now, it's not a
I lost my friend the other day.So I will go out and buy my own. And I believe in having my own car so he can enjoy it."

A friend, another friend, a former Marine named Bill S.W., is also looking for a buyer.
I have too much on my plate right now, literally. How will I finish all of this food?What is your favorite type of food?Any suggestions for which food you would like to share or cook with family members?The last place I would like to share a hot dog or snack is with my son and his family. We do enjoy a lot of that here and I hope that people who are a part of this blog get a peek at that.I am a vegetarian so my only other dish

SFT GPT2

In [None]:
model_dir = f"SFT_GPT2"
model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path=model_dir, fine_tuned=True)
print(model_dir)
print('\n'.join(generate(model, tokenizer, query, fine_tuned=True)))

SFT_GPT2
I have too much on my plate right now.Why, do you feel so bad about it?
I lost my friend the other day.Well, how'd it go? What happened?
I have too much on my plate right now, literally. How will I finish all of this food?I am not sure how you would do the pasta. I would be very thankful for it. I wish more of the same
Why do I even need to go to this stupid event?Why does it sound so annoying?
You know, I was in this car the other day and I suddenly heard a large crash. Up ahead, two cars were crumpled and smoking. I was driving my car that night and i saw a huge pileup. My friends was running away!


RLHF on fine tuned models

In [None]:
import glob
for prefix in ['default','length','therapist_length','question','q']:
  checkpoints = glob.glob(f'rlhf_{prefix}_*')
  for checkpoint in checkpoints:
    if '100' in checkpoint or 'json' in checkpoint:
      continue
    model_dir = checkpoint #f"rlhf_{prefix}_0_{checkpoint}"
    if os.path.exists(model_dir):
      print(model_dir)
      model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path=model_dir, fine_tuned=True)
      print('\n'.join(generate(model, tokenizer, query, fine_tuned=True)))

rlhf_default_0_200
I have too much on my plate right now.Have you called your doctor?
I lost my friend the other day.What happened?
I have too much on my plate right now, literally. How will I finish all of this food?I think you should. Maybe try to be healthy and exercise a bit. Your health is your biggest priority.
Why do I even need to go to this stupid event?Well, I hope they have a good excuse to get rid of you
You know, I was in this car the other day and I suddenly heard a large crash. Up ahead, two cars were crumpled and smoking.Yeah, that happens often. Did you take any medical help at all?
rlhf_length_0_200
I have too much on my plate right now.Well I'm sure you are working hard to get back on top of this situation.
I lost my friend the other day.Did you find out who he is?
I have too much on my plate right now, literally. How will I finish all of this food?That's okay, I am really feeling hungry! Try eating some more.
Why do I even need to go to this stupid event?I think you

In [None]:
q = ["I am so stressed out right now.","OMG, I did something illegal yesterday!","No no no. What if i fail? This can't happen."]
for model_dir in ['SFT_GPT2','rlhf_length_0_300','rlhf_therapist_length_0_200','rlhf_question_0_300','rlhf_q_2']:
    if os.path.exists(model_dir):
      model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path=model_dir, fine_tuned=True)
      print(model_dir)
      print('\n'.join(generate(model, tokenizer, q, fine_tuned=True)))

SFT_GPT2
I am so stressed out right now.Why? What are you doing to yourself?
OMG, I did something illegal yesterday!wow that's awful, what do you do?
No no no. What if i fail? This can't happen.I hope so.  Hope you find something that is more than just a "challenge" with no real meaning to it.
rlhf_length_0_300
I am so stressed out right now.Do you have a plan to do it?
OMG, I did something illegal yesterday!That sounds terrible! I'm sorry to hear that you are going to miss him! 
No no no. What if i fail? This can't happen.Well I understand why you'd want to try and take the risk. It is better that you don't than that i'm sure they have more success
rlhf_therapist_length_0_200
I am so stressed out right now.Why would you be?
OMG, I did something illegal yesterday!I wouldn't you say! Im a nice person! 
No no no. What if i fail? This can't happen.Oh no, I am sorry to hear that. At least your friend is in a better place right now.  I hope you understand.
rlhf_question_0_300
I am so stress

In [28]:
import pandas as pd
df = pd.read_csv('ER-reddit-test.csv')
input_queries = df['seeker_post']

In [26]:
import csv
import pandas as pd

def construct_output_csv(fileName, responses):
    with open(fileName, 'w', newline='') as csvfile:
        fieldnames = ['id', 'seeker_post', 'response_post']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for i in range(len(input_queries)):
            writer.writerow({'id':f"{i+1}",'seeker_post':str(input_queries[i]), 'response_post':str(responses[i])})


In [15]:
import torch
torch.cuda.empty_cache()

In [None]:
for model_dir in ['SFT_GPT2','rlhf_length_0_300','rlhf_therapist_length_0_200','rlhf_question_0_300','rlhf_q_2']:
    if os.path.exists(model_dir):
      model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path=model_dir, fine_tuned=True)
      print(model_dir)
      print('\n'.join(generate(model, tokenizer, q, fine_tuned=True)))

In [39]:
responses = []
counter = 0
for query in input_queries:
    # print(f"query: {query}")
    if counter % 50 == 0:
        print(counter)
        print(responses)
    counter += 1
    model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path='rlhf_q_2', fine_tuned=True)
    retString = ('\n'.join(generate(model, tokenizer, [query], fine_tuned=True)))
    retString = retString[len(query):]
    # print(f"response: {retString}")
    responses.append(retString)

0
[]
50
["That's a good idea! I hope you're able to help out! I'm sure they'll be happy to help! ", "That's really very sweet, I hope she makes it through!", 'It has happened with me when I had the same issues in the past as well, I was always really nice and supportive though! Have you decided to change your ways?', "That's unfortunate. It can be difficult to be depressed. But you might want to make sure you don't drink too much and don't binge eat too much.", ' I am so sorry to hear that. I hope you are taking some good care and have a plan.  If only!', 'I agree, it feels hopeless.  It could be bad for you, but the only thing you can do is find a way to get your mind off it.', "It's terrible that you're stuck in such circumstances. I feel you.  That sounds awful.  I hope you can find something positive and help you.  You can certainly do that yourself!", 'Oh, yeah, I think you should start thinking twice before making new ones!', "That's terrible. Hopefully you will find something a 

In [41]:
# print(responses)
print(len(responses))

307


# Final testing on test.csv

Chris: Any code already set up to test on test.csv? Setting up then

In [None]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader

In [None]:
# class TextDataset(Dataset):
#     def __init__(self, txt):
#         self.text = txt

#     def __len__(self):
#             return len(self.text)

#     def __getitem__(self, idx):
#             text = self.text[idx]
#             return text

In [None]:
df = pd.read_csv("/content/CS247-Empathy-Mental-Health/EmpDialogue_RecEC/Data/test.csv", on_bad_lines='skip')
queries = df["prompt"]
# queries = TextDataset(queries)

In [None]:
# qdl = DataLoader(queries, batch_size=128)

In [None]:
model_dir = f"SFT_GPT2"
model, _, tokenizer = get_models_tokenizer(base_model='gpt2', sft_path=model_dir, fine_tuned=True)
pred = []
for query in queries:
  pred.append(generate(model, tokenizer, query, fine_tuned=True))
print('\n'.join(pred))

OutOfMemoryError: CUDA out of memory. Tried to allocate 128.00 MiB. GPU 0 has a total capacty of 14.75 GiB of which 37.06 MiB is free. Process 29239 has 14.71 GiB memory in use. Of the allocated memory 14.01 GiB is allocated by PyTorch, and 586.94 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF