In [1]:
import os
import torch
import numpy as np
from transformers import AdamW, GPT2Tokenizer, GPT2Model
from utils.helper import load_model_recursive
from ppcm_models.pytorch_pretrained_bert.modeling_adapter import GPT2LMHeadModel, GPT2Config

In [2]:
class Arguments():
    """Run configuration for this notebook.

    Every constructor parameter is optional; ``Arguments()`` with no arguments
    produces exactly the values the notebook has always used, while keyword
    overrides (e.g. ``Arguments(lr=1e-4)``) avoid editing the class body.

    Attributes:
        model_size: Size tag used to build the checkpoint directory
            (``ppcm_models/dialoGPT/<model_size>/``) and the fine-tuned weight
            file name (``<model_size>_ft.pkl``).
        lr: Learning rate handed to the optimizer.
        load_check_point_adapter: Path of an adapter checkpoint to load. The
            empty string means "load the fine-tuned DialoGPT weights instead".
    """

    def __init__(self, model_size='medium', lr=2e-4, load_check_point_adapter=""):
        """Store the configuration values as plain instance attributes."""
        self.model_size = model_size
        self.lr = lr
        self.load_check_point_adapter = load_check_point_adapter

        # Disabled options, kept for reference. None of them is set on the
        # instance, so code that reads e.g. `args.device` or `args.label`
        # will raise AttributeError until the line is re-enabled.
#         self.dataset_path = "" #"Path or url of the dataset. If empty download from S3."
#         self.dataset_cache = './dataset_cache' #, help="Path or url of the dataset cache")
#         self.model_checkpoint = "gpt2" #, help="Path, url or short name of the model")
#         self.num_candidates = 2 #, help="Number of candidates for training")
#         self.max_history = 15 #, help="Number of previous exchanges to keep in history")
#         self.max_seq_len = 200 #, help="Max number of tokens")
#         self.train_batch_size = 4 #, help="Batch size for training")
#         self.valid_batch_size = 4 #, help="Batch size for validation")
#         self.gradient_accumulation_steps = 8 #, help="Accumulate gradients on several steps")
#         self.max_norm = 1.0 #, help="Clipping gradient norm")
#         self.n_epochs = 5 #, help="Number of training epochs")
#         self.eval_before_start = 'store_true' #, help="If true start with a first evaluation before training")
#         self.device = 'cuda' if torch.cuda.is_available() else "cpu" #, help="Device (cuda or cpu)")
#         self.fp16 = "" #, help="Set to O0, O1, O2 or O3 for fp16 training (see apex documentation)")
#         self.local_rank = -1 #, help="Local rank for distributed training (-1: not distributed)")
#         self.debug = 'store_true' #, help="debugging mode")
#         self.dataset = 'SENT' #, help="Choose between SENT|TOXI|EMO|QUEST|TOPI ")
#         self.label = 'very_negative' #, help="Choose between very_positive|very_negative|toxic|question")
#         self.kl_weight = 0 #, help="kl constraint for language model")
#         self.iter = 75 #, help="Load data from a certain iteration")


# Notebook-wide configuration object; later cells attach extra fields to it.
args = Arguments()

In [3]:
# Directory that holds config.json, the tokenizer files and the fine-tuned weights.
args.model_path = f'ppcm_models/dialoGPT/{args.model_size}/'

config = GPT2Config.from_json_file(os.path.join(args.model_path, 'config.json'))
tokenizer = GPT2Tokenizer.from_pretrained(args.model_path)

## Load either Adapters' checkpoint, or just finetuned DialoGPT
if args.load_check_point_adapter != "":
    print("Loading ADAPTERS")
    checkpoint_path = args.load_check_point_adapter
else:
    checkpoint_path = os.path.join(args.model_path, f"{args.model_size}_ft.pkl")
model = load_model_recursive(GPT2LMHeadModel(config), checkpoint_path, args, verbose=True)

## Load GPT2 instead of DialoGPT

# Hugging Face checkpoint matching each model size.
# NOTE(review): assumes args.model_size is one of small/medium/large; any other
# value falls back to the previously hard-coded 'gpt2-medium' — confirm.
GPT2_CHECKPOINTS = {'small': 'gpt2', 'medium': 'gpt2-medium', 'large': 'gpt2-large'}
pt_gpt2_model = GPT2Model.from_pretrained(GPT2_CHECKPOINTS.get(args.model_size, 'gpt2-medium'))


def _copy_params_(dst_module, src_module):
    """Copy `weight` and, when both modules have one, `bias` from src into dst in place.

    The destination Parameter objects are kept (only their values change), so
    anything that already references them — for example an output layer sharing
    the embedding matrix — keeps seeing the updated values.

    Raises:
        ValueError: if a tensor pair has different shapes; without this check
            `copy_` could silently broadcast.
    """
    for attr in ("weight", "bias"):
        dst = getattr(dst_module, attr, None)
        src = getattr(src_module, attr, None)
        if dst is None or src is None:
            continue
        if dst.shape != src.shape:
            raise ValueError(
                f"Cannot copy {attr}: shape {tuple(src.shape)} does not match {tuple(dst.shape)}"
            )
        # no_grad: in-place writes to leaf tensors that require grad are otherwise rejected.
        with torch.no_grad():
            dst.copy_(src)


if len(model.transformer.h) != len(pt_gpt2_model.h):
    raise ValueError(
        f"Layer count mismatch: model has {len(model.transformer.h)} blocks, "
        f"GPT2 checkpoint has {len(pt_gpt2_model.h)}"
    )

_copy_params_(model.transformer.wte, pt_gpt2_model.wte)
_copy_params_(model.transformer.wpe, pt_gpt2_model.wpe)

for dst_block, src_block in zip(model.transformer.h, pt_gpt2_model.h):
    # Weights AND biases are transferred; copying weights alone would leave the
    # DialoGPT biases paired with GPT2 weights.
    module_pairs = (
        (dst_block.ln_1, src_block.ln_1),
        (dst_block.attn.c_attn, src_block.attn.c_attn),
        (dst_block.attn.c_proj, src_block.attn.c_proj),
        (dst_block.ln_2, src_block.ln_2),
        (dst_block.mlp.c_fc, src_block.mlp.c_fc),
        (dst_block.mlp.c_proj, src_block.mlp.c_proj),
    )
    for dst_module, src_module in module_pairs:
        _copy_params_(dst_module, src_module)

# Final layer norm, when both models expose one under the name `ln_f`.
if hasattr(model.transformer, "ln_f") and hasattr(pt_gpt2_model, "ln_f"):
    _copy_params_(model.transformer.ln_f, pt_gpt2_model.ln_f)
# model.to(args.device)
print('GPT2 loaded instead DialoGPT')

# Freeze everything except parameters whose name contains "adapter";
# only those are handed to the optimizer.
parameters_to_update = []
for n, p in model.named_parameters():
    if "adapter" in str(n):
        parameters_to_update.append(p)
    else:
        p.requires_grad = False
optimizer = AdamW(parameters_to_update, lr=args.lr, correct_bias=True)
print('GPT2 param frozen, Adapter is trainable and initialized with AdamW')

Loading finetuned model from ppcm_models/dialoGPT/medium/medium_ft.pkl
GPT2 loaded instead DialoGPT
GPT2 param frozen, Adapter is trainable and initialized with AdamW




In [4]:
# check run: push one sentence through the model as a smoke test

text = "Replace me by any text you'd like."
# Keep only the token ids; every other tokenizer output is dropped before the call.
encoded_input = {'input_ids': tokenizer(text, return_tensors='pt')['input_ids']}
output = model(task_id=1, **encoded_input)

In [None]:
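# load dataset (disabled draft): needs DatasetTrain, DataLoader, build_input_from_segments,
# collate_fn, args.label and args.train_batch_size, none of which are defined in the cells above.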

# import jsonlines
# from nltk import tokenize
# from metric.lm_score import get_ppl

# def make_data_loader(args,tokenizer):
#     mapper = {"very_negative":"results/sentiment_class_very_negative/sentiment_class_very_negative_iter_75_step_0.02_sample_10_wd_False_bce_False.jsonl",
#     "very_positive":"results/sentiment_class_very_positive/sentiment_class_very_positive_iter_25_step_0.02_sample_10_wd_False_bce_False.jsonl",
#     "toxic":"results/toxicity_class_toxic/toxicity_class_toxic_iter_75_step_0.02_sample_10_wd_False_bce_False.jsonl",
#     "question":"results/daily_dialogue_act_class_question/daily_dialogue_act_class_question_iter_75_step_0.02_sample_10_wd_False_bce_False_1.jsonl",
#     "Business": "results/AG_NEWS_class_Business/AG_NEWS_class_Business_iter_75_step_0.02_sample_10_wd_False_bce_False_1.jsonl",
#     "SciTech": "results/AG_NEWS_class_SciTech/AG_NEWS_class_SciTech_iter_75_step_0.02_sample_10_wd_False_bce_False_1.jsonl",
#     "Sports": "results/AG_NEWS_class_Sports/AG_NEWS_class_Sports_iter_75_step_0.02_sample_10_wd_False_bce_False_1.jsonl",
#     "World": "results/AG_NEWS_class_World/AG_NEWS_class_World_iter_75_step_0.02_sample_10_wd_False_bce_False_1.jsonl"
#     }
    
#     f = mapper[args.label]
#     response = []
#     with jsonlines.open(f) as reader: 
#         for i, obj in enumerate(reader):
#             text = " ".join(tokenize.sent_tokenize(obj["hyp"]["PPLM"][0][-1])[:2])
#             score = get_ppl(text)
#             if score>700:
#                 continue
#             response.append(obj['conversation']['conversation']+[text])
            
#     dataset = []
#     for r in response:
#         seq = build_input_from_segments(args, r[:-1], r[-1], tokenizer)
#         dataset.append(seq)
#     train_dataset = DatasetTrain(dataset)
#     train_loader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True,collate_fn=collate_fn)

#     return train_loader

# train_loader = make_data_loader(args, tokenizer)