### Install the Huggingface transformers module

In [1]:
! pip -q install transformers



In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
model_size = "small"  # DialoGPT checkpoint suffix; interpolated into the hub id below and into Args further down
# Load the pretrained (not yet fine-tuned) tokenizer and causal-LM weights from the Hugging Face hub.
tokenizer = AutoTokenizer.from_pretrained(f"microsoft/DialoGPT-{model_size}")
model = AutoModelForCausalLM.from_pretrained(f"microsoft/DialoGPT-{model_size}")

## Chat with the untrained model

In [3]:
def chat(model, tokenizer, trained=False, max_turns=5, max_length=1000):
    """Run a short interactive console chat with a DialoGPT-style model.

    Each turn reads a message with ``input()``, appends it (plus the EOS token) to
    the running conversation, calls ``model.generate`` on the whole history and
    prints only the newly generated tokens.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Matching tokenizer; ``eos_token`` / ``eos_token_id`` are used as
            turn separator and padding id.
        trained: When True, sample with top-k / top-p / temperature; otherwise
            ``generate`` is called without sampling arguments.
        max_turns: Maximum number of user messages before the chat ends.
        max_length: ``max_length`` forwarded to ``generate`` (caps history + reply).
    """
    print(f'type "q" to quit. Automatically quits after {max_turns} messages')

    # Build the decoding options once instead of duplicating the generate() call.
    generation_kwargs = dict(
        max_length=max_length,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=3,
    )
    if trained:
        generation_kwargs.update(do_sample=True, top_k=100, top_p=0.7, temperature=0.8)

    # Inputs must live on the same device as the model (matters once it was moved to a GPU).
    device = getattr(model, "device", None)

    chat_history_ids = None
    for _ in range(max_turns):
        message = input("MESSAGE: ")

        if message in ("", "q"):  # if the user doesn't wanna talk
            break

        # encode the new user input, add the eos_token and return a tensor in Pytorch
        new_user_input_ids = tokenizer.encode(message + tokenizer.eos_token, return_tensors='pt')
        if device is not None:
            new_user_input_ids = new_user_input_ids.to(device)

        # append the new user input tokens to the chat history (if there is one yet)
        if chat_history_ids is None:
            bot_input_ids = new_user_input_ids
        else:
            bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1)

        # generate a response; the returned tensor is prompt + reply and becomes the new history
        chat_history_ids = model.generate(bot_input_ids, **generation_kwargs)

        # pretty print only the tokens produced after the prompt
        reply_ids = chat_history_ids[:, bot_input_ids.shape[-1]:][0]
        print("DialoGPT: {}".format(tokenizer.decode(reply_ids, skip_special_tokens=True)))

chat(model, tokenizer)

type "q" to quit. Automatically quits after 5 messages


MESSAGE:  maaz


DialoGPT: 


MESSAGE:  hahha


DialoGPT: 


MESSAGE:  where are u from?


DialoGPT: I'm in the US and I've never heard of this.


MESSAGE:  who are u


DialoGPT: I've never seen this before.


MESSAGE:  do u like me


DialoGPT: I like you


It's capable of holding a conversation, but it doesn't sound like our target character (Harry Potter, given the dataset used below) at all yet.

## Configuring the model

In [4]:
import glob, logging, os, pickle, random, re, torch, pandas as pd, numpy as np
from typing import Dict, List, Tuple
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
from tqdm.notebook import tqdm, trange
from pathlib import Path
from transformers import (
    AdamW,
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
    get_linear_schedule_with_warmup,
)
    
try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError:
    from tensorboardX import SummaryWriter

logger = logging.getLogger(__name__)

# Args object that allows easy conversion of the original Python training script into a notebook
class Args():
    """Attribute bag holding every setting the training cells read.

    All values are fixed in ``__init__``; the hub id and output directory are
    derived from the notebook-level ``model_size`` variable.
    """

    def __init__(self):
        hub_checkpoint = f'microsoft/DialoGPT-{model_size}'

        # --- where things are read from / written to ---
        self.output_dir = f'output-{model_size}'
        self.model_type = 'gpt2'
        self.model_name_or_path = hub_checkpoint
        self.config_name = hub_checkpoint
        self.tokenizer_name = hub_checkpoint
        self.cache_dir = 'cached'

        # --- batching ---
        self.block_size = 512
        self.per_gpu_train_batch_size = 4
        self.gradient_accumulation_steps = 1

        # --- optimizer / schedule ---
        self.learning_rate = 5e-5
        self.weight_decay = 0.0
        self.adam_epsilon = 1e-8
        self.max_grad_norm = 1.0
        self.num_train_epochs = 10  # 3
        self.max_steps = -1
        self.warmup_steps = 0

        # --- bookkeeping ---
        self.logging_steps = 1000
        self.save_total_limit = None
        self.seed = 42
        self.local_rank = -1  # -1 is treated as "not distributed" by the training code

args = Args()

In [5]:
# Load the pre-built dialogue table; per the preview below, each row holds a `response`
# followed by its preceding lines of context.
df = pd.read_csv("../input/harry-potter-final-data/final_data.csv")

# NOTE: the triple-quoted string below is never executed; it is a disabled snippet kept for
# reference. It shows how a response/context table can be built from a `data['line']` column
# (`data` is not defined in this notebook). Presumably this is how final_data.csv was made — TODO confirm.
'''contexted = []
n = 7

for i in range(n, len(data['line'])):
  row = []
  prev = i - 1 - n
  for j in range(i, prev, -1):
    row.append(data['line'][j])
  contexted.append(row) 

columns = ['response'] + ['context '+str(i+1) for i in range(n)]
df = pd.DataFrame.from_records(contexted, columns=columns)'''
df.head(5)

Unnamed: 0,response,context1,context2,context3,context4,context5,context6,context7
0,Do you think it wise to trust Hagrid with some...,Hagrid is bringing him.,And the boy?,The good and the bad.,"I'm afraid so, professor.","Are the rumors true, Albus?","Good evening, Professor Dumbledore.","I should've known that you would be here, Prof..."
1,"Ah, Professor, I would trust Hagrid with my life.",Do you think it wise to trust Hagrid with some...,Hagrid is bringing him.,And the boy?,The good and the bad.,"I'm afraid so, professor.","Are the rumors true, Albus?","Good evening, Professor Dumbledore."
2,"Professor Dumbledore, sir.","Ah, Professor, I would trust Hagrid with my life.",Do you think it wise to trust Hagrid with some...,Hagrid is bringing him.,And the boy?,The good and the bad.,"I'm afraid so, professor.","Are the rumors true, Albus?"
3,Professor McGonagall.,"Professor Dumbledore, sir.","Ah, Professor, I would trust Hagrid with my life.",Do you think it wise to trust Hagrid with some...,Hagrid is bringing him.,And the boy?,The good and the bad.,"I'm afraid so, professor."
4,"No problems, I trust, Hagrid?",Professor McGonagall.,"Professor Dumbledore, sir.","Ah, Professor, I would trust Hagrid with my life.",Do you think it wise to trust Hagrid with some...,Hagrid is bringing him.,And the boy?,The good and the bad.


In [6]:
len(df)

4918

In [8]:
def construct_conv(row, tokenizer, eos = True):
    """Turn one row of utterances into a single flat list of token ids.

    Every element of ``row`` is encoded and terminated with the tokenizer's EOS id;
    the encoded utterances are then concatenated in reverse row order. ``eos`` is
    accepted for signature compatibility but is not consulted.
    """
    encoded_turns = [tokenizer.encode(utterance) for utterance in row]

    token_ids = []
    for turn_ids in reversed(encoded_turns):
        token_ids.extend(turn_ids)
        token_ids.append(tokenizer.eos_token_id)
    return token_ids

def load_and_cache_examples(args, tokenizer, df_trn):
    """Wrap ``df_trn`` in a ``ConversationDataset`` built with ``tokenizer`` and ``args``."""
    dataset = ConversationDataset(tokenizer=tokenizer, args=args, df=df_trn)
    return dataset

def set_seed(args):
    """Seed Python's ``random``, NumPy and PyTorch with ``args.seed``."""
    seed = args.seed
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

class ConversationDataset(Dataset):
    """Torch dataset with one flattened, tokenized conversation per dataframe row.

    Rows are converted with ``construct_conv`` and kept in ``self.examples`` as
    lists of token ids; ``__getitem__`` returns them as ``torch.long`` tensors.
    The examples are also pickled to a file inside ``args.cache_dir``. Nothing in
    this class reads that file back, and examples are not truncated to ``block_size``.
    """

    def __init__(self, tokenizer: PreTrainedTokenizer, args, df, block_size=512):
        # block_size is only used to name the cache file. The subtracted term is presumably
        # the number of special tokens the tokenizer adds per sequence — TODO confirm.
        block_size = block_size - (tokenizer.model_max_length - tokenizer.max_len_single_sentence)
        directory = args.cache_dir
        cached_features_file = os.path.join(directory, args.model_type + "_cached_lm_" + str(block_size))

        logger.info("Creating features from dataset file at %s", directory)
        self.examples = [construct_conv(row, tokenizer) for _, row in df.iterrows()]

        # Make sure the cache directory exists; otherwise open() below raises
        # FileNotFoundError when nothing else has created it yet.
        if directory:
            os.makedirs(directory, exist_ok=True)

        logger.info("Saving features into cached file %s", cached_features_file)
        with open(cached_features_file, "wb") as handle:
            pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL)

    def __len__(self):
        """Number of conversations in the dataset."""
        return len(self.examples)

    def __getitem__(self, item):
        """Return the ``item``-th conversation as a tensor of token ids (dtype ``torch.long``)."""
        return torch.tensor(self.examples[item], dtype=torch.long)

In [10]:
def train(args, train_dataset, model: PreTrainedModel, tokenizer: PreTrainedTokenizer) -> Tuple[int, float]:
    """Fine-tune ``model`` on ``train_dataset`` with AdamW and a linear warmup/decay schedule.

    Args:
        args: Settings object. Reads ``local_rank``, ``per_gpu_train_batch_size``, ``n_gpu``,
            ``gradient_accumulation_steps``, ``num_train_epochs``, ``weight_decay``,
            ``learning_rate``, ``adam_epsilon``, ``warmup_steps``, ``max_grad_norm``,
            ``logging_steps``, ``seed`` and ``device``; ``train_batch_size`` is written to it.
        train_dataset: Dataset whose items are token-id tensors; they are padded per batch.
        model: Causal LM to train (unwrapped from ``.module`` if present).
        tokenizer: Tokenizer used for the padding id and the embedding resize.

    Returns:
        ``(global_step, tr_loss / global_step)``: the number of optimizer steps and the
        accumulated loss divided by that number (``0.0`` if no step was taken).
    """
    is_main_process = args.local_rank in [-1, 0]
    # Only the main process logs to TensorBoard; other ranks keep None.
    tb_writer = SummaryWriter() if is_main_process else None

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

    def collate(examples: List[torch.Tensor]):
        # Pad every example in the batch to the longest one; without a pad token,
        # pad_sequence falls back to its default padding value.
        if tokenizer._pad_token is None:
            return pad_sequence(examples, batch_first=True)
        return pad_sequence(examples, batch_first=True, padding_value=tokenizer.pad_token_id)

    if args.local_rank == -1:
        train_sampler = RandomSampler(train_dataset)
    else:
        # Imported here because the notebook's import cell does not bring this name in.
        from torch.utils.data.distributed import DistributedSampler
        train_sampler = DistributedSampler(train_dataset)
    train_dataloader = DataLoader(
        train_dataset, sampler=train_sampler, batch_size=args.train_batch_size, collate_fn=collate, drop_last = True
    )

    t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
    model = model.module if hasattr(model, "module") else model  # Take care of distributed/parallel training
    model.resize_token_embeddings(len(tokenizer))

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
    )

    logger.info("*** Running trainng, Num examples = %d, Num Epochs = %d ***", len(train_dataset), args.num_train_epochs)

    global_step, epochs_trained = 0, 0
    tr_loss, logging_loss = 0.0, 0.0

    model.zero_grad()
    train_iterator = trange(
        epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=not is_main_process
    )
    set_seed(args)  # Added here for reproducibility
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=not is_main_process)
        for step, batch in enumerate(epoch_iterator):

            # Language-modelling objective: the labels are the inputs themselves.
            inputs, labels = (batch, batch)
            if inputs.shape[1] > 1024: continue  # skip batches longer than 1024 tokens
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            outputs = model(inputs, labels=labels)
            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if tb_writer is not None and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics; get_last_lr() reports the rate set by the latest scheduler.step()
                    tb_writer.add_scalar("lr", scheduler.get_last_lr()[0], global_step)
                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
                    logging_loss = tr_loss

    if tb_writer is not None:
        tb_writer.close()

    # Guard against division by zero when no optimizer step ran
    # (e.g. fewer examples than one batch, or every batch skipped).
    average_loss = tr_loss / global_step if global_step else 0.0
    return global_step, average_loss

# Main Runner

Here we're simply setting up the logger and starting the training!

In [11]:
def main(df_trn):
    """Fine-tune DialoGPT on ``df_trn`` and save the result to ``Args().output_dir``.

    Steps: build a fresh ``Args``, pick the device, configure logging, seed the RNGs,
    load config / tokenizer / model, train, then save the model, tokenizer and
    training arguments. Finally the saved checkpoint is loaded once more; the
    reloaded objects are local to this function and are not returned.

    Args:
        df_trn: Dataframe of conversations passed on to ``load_and_cache_examples``.
    """
    args = Args()

    # Setup CUDA, GPU & distributed training; fall back to the CPU when no GPU is visible
    # so the function no longer fails on CPU-only machines.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.n_gpu = torch.cuda.device_count()
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning("Process rank: %s, device: %s, n_gpu: %s", args.local_rank, device, args.n_gpu)

    set_seed(args) # Set seed

    config = AutoConfig.from_pretrained(args.config_name, cache_dir=args.cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, cache_dir=args.cache_dir)
    model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, from_tf=False, config=config, cache_dir=args.cache_dir)
    model.to(args.device)

    # Training
    train_dataset = load_and_cache_examples(args, tokenizer, df_trn)
    global_step, tr_loss = train(args, train_dataset, model, tokenizer)
    logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Saving best-practices: if you use save_pretrained for the model and tokenizer, you can reload them using from_pretrained()
    os.makedirs(args.output_dir, exist_ok=True)

    logger.info("Saving model checkpoint to %s", args.output_dir)
    model_to_save = (model.module if hasattr(model, "module") else model)  # Take care of distributed/parallel training
    model_to_save.save_pretrained(args.output_dir)
    tokenizer.save_pretrained(args.output_dir)

    # Good practice: save your training arguments together with the trained model
    torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

    # Load the fine-tuned model and vocabulary back from disk (checks that the checkpoint is loadable)
    model = AutoModelForCausalLM.from_pretrained(args.output_dir)
    tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
    model.to(args.device)

# Let's run it!
This should take around 5 minutes so you might as well go grab a cup of coffee ☕️

In [12]:
main(df)

Downloading:   0%|          | 0.00/641 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/351M [00:00<?, ?B/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]



Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1229 [00:00<?, ?it/s]

# Chatting with the trained bot

In [16]:
# The tokenizer is reloaded from the hub checkpoint, while the weights are read from the local
# `output-{model_size}` directory (the same name Args.output_dir uses for the saved model).
tokenizer = AutoTokenizer.from_pretrained(f'microsoft/DialoGPT-{model_size}')
model = AutoModelForCausalLM.from_pretrained(f'output-{model_size}')
# trained=True makes chat() pass its sampling arguments (do_sample, top_k, top_p, temperature) to generate().
chat(model, tokenizer, trained=True)

type "q" to quit. Automatically quits after 5 messages


MESSAGE:  How are u Harry?


DialoGPT: I'm fine. Go.


MESSAGE:  Where do you live?


DialoGPT: This is Scabbers, by the way.


MESSAGE:  Who are u?


DialoGPT: Lobby sir, Lobby the house elf.


MESSAGE:  Whats your name?


DialoGPT: Oh, sorry sir. I’m Harry, sir, Harry Potter.


KeyboardInterrupt: Interrupted by user

That's more like it!

In [1]:
!apt-get install git-lfs

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  git-lfs
0 upgraded, 1 newly installed, 0 to remove and 12 not upgraded.
Need to get 2129 kB of archives.
After this operation, 7662 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 git-lfs amd64 2.3.4-1 [2129 kB]
Fetched 2129 kB in 0s (18.9 MB/s)
debconf: delaying package configuration, since apt-utils is not installed
Selecting previously unselected package git-lfs.
(Reading database ... 102229 files and directories currently installed.)
Preparing to unpack .../git-lfs_2.3.4-1_amd64.deb ...
Unpacking git-lfs (2.3.4-1) ...
Setting up git-lfs (2.3.4-1) ...


Git needs an email address:

In [None]:
userEmail = input("Enter git email: ")
!git config --global user.email "$userEmail" 

In [None]:
# Name of the hub repository to push to; also used as a local directory name in the shell lines below.
model_name = "RickBotExample" # you can change this, make sure it doesn't contain any spaces though

# Model-card text written to README.md; the front matter between the `---` lines carries the `conversational` tag.
conversational_tag = """---
tags:
- conversational
---
# RickBot built for [Chai](https://chai.ml/)
Make your own [here](https://colab.research.google.com/drive/1LtVm-VHvDnfNy7SsbZAqhh49ikBwh1un?usp=sharing)"""

# NOTE(review): statement order matters here. The README is written into "$model_name/" after the
# model push and before the tokenizer push, so this presumably relies on push_to_hub leaving a
# local clone in that directory — confirm against the installed transformers version.
model.push_to_hub(model_name)
! echo "$conversational_tag" > "$model_name/README.md"
tokenizer.push_to_hub(model_name)

! rm -r "$model_name/"   # clean up local directory

Install the chaipy package:

In [None]:
!pip install --upgrade chaipy

Setup the notebook:

In [None]:
import chai_py
chai_py.setup_notebook()

In [None]:
from chai_py.auth import set_auth

DEV_UID = input("Enter dev UID: ")
DEV_KEY = input("Enter dev key: ")
set_auth(DEV_UID, DEV_KEY)

In [None]:
user_name = !huggingface-cli whoami
userPlusModel = f"{user_name[0]}/{model_name}"
%store userPlusModel > bot/myArguments.txt

In [None]:
%%write_and_run bot bot.py Bot
import json
import requests
import time
from chai_py import ChaiBot, Update

# Read the "<user>/<model>" id that an earlier cell stored in myArguments.txt.
# The context manager closes the handle, and strip() drops surrounding whitespace
# (such as a trailing newline left by the writer) so it cannot end up in the endpoint URL.
with open("myArguments.txt", "r") as f:
    userPlusModel = f.read().strip()

class Bot(ChaiBot):
    """Chai bot that forwards each user message to a Hugging Face Inference API model.

    The model id comes from the module-level ``userPlusModel`` string. The API token is
    read from the ``HF_API_TOKEN`` environment variable instead of being hard-coded in
    source; when it is unset, requests are sent without an ``Authorization`` header.
    """

    def setup(self):
        """Configure the endpoint URL, request headers and the fixed greeting."""
        import os  # local import: the surrounding cell does not import os

        # strip() guards against whitespace/newlines read from the arguments file
        self.ENDPOINT = f"https://api-inference.huggingface.co/models/{userPlusModel.strip()}"
        # SECURITY: never commit API tokens; supply the token through the environment.
        api_token = os.environ.get("HF_API_TOKEN")
        self.headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}
        self.first_response = "Hey, I'm Rick" # you can change this

    async def on_message(self, update: Update) -> str:
        """Return the greeting for the first message, otherwise the model's reply."""
        if update.latest_message.text == self.FIRST_MESSAGE_STRING:
            return self.first_response
        payload = await self.get_payload(update)
        return self.query(payload)

    def query(self, payload):
        """POST ``payload`` to the endpoint and return the ``generated_text`` field.

        On HTTP 503 the request is retried once after sleeping for the reported
        ``estimated_time`` (0 if absent), this time with ``wait_for_model`` enabled.
        Raises ``KeyError`` if the final response has no ``generated_text``.
        """
        response = requests.post(self.ENDPOINT, headers=self.headers, data=json.dumps(payload))

        if response.status_code == 503:  # This means we need to wait for the model to load 😴.
            estimated_time = response.json().get("estimated_time", 0)
            time.sleep(estimated_time)
            # Same payload plus options asking the API to block until the model is loaded.
            retry_payload = dict(payload, options={"use_cache": False, "wait_for_model": True})
            response = requests.post(self.ENDPOINT, headers=self.headers, data=json.dumps(retry_payload))

        return json.loads(response.content.decode("utf-8"))["generated_text"]

    async def get_payload(self, update):
        """Build the conversational request body: a fixed seed exchange plus the latest user text."""
        past_user_inputs = ["Hey"]  # You can add to this!
        generated_responses = [self.first_response]  # and this!
        return {
            "inputs": {
                "past_user_inputs": past_user_inputs,
                "generated_responses": generated_responses,
                "text": update.latest_message.text,
            },
        }

In [None]:
from chai_py import package, Metadata, upload_and_deploy, wait_for_deployment, share_bot

# Describe the bot: display name, avatar, colour, description, the Bot class defined in the
# earlier cell, and the developer id entered at the auth step.
# NOTE(review): package() presumably writes bot/_package.zip, which is uploaded below — confirm in the chai_py docs.
package(
    Metadata(
        name=model_name,
        image_url="https://live.staticflickr.com/65535/48185490292_1896035611_b.jpg",
        color="0000ff",
        description="Pickle Rick!",
        input_class=Bot,
        developer_uid=DEV_UID,
        memory=3000,
    )
)

print()
# Upload the packaged bot, wait for the deployment identified by bot_uid, then share it.
bot_uid = upload_and_deploy("bot/_package.zip")
wait_for_deployment(bot_uid)
share_bot(bot_uid)