In [1]:
!pip install transformers sentencepiece gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 17.7 MB/s 
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 32.6 MB/s 
[?25hCollecting gradio
  Downloading gradio-3.13.0-py3-none-any.whl (13.8 MB)
[K     |████████████████████████████████| 13.8 MB 48.4 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 31.8 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 59.0 MB/s 
Collecting paramiko
  Downloading paramiko-2.12.0-py2.

In [2]:
import torch

import re

import gradio as gr

In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, T5Tokenizer, T5ForConditionalGeneration

# Conversational model and its tokenizer, both from the same checkpoint name.
chatbot_tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")

# Style-transfer model: the tokenizer is created first because the model is
# configured to use the tokenizer's end-of-sequence id as its padding id.
shakespeare_tokenizer = T5Tokenizer.from_pretrained('t5-base')
shakespeare_model = T5ForConditionalGeneration.from_pretrained(
    't5-base',
    pad_token_id=shakespeare_tokenizer.eos_token_id,
)
# Keep the tokenizer consistent with the model: pad with the EOS token as well.
shakespeare_tokenizer.pad_token = shakespeare_tokenizer.eos_token

Downloading:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/127k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/62.9k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/16.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/730M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [4]:
from google.colab import drive

import shutil

# Mount Google Drive under /content/drive; the model checkpoint loaded below is read from there.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Replace the stock t5-base weights with the checkpoint stored on Drive
# (presumably the fine-tuned Shakespeare model, judging by the file name).
# map_location loads the tensors onto the CPU regardless of where they were saved from.
# NOTE(review): torch.load unpickles the file, so only point this at a trusted checkpoint.
shakespeare_model.load_state_dict(torch.load('/content/drive/MyDrive/ECE1786_Project/Models/T5_Base_Models/t5-base_0.0001lr_5epochs.pt', map_location=torch.device('cpu')))

<All keys matched successfully>

In [28]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Both models must live on the same device as the input tensors built at generation time.
chatbot_model = chatbot_model.to(device)
shakespeare_model = shakespeare_model.to(device)

In [40]:
def get_chatbot_output(input_text, temperature):
    """Generate one chatbot reply to ``input_text``.

    Args:
        input_text: The user's message as a plain string.
        temperature: Sampling temperature. A value greater than zero (or
            ``None``) samples with that temperature, as before. Zero or a
            negative value falls back to greedy decoding, because sampling
            needs a strictly positive temperature.

    Returns:
        The decoded reply (at most 30 new tokens) with special tokens removed
        and surrounding whitespace stripped.
    """
    # UI sliders may deliver whole numbers as ints (2 instead of 2.0);
    # normalise so the generation code always receives a float.
    if temperature is not None:
        temperature = float(temperature)

    if temperature is None or temperature > 0:
        generation_kwargs = {'do_sample': True, 'temperature': temperature}
    else:
        # A temperature of 0 is the deterministic limit of sampling: decode greedily.
        generation_kwargs = {'do_sample': False}

    # The tokenizer returns a flat list of ids; wrap it to form a batch of one.
    input_ids = torch.tensor([chatbot_tokenizer.encode(input_text)]).to(device)
    chatbot_output = chatbot_model.generate(input_ids=input_ids, max_new_tokens=30, **generation_kwargs)

    return chatbot_tokenizer.decode(chatbot_output[0], skip_special_tokens=True).strip()

In [41]:
def split_chatbot_output(chatbot_output_string):
    """Split text into sentences, keeping each sentence's end punctuation.

    The text is cut at every '.', '!' or '?'. Each non-empty fragment is
    stripped of surrounding whitespace and re-joined with the punctuation mark
    that followed it.

    Args:
        chatbot_output_string: The text to split.

    Returns:
        A list of sentence strings in their original order. A trailing
        fragment with no closing punctuation (for example a reply cut off
        mid-sentence) is kept as-is instead of raising IndexError. Fragments
        that are empty or whitespace-only are dropped, so runs such as '...'
        do not yield punctuation-only entries.
    """
    # The capturing group makes re.split keep the delimiters, giving
    # [text, punct, text, punct, ..., tail]: text at even indices, the
    # punctuation that followed it at the next odd index.
    pieces = re.split('([.!?])', chatbot_output_string)

    chatbot_output_list_cleaned = []
    for idx in range(0, len(pieces), 2):
        text = pieces[idx].strip()
        if not text:
            continue
        # The final piece has no following delimiter.
        punctuation = pieces[idx + 1] if idx + 1 < len(pieces) else ''
        chatbot_output_list_cleaned.append(text + punctuation)

    return chatbot_output_list_cleaned

In [42]:
def get_shakespeare_output(chatbot_output_list_cleaned, temperature):
    """Rewrite each sentence with the Shakespeare model and join the results.

    Args:
        chatbot_output_list_cleaned: Sentences to convert, one string each.
        temperature: Sampling temperature. A value greater than zero (or
            ``None``) samples with that temperature, as before. Zero or a
            negative value falls back to greedy decoding, because sampling
            needs a strictly positive temperature.

    Returns:
        The converted sentences (at most 50 new tokens each) joined by single
        spaces, with surrounding whitespace stripped. An empty input list
        yields an empty string.
    """
    # UI sliders may deliver whole numbers as ints (2 instead of 2.0);
    # normalise so the generation code always receives a float.
    if temperature is not None:
        temperature = float(temperature)

    if temperature is None or temperature > 0:
        generation_kwargs = {'do_sample': True, 'temperature': temperature}
    else:
        # A temperature of 0 is the deterministic limit of sampling: decode greedily.
        generation_kwargs = {'do_sample': False}

    shakespeare_output_list = []
    for sentence in chatbot_output_list_cleaned:
        # Sentences are converted one at a time, each as a batch of one.
        input_ids = torch.tensor([shakespeare_tokenizer.encode(sentence)]).to(device)
        shakespeare_output = shakespeare_model.generate(input_ids=input_ids, max_new_tokens=50, **generation_kwargs)
        shakespeare_output_list.append(shakespeare_tokenizer.decode(shakespeare_output[0], skip_special_tokens=True).strip())

    # The final strip removes the edge space left when the first or last conversion is empty.
    return ' '.join(shakespeare_output_list).strip()

In [49]:
def Shakespeare_Chatbot(Your_Input, Chatbot_Temperature, Shakespeare_Temperature):
    """Answer ``Your_Input`` and restyle that answer sentence by sentence.

    Args:
        Your_Input: The user's message.
        Chatbot_Temperature: Temperature passed to the chatbot generation step.
        Shakespeare_Temperature: Temperature passed to the style-conversion step.

    Returns:
        A ``(chatbot_reply, shakespeare_reply)`` tuple of strings.
    """
    chatbot_reply = get_chatbot_output(Your_Input, Chatbot_Temperature)

    # The converter works on individual sentences, so split the reply first.
    shakespeare_reply = get_shakespeare_output(
        split_chatbot_output(chatbot_reply),
        Shakespeare_Temperature,
    )

    return chatbot_reply, shakespeare_reply

In [52]:
# Output widgets, in the order Shakespeare_Chatbot returns its two strings.
output_1 = gr.Textbox(label="Original Chatbot")
output_2 = gr.Textbox(label="Shakespeare Chatbot")

# The three inputs are a text box and two sliders, listed in the order of
# Shakespeare_Chatbot's parameters (message, chatbot temperature, Shakespeare temperature).
# NOTE(review): the third positional argument of gr.Slider is presumably the initial
# value, not the step size - confirm against the Gradio docs and pass it by keyword.
# NOTE(review): the sliders' lower bound is 0 - confirm that the generation calls
# accept a temperature of 0 when sampling is enabled.
demo = gr.Interface(fn=Shakespeare_Chatbot, inputs=["text",gr.Slider(0, 2, 0.1), gr.Slider(0, 2, 0.1)], outputs=[output_1,output_2])

demo.launch()

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



In [21]:
def Shakespeare_Chatbot(input_text):
    """Single-argument pipeline: chatbot reply, then Shakespeare-style rewrite.

    Both generation steps sample with a fixed temperature of 0.9. The two
    results are also printed. Note that defining this function rebinds the
    name ``Shakespeare_Chatbot``, replacing any earlier definition with a
    different signature.

    Args:
        input_text: The user's message.

    Returns:
        A ``(chatbot_output_string, shakespeare_output_string)`` tuple.
    """
    # Get output from Chatbot
    chatbot_input = chatbot_tokenizer.encode(input_text)
    chatbot_output = chatbot_model.generate(input_ids=torch.tensor([chatbot_input]).to(device), do_sample=True, max_new_tokens=30, temperature=0.9)

    chatbot_output_string = chatbot_tokenizer.decode(chatbot_output[0], skip_special_tokens=True).strip()

    print(chatbot_output_string)

    # Split the reply into sentences, keeping each sentence's punctuation.
    # The capturing group makes re.split return [text, punct, text, punct, ..., tail],
    # so every fragment sits next to the mark that followed it. A trailing fragment
    # without closing punctuation (e.g. a reply cut off mid-sentence) is kept as-is
    # instead of raising IndexError; empty or whitespace-only fragments are skipped.
    pieces = re.split('([.!?])', chatbot_output_string)

    chatbot_output_list_cleaned = []
    for idx in range(0, len(pieces), 2):
        text = pieces[idx].strip()
        if not text:
            continue
        punctuation = pieces[idx + 1] if idx + 1 < len(pieces) else ''
        chatbot_output_list_cleaned.append(text + punctuation)

    # Convert each chatbot sentence to Shakespeare style, one sentence per call
    shakespeare_output_list = []

    for sentence in chatbot_output_list_cleaned:
        shakespeare_input = shakespeare_tokenizer.encode(sentence)
        shakespeare_output = shakespeare_model.generate(input_ids=torch.tensor([shakespeare_input]).to(device), do_sample=True, max_new_tokens=50, temperature=0.9)
        shakespeare_output_list.append(shakespeare_tokenizer.decode(shakespeare_output[0], skip_special_tokens=True).strip())

    # Get output string; the final strip removes the edge space left when the
    # first or last conversion is empty.
    shakespeare_output_string = ' '.join(shakespeare_output_list).strip()

    print(shakespeare_output_string)

    return chatbot_output_string, shakespeare_output_string


In [25]:
# Output widgets, in the order Shakespeare_Chatbot returns its two strings.
# NOTE(review): this cell rebinds output_1, output_2 and demo and launches another
# interface; it pairs with the single-argument Shakespeare_Chatbot (one "text" input).
# The execution counts suggest it may be an older variant - confirm which demo is intended.
output_1 = gr.Textbox(label="Original Chatbot")
output_2 = gr.Textbox(label="Shakespeare Chatbot")

demo = gr.Interface(fn=Shakespeare_Chatbot, inputs="text", outputs=[output_1,output_2])

demo.launch()

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

