In [1]:
!pip install transformers gradio sentencepiece --quiet

[K     |████████████████████████████████| 5.8 MB 4.8 MB/s 
[K     |████████████████████████████████| 11.6 MB 15.2 MB/s 
[K     |████████████████████████████████| 1.3 MB 56.2 MB/s 
[K     |████████████████████████████████| 182 kB 48.6 MB/s 
[K     |████████████████████████████████| 7.6 MB 55.5 MB/s 
[K     |████████████████████████████████| 106 kB 64.9 MB/s 
[K     |████████████████████████████████| 54 kB 3.9 MB/s 
[K     |████████████████████████████████| 278 kB 76.7 MB/s 
[K     |████████████████████████████████| 213 kB 83.8 MB/s 
[K     |████████████████████████████████| 55 kB 2.0 MB/s 
[K     |████████████████████████████████| 84 kB 4.4 MB/s 
[K     |████████████████████████████████| 2.3 MB 45.3 MB/s 
[K     |████████████████████████████████| 84 kB 3.8 MB/s 
[K     |████████████████████████████████| 56 kB 5.3 MB/s 
[K     |████████████████████████████████| 64 kB 3.3 MB/s 
[K     |████████████████████████████████| 80 kB 10.4 MB/s 
[K     |████████████████████████████

In [2]:
import torch
import gradio as gr
from transformers import GPT2LMHeadModel,  GPT2Tokenizer, GPT2Config
from transformers import AutoConfig, AutoModelForSeq2SeqLM, AutoModelWithLMHead, AutoTokenizer

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
#TITLE GENERATION
# Value passed as `max_length` when generating a headline.
TITLE_MAX_LEN = 20

# Fine-tuned weights are read from this local path (presumably a mounted
# Google Drive - confirm the mount happens before this cell runs).
TITLE_MODEL_PATH = '/content/drive/MyDrive/nlp/title_generation_2.pt'
# Hub checkpoint that supplies both the tokenizer and the model configuration.
TITLE_BASE_CHECKPOINT = "mrm8488/t5-base-finetuned-common_gen"

title_tokenizer = AutoTokenizer.from_pretrained(TITLE_BASE_CHECKPOINT)
# Use the end-of-sequence token as the padding token for this tokenizer.
title_tokenizer.pad_token = title_tokenizer.eos_token
title_config = AutoConfig.from_pretrained(TITLE_BASE_CHECKPOINT)

# `AutoModelWithLMHead` is deprecated; for this configuration it resolves to
# T5ForConditionalGeneration, which is what the seq2seq auto class returns too.
title_model = AutoModelForSeq2SeqLM.from_pretrained(TITLE_MODEL_PATH, config=title_config)
title_model.to(device)
title_model.tie_weights()
# Inference only: switch off dropout.
title_model.eval()

Downloading:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.79k [00:00<?, ?B/s]



T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [5]:
# ARTICLE GENERATION
# Marker strings: `bos` and `body` wrap the headline when the article prompt is
# built; `eos` is defined alongside them.
bos, eos, body = '<|startoftext|>', '<|EOS|>', '<|body|>'
# Value passed as `max_length` when generating an article.
MAX_LEN = 200

MODEL_PATH = '/content/drive/MyDrive/nlp/article_generation.pt'

# Model configuration comes from a JSON file; the tokenizer is loaded from its
# own local path.
config = GPT2Config.from_json_file('/content/drive/MyDrive/nlp/config.json')
tokenizer = GPT2Tokenizer.from_pretrained("/content/drive/MyDrive/nlp/article_generation_token")

# Load the weights with that configuration and place the model on `device`.
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH, config=config).to(device)
model.tie_weights()
# Inference only: switch off dropout.
model.eval()


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50261, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout)

In [7]:
def generation(Keywords, Temperature, Top_P):
    """Generate a headline from keywords, then an article for that headline.

    The keywords are fed to the title model to sample one headline. The
    headline is wrapped as ``"<bos> headline <body>"`` and fed to the article
    model, which samples the continuation. Both steps sample
    (``do_sample=True``), so repeated calls give different results.

    Args:
        Keywords: Free-text keywords for the headline (a single string).
        Temperature: Sampling temperature. ``0`` (the slider's lower bound),
            ``None`` or a negative value is treated as ``1.0``.
        Top_P: Nucleus-sampling threshold. ``0``, ``None`` or a negative value
            is treated as ``1.0``.

    Returns:
        A ``(title, article)`` tuple of strings. Both are empty when
        ``Keywords`` is empty or whitespace-only.
    """
    # Nothing to condition on: skip both models instead of sampling from noise.
    if not Keywords or not Keywords.strip():
        return "", ""

    # Zero is not a usable sampling value; map it to the neutral setting.
    temperature = Temperature if Temperature and Temperature > 0 else 1.0
    top_p = Top_P if Top_P and Top_P > 0 else 1.0

    # --- Headline ---------------------------------------------------------
    title_inputs = title_tokenizer([Keywords], return_tensors="pt", padding=True)
    title_ids = title_model.generate(
        input_ids=title_inputs["input_ids"].to(device, dtype=torch.long),
        attention_mask=title_inputs["attention_mask"].to(device, dtype=torch.long),
        do_sample=True,
        max_length=TITLE_MAX_LEN,
        repetition_penalty=1.5,
        top_p=top_p,
        temperature=temperature,
    )
    # One input sequence was supplied, so there is exactly one output sequence.
    headline = title_tokenizer.decode(title_ids[0], skip_special_tokens=True)

    # --- Article ----------------------------------------------------------
    prompt = ' '.join([bos, headline, body])
    prompt_inputs = tokenizer(prompt, return_tensors="pt")
    prompt_ids = prompt_inputs["input_ids"].to(device, dtype=torch.long)
    article_ids = model.generate(
        input_ids=prompt_ids,
        attention_mask=prompt_inputs["attention_mask"].to(device, dtype=torch.long),
        do_sample=True,
        max_length=MAX_LEN,
        repetition_penalty=1.5,
        top_p=top_p,
        temperature=temperature,
    )
    # The returned sequence starts with the prompt tokens. Decode only what was
    # generated after them, rather than string-replacing the headline (which
    # would also delete legitimate mentions of it inside the article).
    continuation_ids = article_ids[0][prompt_ids.shape[-1]:]
    article = tokenizer.decode(continuation_ids, skip_special_tokens=True).strip()

    return headline, article
# Output widgets: one text box for each value returned by `generation`.
output1 = gr.Textbox(label="TITLE")
output2 = gr.Textbox(label="ARTICLE")

# Inputs, in the order of `generation`'s parameters: a free-text field followed
# by two sliders that each range over [0, 1].
demo = gr.Interface(
    fn=generation,
    inputs=[
        "text",
        gr.Slider(minimum=0, maximum=1),
        gr.Slider(minimum=0, maximum=1),
    ],
    outputs=[output1, output2],
)

# Start the web UI.
demo.launch()

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

