# Text Generation using Transformers

## Installing Libraries

In [1]:
%%capture
!pip install transformers torch

## Importing Libraries

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

## Loading pre-trained Model & Tokenizer

In [4]:
# Load pre-trained 'GPT-2' model and tokenizer
model_name = "gpt2-large"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

## Setting up Model & Tokenizer

In [5]:
# Set the model to evaluation mode (no training)
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1280)
    (wpe): Embedding(1024, 1280)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-35): 36 x GPT2Block(
        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1280, out_features=50257, bias=False)
)

In [6]:
# Setting pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model.generation_config.pad_token_id = tokenizer.pad_token_id

## Generating Text

In [7]:
# Function to generate text
def generate_text_from_model(prompt, max_length=50, temperature=0.7):

    # Encoding prompt to token
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Generate text
    output = model.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=0.95,
        temperature=temperature,
    )

    # Decode and return generated text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    return generated_text

In [8]:
# Generate text with a prompt
def text_generation():

  # Prompt input from user
  prompt = str(input('Enter prompt: '))

  # Generating text from model
  generated_text = generate_text_from_model(prompt, max_length = 100)

  # Printing generated text
  print(generated_text)

In [9]:
text_generation()

Enter prompt: I love my nation India
I love my nation India. I love India's culture. But I also love the people of India, and I want to see them prosper. And I think that's what we're going to do. We're not going into a war with India."

The Indian government has been trying to get the U.S. to stop selling arms to Pakistan, which is accused of sponsoring terrorism.
.@POTUS: "We're gonna work with the Indian people to make sure that they


In [10]:
text_generation()

Enter prompt: I am a cricketer
I am a cricketer, and I am not a cricket fan. I have never watched a Test match. But I do know that the Indian team is a very good team. They have a lot of talent.

"I have seen them play in the World Cup. It is very exciting. The Indian players are very talented. There are a few players who are not so good, but they are good enough to play for India. So I think they will be a good side."


In [11]:
text_generation()

Enter prompt: I love songs of One Direction
I love songs of One Direction, but I don't think they're the best band in the world. I think the band is great, and I love the music, I just don

I think that the song is a little bit too long. It's a bit of a stretch.
The song's not bad, it's just a tad long, so I'm not sure if I'd want to listen to it. But I do think it would be a good song to play in
