# Imports

In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load Models

In [3]:

# Load pre-trained models
model_name_gpt = "gpt2"
model_name_lama = "microsoft/Llama2-7b-WhoIsHarryPotter"

model_gpt = GPT2LMHeadModel.from_pretrained(model_name_gpt)
tokenizer_gpt = GPT2Tokenizer.from_pretrained(model_name_gpt)

tokenizer_lama = AutoTokenizer.from_pretrained(model_name_lama)
model_lama = AutoModelForCausalLM.from_pretrained(model_name_lama)

# Set the device to GPU if available, otherwise use CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model_gpt.to(device)
model_lama.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNo

# Interaction Loop

In [4]:
while True:
    # Prompt the user for input
    user_input = input("Enter a prompt (or type 'exit' to quit): ")
    if user_input.lower() == "exit":
        print("Goodbye!")
        break

    # GPT response
    # Encode the user's input text to tensor
    input_ids = tokenizer_gpt.encode(user_input, return_tensors="pt").to(device)
    # Generate text based on the input
    with torch.no_grad():
        output = model_gpt.generate(input_ids, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95)
        
    # Llama response
        input_ids = tokenizer_lama.encode(user_input, return_tensors="pt").to(device)
    # Generate text based on the input
    with torch.no_grad():
        output = model_lama.generate(input_ids, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95)

    # Decode and print the generated text
    generated_text_gpt = tokenizer_gpt.decode(output[0], skip_special_tokens=True)
    generated_text_lama = tokenizer_lama.decode(output[0], skip_special_tokens=True)
    print("")
    print("########################################")
    print("Generated Text from GPT2:")
    print(generated_text_gpt)
    print("")
    print("########################################")
    print("Generated Text from Llama:")
    print(generated_text_lama)
    print("")
    print("########################################")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



########################################
Generated Text from GPT2:
" means accumulate inwardction De gravitational.. changed De's affiliateomet skin comfort diminish.#

########################################
Generated Text from Llama:
What's your name?

My name is Sherlock Holmes.


########################################


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



########################################
Generated Text from GPT2:
" means's mortarι 6 mortar#$ gravitational#

########################################
Generated Text from Llama:
What is 5 + 6?

########################################


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



########################################
Generated Text from GPT2:
" means ever who "ificationays gravitational.. Dwight accumulate elusive 110 e movingife total't putident Life "tonone diminish. promotion questaysco ently whoards diminish#

########################################
Generated Text from Llama:
What else can you tell me?

I'm happy to provide more information or answer any questions you may have.
Please let me know how I can help.

########################################
Goodbye!
