#### Importing Packages

In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import warnings
# Install a blanket "ignore" filter: every Python warning is suppressed process-wide
# from here on (this also hides deprecation and generation warnings).
warnings.filterwarnings("ignore")

In [2]:
# Choose the compute backend by preference: Apple MPS, then CUDA, otherwise CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using {device} device.")

Using cuda device.


In [3]:
# Parameters

# Maximum sequence length used for generation.
MAX_LENGTH = 1024

# Sampling temperature passed to generate().
TEMPERATURE = 0.7
# Original (misspelled) name kept as an alias so existing cells keep working.
TEMPRATURE = TEMPERATURE

# Repetition penalty passed to generate().
REPETITION_PENALTY = 1.0

# Top-k sampling cutoff.
TOP_K = 50

# Nucleus (top-p) sampling cutoff.
TOP_P = 0.92

# Sample from the distribution instead of decoding greedily.
DO_SAMPLE = True

# Number of sequences requested from generate() per prompt.
NUM_RETURN_SEQUENCES = 1
# Original (misspelled) name kept as an alias so existing cells keep working.
NUM_RETUTNR_SEQUENCES = NUM_RETURN_SEQUENCES

# Directory of the fine-tuned checkpoint to load.
MODEL_PATH = "Models/New_E6/checkpoint-42000/"

In [4]:
# Load the fine-tuned model and the base GPT-2 tokenizer.
# Move the model to the `device` selected earlier in the notebook rather than a
# hard-coded 'cuda', so the notebook also runs on CPU-only and MPS machines.
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH).to(device)

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Register '[PAD]' as the padding token; the call returns the number of tokens added,
# which is what the cell displays.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [5]:

def generate_response(model, tokenizer, conversation_history, new_message, max_length=1024):
    """Generate the model's reply to ``new_message`` and extend the transcript.

    The prompt is the running transcript followed by ``' human: <message> gpt:'``
    (or just ``'human: <message> gpt:'`` when there is no history yet). Sampling
    settings are read from the module-level constants TEMPRATURE,
    REPETITION_PENALTY, TOP_K, TOP_P, DO_SAMPLE and NUM_RETUTNR_SEQUENCES.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``encode``, ``decode``, ``pad_token_id``
            and ``eos_token_id``.
        conversation_history: Transcript of the previous turns ("" for a new chat).
        new_message: The user's new message.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        tuple[str, str]: ``(response, updated_history)`` where ``updated_history``
        is the prompt followed by a space and the response.

    Note:
        The transcript is never truncated, so a long conversation can eventually
        exceed ``max_length``; trimming is left to the caller.
    """
    turn = 'human: ' + new_message + ' gpt:'
    if len(conversation_history) > 0:
        input_text = conversation_history + ' ' + turn
    else:
        input_text = turn

    # Follow the model's own device instead of assuming CUDA is available.
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(model.device)

    output_sequences = model.generate(
        input_ids=input_ids,
        attention_mask=input_ids.new_ones(input_ids.shape),  # single unpadded prompt: attend to every token
        max_length=max_length,
        temperature=TEMPRATURE,
        repetition_penalty=REPETITION_PENALTY,
        top_k=TOP_K,
        top_p=TOP_P,
        do_sample=DO_SAMPLE,
        num_return_sequences=NUM_RETUTNR_SEQUENCES,
        pad_token_id=tokenizer.pad_token_id,  # pad with the tokenizer's pad token
        # early_stopping is a beam-search option and has no effect when sampling,
        # so it is not passed.
    )

    # generate() returns prompt + continuation. Slice by prompt token count so the
    # reply is isolated without relying on decode(encode(text)) == text.
    # Only one reply is returned; if several sequences were sampled, the first is used.
    prompt_length = input_ids.shape[-1]
    generated_ids = output_sequences[0][prompt_length:].tolist()

    # Cut at the first pad/EOS token if there is one; a generation that hits
    # max_length without emitting either is kept whole instead of raising.
    stop_ids = {tokenizer.pad_token_id, tokenizer.eos_token_id} - {None}
    for position, token_id in enumerate(generated_ids):
        if token_id in stop_ids:
            generated_ids = generated_ids[:position]
            break

    response = tokenizer.decode(generated_ids)

    # The model may keep going and invent the next user turn; drop it. Match the
    # full ' human:' marker so ordinary uses of the word "human" are kept, and only
    # slice when the marker is present (find() returns -1 otherwise).
    next_turn = response.find(' human:')
    if next_turn != -1:
        response = response[:next_turn]
    response = response.strip()

    updated_history = input_text + ' ' + response

    return response, updated_history

In [8]:
# Interactive chat loop: keep exchanging turns for as long as the user answers "y".
ans = input("Want to start the conversation? (y/n) : ").strip().lower()
conversation_history = ""

while ans == 'y':
    # The prompt is lower-cased before it is sent to the model.
    new_message = input("Enter the prompt : ").lower()
    print('\n\nInput : ', new_message)
    response, conversation_history = generate_response(
        model, tokenizer, conversation_history, new_message, max_length=MAX_LENGTH
    )
    print('\n\nresponse:', response)
    # Normalise exactly like the first question so "Y" or " y" also continue the chat.
    ans = input("Want to continue the conversation? (y/n) : ").strip().lower()
    




Input :  hi.


response: hello there. tell me how are you feeling today?


Input :  please help me.


response: hello, please tell me your problem so that i can help you.


Input :  my parents are not listening to me.


response: can you tell me the specific situation?


Input :  they always make me do what they want and don't care about my feelings.


response: this is a very common situation. you can try to communicate with them and tell them your thoughts and feelings.


Input :  how can i do that?


response: you can try to express your thoughts and feelings to them in a calm tone.
