In [5]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# The base-model and LoRA-adapter paths are defined in the next cell.



In [8]:
# Path to the base model (weights and tokenizer are both loaded from here).
base_model_path = "D:/Llama-3.2-1B-Instruct"  # Replace this with your base model's path

# Path to the fine-tuned LoRA adapters
lora_model_path = "../LoRA/llama_finetuned_resume"

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU so this
# cell still completes (slowly) instead of raising on a hard-coded "cuda" device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base model
# NOTE(review): trust_remote_code=True allows the loader to run custom code shipped
# with the checkpoint -- confirm this local checkpoint actually needs it.
tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained(base_model_path, trust_remote_code=True)

# Apply the LoRA adapters to the base model
model = PeftModel.from_pretrained(base_model, lora_model_path)
model = model.to(device)
# Model is now ready for inference
print("Model loaded successfully!")



Model loaded successfully!


In [9]:
# Line-leading role labels used by the plain-text prompt format. If one of these
# shows up in the model's continuation, the model has started writing a new turn.
_TURN_MARKERS = ("\nSystem:", "\nUser:", "\nAssistant:")


def _build_prompt(turns):
    """Render turns as "Role: content" lines, ending with an open "Assistant:" line."""
    pieces = ["System: You are a helpful assistant.\n"]
    pieces.extend(f"{turn['role'].capitalize()}: {turn['content']}\n" for turn in turns)
    pieces.append("Assistant:")
    return "".join(pieces)


def _extract_reply(generated_text):
    """Keep only the assistant's own turn: cut at the first role marker, then strip."""
    cut = len(generated_text)
    for marker in _TURN_MARKERS:
        position = generated_text.find(marker)
        if position != -1:
            cut = min(cut, position)
    return generated_text[:cut].strip()


def generate_chat_response(conversation_history, user_input, max_length=150, temperature=0.7, top_p=0.9):
    """
    Generates a chatbot-style response from the fine-tuned model.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        conversation_history (list of dict): Previous turns, each with keys "role" and
            "content". Updated in place with the new user and assistant turns once
            generation has succeeded.
        user_input (str): The latest message from the user.
        max_length (int): Maximum number of new tokens to generate for the reply
            (passed to ``generate`` as ``max_new_tokens``).
        temperature (float): Sampling temperature.
        top_p (float): Nucleus sampling top-p value.

    Returns:
        tuple: ``(assistant_response, conversation_history)`` -- the reply text and the
        same history list that was passed in, now including both new turns.
    """
    user_turn = {"role": "user", "content": user_input}

    # Build the prompt from a temporary view of the history; the caller's list is only
    # touched after generation succeeds, so a failure cannot leave an unanswered turn.
    prompt = _build_prompt([*conversation_history, user_turn])

    # Tokenize the prompt and remember how many tokens it occupies
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Generate response
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Decode only what follows,
    # so "Assistant:" text in the prompt or in a later invented turn cannot be
    # mistaken for the start of the reply.
    new_tokens = outputs[0][prompt_length:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    assistant_response = _extract_reply(generated_text)

    # Record the completed exchange in the caller's history
    conversation_history.append(user_turn)
    conversation_history.append({"role": "assistant", "content": assistant_response})

    return assistant_response, conversation_history


In [10]:
def chatbot():
    """
    Run an interactive chat session on standard input/output.

    Reads one message per prompt, passes it to ``generate_chat_response`` together
    with the running conversation history, and prints the reply. The session ends
    when the user types 'exit' (any letter case, surrounding whitespace ignored),
    when input is exhausted, or when the prompt is interrupted.

    Returns:
        None
    """
    print("Welcome to your Resume Chatbot! (type 'exit' to quit)")
    conversation_history = []  # Turns accumulated over the session

    while True:
        # Get user input; a closed stdin or an interrupt ends the chat cleanly
        # instead of surfacing a traceback.
        try:
            user_input = input("User: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nChatbot: Goodbye!")
            break

        # Nothing typed: ask again rather than sending an empty turn to the model
        if not user_input:
            continue

        if user_input.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break

        # Generate response
        assistant_response, conversation_history = generate_chat_response(conversation_history, user_input)
        print(f"Chatbot: {assistant_response}")
