In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import torch

# Path to your fine-tuned model
model_path = 'finetuned_model'  # Update to your model path

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the model with FP16 precision and device mapping
# Avoid manually moving the model when using `device_map="auto"`
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # Use FP16 precision
    device_map="auto"           # Automatically distribute across available devices
)

# Confirm device placement
print(f"Model loaded and distributed across devices: {model.hf_device_map}")

# Load the test dataset
test_dataset = pd.read_csv('ritsu_bot/answer_test.csv')  # Update to the path of your test dataset

# Print dataset information
print(f"Test dataset loaded with {len(test_dataset)} entries.")


Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.04s/it]


Model loaded and distributed across devices: {'': 0}
Test dataset loaded with 196 entries.


In [3]:
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set a padding token for the tokenizer if not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Use the end-of-sequence token as the padding token

# Function to generate answers using the fine-tuned model
def generate_answer(question):
    inputs = tokenizer(question, return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs = {key: value.to(device) for key, value in inputs.items()}  # Move inputs to the same device as the model
    
    # Generate the answer using the model
    with torch.no_grad():  # Disable gradients for inference
        outputs = model.generate(
            **inputs,
            max_length=150,         # Adjust the max length as needed
            num_return_sequences=1, # Number of sequences to return
            temperature=0.7,        # Sampling temperature
            top_k=50,               # Top-k sampling
            top_p=0.9,              # Top-p sampling (nucleus sampling)
            do_sample=True,         # Enable sampling
        )

    # Decode the generated text (answer)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

# Create a list to store the results
results = []

# Generate answers for each question in the test dataset
for index, row in test_dataset.iterrows():
    question = row['Question']
    filename = row['Filename']
    
    # Generate the answer using the fine-tuned model
    answer = generate_answer(question)
    
    # Append the result to the list
    results.append({"Filename": filename, "Question": question, "Answer": answer})

# Convert the results to a DataFrame and save it as a CSV
results_df = pd.DataFrame(results)
results_df.to_csv('finetuned_model_answers_2.csv', index=False)  # Update the path as needed

print("Answers generated and saved to CSV.")


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

Answers generated and saved to CSV.
