In [None]:
from google.colab import drive
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
# Mount Google Drive so the saved model directory becomes readable
drive.mount('/content/drive')

# Directory on Drive holding the saved GPT-2 model and tokenizer files
model_path = "/content/drive/MyDrive/9900_LLM Q&A System/"

# Prefer the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the language model and its tokenizer from the same directory
model = GPT2LMHeadModel.from_pretrained(model_path)
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token  # reuse the EOS token for padding

# Inference only: switch to evaluation mode and place the model on the device
model.eval()
model.to(device)

print("Model and tokenizer loaded successfully!")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model and tokenizer loaded successfully!


In [None]:
import torch

def generate_answer(question, max_length=50):
    """Generate a concise answer to ``question`` with the loaded GPT-2 model.

    The question is wrapped in a ``Q: <question>`` / ``A:`` prompt, the model
    continues the prompt, and only the continuation is returned.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        question: Question text inserted into the prompt.
        max_length: Forwarded to ``model.generate`` as ``max_length``; this
            bounds the total sequence length, prompt tokens included.

    Returns:
        The generated answer text with surrounding whitespace removed.
        Sampling is enabled, so repeated calls may return different text.
    """
    prompt = f"Q: {question}\nA:"

    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,  # total length limit (prompt + answer)
            # Without do_sample=True the temperature/top_k/top_p settings
            # below are ignored and decoding silently falls back to greedy.
            do_sample=True,
            temperature=0.5,  # reduce diversity
            top_k=30,  # limit the candidate pool
            top_p=0.8,  # nucleus sampling
            no_repeat_ngram_size=2,  # never repeat a 2-gram
            eos_token_id=tokenizer.eos_token_id,
            # Passing the pad token explicitly avoids the
            # "Setting `pad_token_id` to `eos_token_id`" warning on every call.
            pad_token_id=tokenizer.eos_token_id,
            # early_stopping was dropped: it only affects beam search.
        )

    # Decode only the newly generated tokens. Splitting the decoded text on
    # "A:" would cut in the wrong place when the question itself contains "A:".
    generated_tokens = outputs[0][prompt_length:]
    answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return answer.strip()


import re

def clean_answer(answer):
    """Strip ``YYYY-MM-DD HH:MM:SS`` timestamps and normalise whitespace.

    Args:
        answer: Raw answer text.

    Returns:
        The text with every timestamp removed, runs of whitespace collapsed
        to a single space, and leading/trailing whitespace trimmed.
    """
    timestamp_pattern = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'
    without_timestamps = re.sub(timestamp_pattern, '', answer)
    return re.sub(r'\s+', ' ', without_timestamps).strip()


In [None]:
import torch
import re

def generate_answer(question, max_length=50):
    """Generate a concise answer for ``question``, never raising on failure.

    Builds a ``Q: <question>`` / ``A:`` prompt, lets the model continue it
    with temperature / top-k / top-p sampling, and returns only the
    continuation. Any exception raised while generating or decoding is
    reported in the returned string instead of propagating.

    Note: an earlier cell of this notebook also defines ``generate_answer``;
    once this cell runs, this definition is the one in effect.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        question: Question text inserted into the prompt.
        max_length: Forwarded to ``model.generate`` as ``max_length``; this
            bounds the total sequence length, prompt tokens included.

    Returns:
        The stripped answer text, or ``"An error occurred: <message>"`` when
        generation or decoding fails. Sampling is enabled, so repeated calls
        may return different text.
    """
    prompt = f"Q: {question}\nA:"

    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,  # total length limit (prompt + answer)
                # Required for temperature/top_k/top_p to take effect;
                # without it decoding is greedy and they are ignored.
                do_sample=True,
                temperature=0.5,  # reduce diversity
                top_k=30,  # limit the candidate pool
                top_p=0.8,  # nucleus sampling
                no_repeat_ngram_size=2,  # never repeat a 2-gram
                eos_token_id=tokenizer.eos_token_id,
                # Explicit pad token silences the per-call
                # "Setting `pad_token_id` to `eos_token_id`" warning.
                pad_token_id=tokenizer.eos_token_id,
                # early_stopping was dropped: it only affects beam search.
            )

        # Skip the prompt tokens rather than splitting the decoded text on
        # "A:", which misfires when the question itself contains "A:".
        prompt_length = inputs["input_ids"].shape[1]
        answer = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()
    except Exception as e:
        # Deliberate best-effort: surface the failure as the answer text
        answer = f"An error occurred: {str(e)}"

    return answer

def clean_answer(answer):
    """Tidy a generated answer.

    Datetime stamps such as "2024-11-07 16:11:45" are deleted, then every run
    of whitespace is squeezed to one space and the ends are trimmed.

    Args:
        answer: Raw answer text.

    Returns:
        The cleaned-up text.
    """
    datetime_re = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
    whitespace_re = re.compile(r'\s+')

    # Drop the timestamps first so the gaps they leave are collapsed as well
    stamp_free = datetime_re.sub('', answer)
    single_spaced = whitespace_re.sub(' ', stamp_free)
    return single_spaced.strip()


In [None]:
# Sample questions used to spot-check the model's answers
questions = [
    "How IoT improves the efficiency of building management systems",
    "What are the different types of sensors in the Internet of Things?",
    "What is HVAC in the context of a BMS?",
    "What are the advantages of smart building technologies?",
]

# Answer each question in order and echo the question/answer pair
for question in questions:
    answer = generate_answer(question)
    print(f"Question: {question}\nAnswer: {answer}\n")





Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Question: How IoT improves the efficiency of building management systems
Answer: A BMS optimizes the energy efficiency by managing energy consumption by using IoT devices to optimize energy use. The system can optimize the heating temperature setpoint by adjusting the fan speed in



Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Question: What are the different types of sensors in the Internet of Things?
Answer: Sensor for IoT is a relatively simple way to connect and communicate. It can connect IoT devices via the internet via a network. The Internet is about connecting things,



Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Question: What is HVAC in the context of a BMS?
Answer: Hvac systems are used in a variety of applications, including IoT devices, sensors, and equipment. These applications can be integrated with a wider range of systems

Question: What are the advantages of smart building technologies?
Answer: A BMS can be integrated with existing systems to improve energy efficiency. It can also be used to optimize energy consumption by providing a wider range of communication protocols.



In [None]:
!pip install faiss-cpu # Install faiss for CPU usage

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m86.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0


In [None]:
# Prepare the validation-set input data
test_inputs = [
    "Example input text 1",
    "Example input text 2",
    # Add more inputs here
]

# Generate one prediction per input text
predictions = []
for input_text in test_inputs:
    # Tokenize via __call__ so an attention mask is produced, and move the
    # tensors to the model's device; otherwise generate() fails with a
    # device mismatch when the model sits on the GPU.
    encoded = tokenizer(input_text, return_tensors='pt').to(model.device)
    input_ids = encoded["input_ids"]

    # Inference only: no gradients are needed
    with torch.no_grad():
        output = model.generate(
            **encoded,
            max_length=50,
            # Explicit pad token avoids the per-call pad_token_id warning
            pad_token_id=tokenizer.eos_token_id,
        )

    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    predictions.append(decoded_output)