## Generate Questions from Sentences

In [None]:
from transformers import pipeline
from tqdm import tqdm

# Source sentences (one per line) and destination for the generated questions.
input_file = "Data/sentences.txt"
output_file = "Data/generated_questions.txt"

# Text-to-text generation pipeline backed by the valhalla/t5-base-qg-hl checkpoint.
qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")

# Keep every non-blank line of the input file, trimmed of surrounding whitespace.
with open(input_file, "r", encoding="utf-8") as file:
    sentences = []
    for line in file:
        stripped = line.strip()
        if stripped:
            sentences.append(stripped)

# Feed the sentences to the pipeline one at a time and write each generated
# text on its own line of the output file.
with open(output_file, "w", encoding="utf-8") as out_file:
    progress = tqdm(sentences, desc="Generating Questions", unit="sentence")
    for sentence in progress:
        questions = qg_pipeline(sentence, max_length=128, num_return_sequences=1)
        out_file.writelines(q["generated_text"] + "\n" for q in questions)

print("Question generation complete! Questions saved in 'Data/generated_questions.txt'.")


## Create Embeddings for Questions

In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load the dataset: one question per non-blank line.
# The file is read as UTF-8 explicitly (rather than with the platform default
# encoding) and inside a `with` block so the handle is closed deterministically.
questions_file = "Data/generated_questions.txt"
with open(questions_file, "r", encoding="utf-8") as f:
    questions = [line.strip() for line in f if line.strip()]

# Encode all questions and store the result as a float32 NumPy array.
question_embeddings = np.array(model.encode(questions)).astype("float32")

# Save embeddings and questions.
# The question list is written into the Data folder next to the embeddings
# (the classification step builds its path as Data/questions_list.txt), using
# UTF-8 so non-ASCII characters survive on every platform.
np.save("Data/questions_embeddings.npy", question_embeddings)
with open("Data/questions_list.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(questions))

print("Embeddings saved successfully to `questions_embeddings.npy` file!")

## Classify Whether a Question Is Clear or Vague

In [None]:
import os
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# 1. 📂 Define local paths (everything lives under the Data folder)
data_folder = "Data"
sentences_path = os.path.join(data_folder, "sentences.txt")
questions_path = os.path.join(data_folder, "questions_list.txt")
embeddings_path = os.path.join(data_folder, "questions_embeddings.npy")
faiss_index_path = os.path.join(data_folder, "sentence_embeddings.index")

# 2. 🤖 Load Sentence-BERT model (used below to encode the incoming query)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# 3. 📥 Load the precomputed question embeddings
question_embeddings = np.load(embeddings_path)

# Load lecture sentences: one entry per non-blank line, whitespace-trimmed
with open(sentences_path, "r", encoding="utf-8") as f:
    lecture_sentences = []
    for raw_line in f:
        text = raw_line.strip()
        if text:
            lecture_sentences.append(text)

# Load the FAISS index stored on disk
faiss_index = faiss.read_index(faiss_index_path)

# 4. 🔍 Similarity classification function
def classify_question(query, threshold=0.60):
    """Label *query* as "Clear" or "Vague" by similarity to known questions.

    The query is encoded with the module-level ``model`` and compared, via
    cosine similarity, against every row of the module-level
    ``question_embeddings``.

    Args:
        query: The question text to classify.
        threshold: Minimum best-match similarity for the "Clear" label.

    Returns:
        A ``(label, max_similarity)`` tuple, where ``label`` is "Clear" when
        the highest similarity is at least ``threshold`` and "Vague" otherwise.
    """
    encoded_query = model.encode([query]).astype("float32")
    # cosine_similarity returns one row per query; only one query was encoded.
    scores = cosine_similarity(encoded_query, question_embeddings)[0]
    best_score = np.max(scores)
    if best_score >= threshold:
        return "Clear", best_score
    return "Vague", best_score

# Prompt for a question, classify it, and report the outcome.
student_question = input("Enter your question: ")
# question_valid keeps the full (label, similarity) tuple returned by
# classify_question; it is unpacked so the user sees a readable message
# instead of a raw tuple repr.
question_valid = classify_question(student_question)
label, similarity = question_valid
print(f"The question is: {label} (similarity: {similarity:.2f})")