In [None]:
import shutil
import os
from google.colab import drive

# Make Google Drive available under /content/drive
drive.mount('/content/drive')

# Source file in the Colab runtime and its destination inside Drive
local_file_path = "/content/sentences.txt"
drive_folder = "/content/drive/MyDrive/question-answer"
drive_file_path = os.path.join(drive_folder, "sentences.txt")
drive_sentences_path = drive_file_path  # second name for the same destination path

# Create the destination folder when it is missing
os.makedirs(drive_folder, exist_ok=True)

# Copy only when the local file is present; otherwise ask for an upload
if not os.path.exists(local_file_path):
    print("'sentences.txt' not found in Colab. Please upload it first.")
else:
    shutil.copy(local_file_path, drive_file_path)
    print(f"'sentences.txt' copied to Google Drive at: {drive_file_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
'sentences.txt' copied to Google Drive at: /content/drive/MyDrive/question-answer/sentences.txt


In [None]:
import torch
from transformers import pipeline
from tqdm import tqdm
import os
from google.colab import drive

# 1. 🚗 Mount Google Drive
drive.mount('/content/drive')

# 2. 📂 Define paths
drive_folder = "/content/drive/MyDrive/question-answer"
drive_sentences_path = os.path.join(drive_folder, "sentences.txt")
drive_output_path = os.path.join(drive_folder, "generated_questions.txt")
# Questions are written here first and only renamed to the final name once
# every sentence has been processed. The existence check below therefore never
# mistakes a half-written file (interrupted run, runtime disconnect) for a
# finished one.
partial_output_path = drive_output_path + ".partial"

# 3. ✅ Check if output already exists (acts as a cache: generation is skipped)
if os.path.exists(drive_output_path):
    print(f"✅ 'generated_questions.txt' already exists at: {drive_output_path}")
else:
    print("🚀 File not found. Starting question generation...")

    # 4. ⚡ Setup device and pipeline (GPU index 0 when CUDA is available, else CPU)
    device = 0 if torch.cuda.is_available() else -1
    qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl", device=device)

    # 5. 📖 Read sentences from Drive, one per line, skipping blank lines
    with open(drive_sentences_path, "r", encoding="utf-8") as file:
        sentences = [line.strip() for line in file if line.strip()]

    # 6. ✍️ Generate questions and save to Drive
    # NOTE(review): sentences are passed to the model unmodified. The model card
    # for valhalla/t5-base-qg-hl appears to describe a
    # "generate question: ... <hl> answer <hl> ..." input format — confirm that
    # raw sentences are the intended input.
    with open(partial_output_path, "w", encoding="utf-8") as out_file:
        for sentence in tqdm(sentences, desc="Generating Questions", unit="sentence"):
            questions = qg_pipeline(sentence, max_length=128, num_return_sequences=1)
            for q in questions:
                out_file.write(q["generated_text"] + "\n")

    # Publish the finished file under its final name in a single rename step.
    os.replace(partial_output_path, drive_output_path)

    print(f"✅ Question generation complete! Saved to: {drive_output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ 'generated_questions.txt' already exists at: /content/drive/MyDrive/question-answer/generated_questions.txt


In [None]:
import os
import numpy as np
from sentence_transformers import SentenceTransformer

# Locations of the question source and of the cached artifacts on Drive
drive_folder = "/content/drive/MyDrive/question-answer"
questions_file = os.path.join(drive_folder, "generated_questions.txt")
embeddings_file = os.path.join(drive_folder, "questions_embeddings.npy")
questions_list_file = os.path.join(drive_folder, "questions_list.txt")

# Sentence-embedding model (instantiated whether or not a cache exists)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Build the cache unless both artifacts are already present on Drive
if not (os.path.exists(embeddings_file) and os.path.exists(questions_list_file)):
    print("🔍 Embeddings not found. Generating from questions...")

    # Read the generated questions, dropping blank lines
    with open(questions_file, "r", encoding="utf-8") as file:
        questions = list(filter(None, map(str.strip, file)))

    # Encode, then cast to a float32 NumPy array
    question_embeddings = model.encode(questions)
    question_embeddings = np.array(question_embeddings).astype("float32")

    # Persist the embeddings followed by the question text they were built from
    np.save(embeddings_file, question_embeddings)

    with open(questions_list_file, "w", encoding="utf-8") as f:
        f.write("\n".join(questions))

    print(f"✅ Embeddings and questions saved to: {drive_folder}")

else:
    print("✅ Found existing embeddings and questions list. Loading from Drive...")

    question_embeddings = np.load(embeddings_file)

    with open(questions_list_file, "r", encoding="utf-8") as f:
        questions = list(filter(None, map(str.strip, f)))


✅ Found existing embeddings and questions list. Loading from Drive...


In [None]:
import os
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from google.colab import drive

# 1. 📁 Make Google Drive reachable from the runtime
drive.mount('/content/drive')

# 2. 📂 Cached artifacts inside the Drive project folder
drive_folder = "/content/drive/MyDrive/question-answer"
embeddings_path = os.path.join(drive_folder, "questions_embeddings.npy")
questions_path = os.path.join(drive_folder, "questions_list.txt")

# 3. 🤖 Sentence-embedding model used to encode incoming queries
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# 4. 📥 Stored embeddings, plus the question text (blank lines skipped)
question_embeddings = np.load(embeddings_path)

with open(questions_path, "r", encoding="utf-8") as f:
    questions = list(filter(None, map(str.strip, f)))
# 5. 🔍 Similarity function
def is_question_clear(query, threshold=0.6):
    """Check if a given question is clear based on similarity to existing questions.

    The query is encoded with the module-level ``model`` and compared, by cosine
    similarity, against every row of the module-level ``question_embeddings``.

    Args:
        query: The question text to evaluate.
        threshold: Minimum best-match cosine similarity for the question to
            count as clear. Defaults to 0.6.

    Returns:
        A ``(is_clear, max_similarity)`` tuple: ``is_clear`` is truthy when
        ``max_similarity >= threshold``; ``max_similarity`` is the highest
        similarity between the query and any stored question embedding.
    """
    # encode() receives a one-element list, so the result is a single-row batch.
    query_embedding = model.encode([query]).astype("float32")
    # Row 0 holds the similarities of the single query to each stored embedding.
    similarities = cosine_similarity(query_embedding, question_embeddings)[0]
    max_similarity = np.max(similarities)
    return max_similarity >= threshold, max_similarity

# 6. 💬 Interactive input loop
# Repeatedly prompts for a question and reports whether is_question_clear()
# considers it clear, until the user types 'exit' (case-insensitive).
while True:
    try:
        test_question = input("\nEnter your question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop
        # cleanly instead of surfacing a traceback.
        print("\n👋 Exiting...")
        break

    if test_question.lower() == "exit":
        print("👋 Exiting...")
        break

    if not test_question:
        # Blank input is not a question; scoring it would only report a
        # meaningless similarity value.
        print("Please enter a question.")
        continue

    clear, similarity_score = is_question_clear(test_question)

    if clear:
        print(f"The question is clear (Similarity: {similarity_score:.2f})")
    else:
        print(f"The question is unclear (Similarity: {similarity_score:.2f})")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
