In [5]:
# Install necessary packages in Google Colab
!pip install transformers
!pip install torch
!pip install rake-nltk
!pip install nltk



In [6]:
# Import the required libraries
import nltk
from transformers import pipeline
from rake_nltk import Rake
from nltk.corpus import wordnet

In [7]:
# Download necessary NLTK data
nltk.download('punkt')
nltk.download('punkt_tab')  # Fix for LookupError
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [8]:
# Summarizer
def summarize_text(text):
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return summary[0]['summary_text']

# Keyword Extractor
def extract_keywords(text):
    r = Rake()
    r.extract_keywords_from_text(text)
    keywords = r.get_ranked_phrases()[:5]  # Return top 5 keywords
    return keywords

# Sentence Tokenizer
def tokenize_sentences(text):
    sentences = nltk.sent_tokenize(text)
    return sentences

# Sentence Mapper
def map_sentences_to_keywords(sentences, keywords):
    mapped = []
    for sentence in sentences:
        for keyword in keywords:
            if keyword.lower() in sentence.lower():
                mapped.append((sentence, keyword))
                break
    return mapped

# Distractor Generator
def generate_distractors(word, n=3):
    distractors = []
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            if lemma.name().lower() != word.lower():
                distractors.append(lemma.name())
                if len(distractors) == n:
                    return distractors
    if not distractors:
        from random import sample
        all_words = list(set([w for s in wordnet.all_synsets() for w in s.lemma_names()]))
        distractors = sample([w for w in all_words if w.lower() != word.lower()], min(n, len(all_words)))
    return distractors

In [9]:
# Main MCQ Generation Function
def generate_mcqs(text):
    summary = summarize_text(text)
    keywords = extract_keywords(summary)
    sentences = tokenize_sentences(summary)
    mapped = map_sentences_to_keywords(sentences, keywords)

    mcqs = []
    for sentence, keyword in mapped:
        distractors = generate_distractors(keyword)
        if distractors:
            mcqs.append({
                'question': sentence.replace(keyword, "________"),
                'answer': keyword,
                'distractors': distractors
            })
    return mcqs

In [10]:
# Function to display MCQs
def display_mcqs(mcqs):
    if not mcqs:
        print("No MCQs were generated. This might be due to difficulties in finding suitable distractors.")
        return

    for i, mcq in enumerate(mcqs, 1):
        print(f"\nQuestion {i}:")
        print(mcq['question'])
        options = [mcq['answer']] + mcq['distractors']
        for j, option in enumerate(options):
            print(f"{chr(65 + j)}. {option}")
        print(f"Answer: {mcq['answer']}")


In [11]:
# Example usage (input directly in Colab)
text = """
Once upon a time, there was a wood on a hill in the outskirts of the city. There was a small stone hut next to the wood. The hut was surrounded by a gray wooden fence. A couple lived inside this hut.

Men and women had only one wish. They wish to have a beautiful daughter to take care of and raise. They are not sure that they will be wonderful parents.
"""

print("\nGenerating MCQs...\n")
mcqs = generate_mcqs(text)
display_mcqs(mcqs)


Generating MCQs...



Your max_length is set to 130, but your input_length is only 87. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)



Question 1:
Once upon a time, there was a ________ on a hill in the outskirts of the city.
A. wood
B. forest
C. woods
D. Natalie_Wood
Answer: wood

Question 2:
The hut was surrounded by a ________.
A. gray wooden fence
B. posthypnotic_amnesia
C. comminute
D. cultural_anthropology
Answer: gray wooden fence

Question 3:
A ________ this hut.
A. couple lived inside
B. English_plantain
C. Upjohn
D. dip_into
Answer: couple lived inside

Question 4:
They wish to have a beautiful daughter to ________ of and raise.
A. take care
B. acidulent
C. autogenic_therapy
D. Sjaelland
Answer: take care
