In [1]:
import spacy
import random
from collections import Counter

In [2]:
text='''
Lorem Ipsum is simply dummy text of the printing and typesetting industry. 
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset 
sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
'''
num_questions=5

In [3]:
nlp=spacy.load('en_core_web_sm')
# spacy.load('en_core_web_sm')

# Loads a pre-trained English NLP model (in this case, the small model).

# 'en_core_web_sm' means:

# en = English language

# core = core features of spaCy

# web = trained on web data

# sm = small size (faster, but less accurate than md or lg)

In [4]:
text

"\nLorem Ipsum is simply dummy text of the printing and typesetting industry. \nLorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset \nsheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.\n"

In [5]:
doc=nlp(text)
[sent for sent in doc.sents]

# 👉 doc = nlp(text)

# This processes your text using the spaCy pipeline (tokenization, tagging, parsing, etc.).

# doc is a spaCy Doc object that contains the processed text.

# 👉 [sent for sent in doc.sents]

# This is a list comprehension.

# doc.sents is a generator that gives you each sentence (a Span object) in the Doc.

# The code collects each sentence into a Python list.

[
 Lorem Ipsum is simply dummy text of the printing and typesetting industry. ,
 Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book.,
 It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged.,
 It was popularised in the 1960s with the release of Letraset 
 sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.]

In [6]:

len([sent.text for sent in doc.sents])

4

In [7]:
#Extract sentences from the text
sentences=[sent.text for sent in doc.sents]
sentences

['\nLorem Ipsum is simply dummy text of the printing and typesetting industry. \n',
 "Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book.",
 'It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged.',
 'It was popularised in the 1960s with the release of Letraset \nsheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.\n']

In [8]:
selected_sentences=random.sample(sentences,(min(num_questions,len(sentences))))
# 👉 sentences
# A list of sentences (for example, from [sent for sent in doc.sents]).

# 👉 num_questions
# A variable that probably defines how many MCQs (or sentences) you want to select.

# 👉 len(sentences)
# Total number of sentences available.

# 👉 min(num_questions, len(sentences))
# This ensures you don’t try to pick more sentences than exist.
# It chooses the smaller of:

# num_questions (how many you want)

# len(sentences) (how many are available)

# 👉 random.sample(sentences, ...)
# This randomly selects that many sentences from the sentences list without replacement (no duplicates).



In [9]:
selected_sentences

['It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged.',
 "Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book.",
 'It was popularised in the 1960s with the release of Letraset \nsheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.\n',
 '\nLorem Ipsum is simply dummy text of the printing and typesetting industry. \n']

In [10]:
mcq=[]

for sentence in selected_sentences:
    sentence=sentence.lower()
    #process with spacy(sentences)
    sent_doc=nlp(sentence)
    # print(sentence)

    #extract entites(noun) from the sentence
    nouns=[token.text for token in sent_doc if token.pos_=="NOUN"]
    # print(nouns)

    if len(nouns)<2:
        continue

    nouns_counts=Counter(nouns)
    # print(nouns_counts)

    if nouns_counts:
        subject=nouns_counts.most_common(1)[0][0]
        answer_choices=[subject]
        # print(answer_counts)
        question_steam=sentence.replace(subject,"____________")
        # print(question_steam)

    for _ in range(3):
        distractor=random.choice(list(set(nouns)-set([subject])))
        answer_choices.append(distractor)

    random.shuffle(answer_choices)
    print(answer_choices)

    correct_answer=chr(64+ answer_choices.index(subject)+1)
    mcq.append((question_steam,answer_choices,correct_answer))
        # print(distractor)
    

#     👉 mcq = []
# ➡ Creates an empty list that will probably store your generated MCQs later.

# 👉 for sentence in selected_sentences:
# ➡ Loops through each sentence you randomly selected earlier.

# 👉 sent_doc = nlp(sentence)
# ➡ Processes each sentence using the spaCy pipeline (nlp).
# ➡ This gives you sent_doc, a spaCy Doc object where the sentence is tokenized, tagged, etc.

# 👉 print(sentence)
# ➡ Prints the original sentence to the console (for debugging or checking what’s being processed).

# 👉 nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"]
# ➡ List comprehension:

# Loops through each token (word, punctuation, etc.) in sent_doc.

# Checks if the part-of-speech tag (pos_) is "NOUN" (common noun).

# Collects the text of each noun into the nouns list.

# ➡ In short: Extracts all the nouns from the sentence.

# 👉 print(nouns)
# ➡ Prints the list of nouns found in that sentence.


#IN SUMMARY

# 👉 For each selected sentence:
# 1️⃣ Convert to lowercase → This ensures consistent text (e.g., “City” and “city” are treated the same).
# 2️⃣ Process with spaCy (nlp) → Breaks the sentence into tokens and tags their parts of speech.
# 3️⃣ Extract nouns → Creates a list of all the words that are tagged as nouns.
# 4️⃣ Print the noun list → Shows which nouns were found.
# 5️⃣ If fewer than 2 nouns → skip → The sentence is ignored if it doesn’t have enough nouns (likely not useful for an MCQ).
# 6️⃣ Count how often each noun appears → Builds a Counter (a dictionary-like object that counts occurrences of each noun).
# 7️⃣ Print noun counts → Displays how many times each noun appears in the sentence.
    

['leap', 'leap', 'leap', 'centuries']
['galley', 'book', 'printer', 'type']
['passages', 'publishing', 'ipsum', 'release']
['industry', 'text', 'industry', 'industry']


In [11]:
mcq

[('it has survived not only five ____________, but also the leap into electronic typesetting, remaining essentially unchanged.',
  ['leap', 'leap', 'leap', 'centuries'],
  'D'),
 ("lorem ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of ____________ and scrambled it to make a ____________ specimen book.",
  ['galley', 'book', 'printer', 'type'],
  'D'),
 ('it was popularised in the 1960s with the release of letraset \nsheets containing lorem ____________ passages, and more recently with desktop publishing software like aldus pagemaker including versions of lorem ____________.\n',
  ['passages', 'publishing', 'ipsum', 'release'],
  'C'),
 ('\nlorem ipsum is simply dummy ____________ of the printing and typesetting industry. \n',
  ['industry', 'text', 'industry', 'industry'],
  'B')]

In [12]:
import spacy
from collections import Counter
import random

# Load English tokenizer, tagger, parser, NER, and word vectors
nlp = spacy.load("en_core_web_sm")

def generate_mcqs(text, num_questions=5):
    # Process the text with spaCy
    doc = nlp(text)

    # Extract sentences from the text
    sentences = [sent.text for sent in doc.sents]

    # Randomly select sentences to form questions
    selected_sentences = random.sample(sentences, min(num_questions, len(sentences)))

    # Initialize list to store generated MCQs
    mcqs = []

    # Generate MCQs for each selected sentence
    for sentence in selected_sentences:
        # Process the sentence with spaCy
        sent_doc = nlp(sentence)

        # Extract entities (nouns) from the sentence
        nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"]

        # Ensure there are enough nouns to generate MCQs
        if len(nouns) < 2:
            continue

        # Count the occurrence of each noun
        noun_counts = Counter(nouns)

        # Select the most common noun as the subject of the question
        if noun_counts:
            subject = noun_counts.most_common(1)[0][0]

            # Generate the question stem
            question_stem = sentence.replace(subject, "_______")

            # Generate answer choices
            answer_choices = [subject]

            # Add some random words from the text as distractors
            for _ in range(3):
                distractor = random.choice(list(set(nouns) - set([subject])))
                answer_choices.append(distractor)

            # Shuffle the answer choices
            random.shuffle(answer_choices)

            # Append the generated MCQ to the list
            correct_answer = chr(64 + answer_choices.index(subject) + 1)  # Convert index to letter
            mcqs.append((question_stem, answer_choices, correct_answer))

    return mcqs


In [13]:
# Test the function with the provided text
text = """
FastAPI is a modern web framework that is relatively fast and used for building APIs with Python 3.7+ based on standard Python-type hints. FastAPI also assists us in automatically producing documentation for our web service so that other developers can quickly understand how to use it. This documentation simplifies testing web service to understand what data it requires and what it offers. FastAPI has many features like it offers significant speed for development and also reduces human errors in the code. It is easy to learn and is completely production-ready. FastAPI is fully compatible with well-known standards of APIslong the Nile’s shores. This soil was fertile, which means it was good for growing crops. Unlike the Tigris and Euphrates, the Nile River flooded at the same time every year, so farmers could predict when to plant their crops.
"""

results = generate_mcqs(text, num_questions=7)


for i, mcq in enumerate(results,start=1):
    question_stem, answer_choices, correct_answer = mcq
    
    print(f"Q{i}: {question_stem}")
    for j, choice  in enumerate(answer_choices, start=1):
        print(f"{chr(64+j)}: {choice}")
    print(f"Correct Answer: {correct_answer}")

Q1: 
_______ is a modern web framework that is relatively fast and used for building APIs with Python 3.7+ based on standard Python-type hints.
A: APIs
B: web
C: FastAPI
D: type
Correct Answer: C
Q2: FastAPI also assists us in automatically producing _______ for our web service so that other developers can quickly understand how to use it.
A: service
B: web
C: documentation
D: web
Correct Answer: C
Q3: Unlike the Tigris and Euphrates, the Nile River flooded at the same _______ every year, so farmers could predict when to plant their crops.

A: crops
B: crops
C: year
D: time
Correct Answer: D
Q4: FastAPI is fully compatible with well-known _______ of APIslong the Nile’s shores.
A: standards
B: shores
C: shores
D: shores
Correct Answer: A
Q5: This _______ simplifies testing web service to understand what data it requires and what it offers.
A: testing
B: web
C: documentation
D: simplifies
Correct Answer: C
Q6: FastAPI has many _______ like it offers significant speed for development and 

In [None]:
import spacy
from nltk.corpus import wordnet
import random

# Load spaCy English model
nlp = spacy.load("en_core_web_sm")

def generate_mcqs(text, num_mcqs=5):
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    mcqs = []

    random.shuffle(sentences)  # Shuffle sentences to pick randomly

    for sentence in sentences:
        sent_doc = nlp(sentence.lower())
        # Extract nouns or proper nouns
        candidates = [token.text for token in sent_doc if token.pos_ in ["NOUN", "PROPN"]]

        if not candidates:
            continue

        # Choose a target word (noun/proper noun) to blank
        answer = random.choice(candidates)

        # Formulate question
        question = sentence.replace(answer, "_______", 1)

        # Generate distractors
        distractors = get_distractors(answer)
        
        # If not enough distractors, pad with random general words
        while len(distractors) < 3:
            filler = random.choice(["city", "country", "river", "building", "continent", "company"])
            if filler != answer and filler not in distractors:
                distractors.append(filler)

        # Form options
        options = distractors + [answer]
        random.shuffle(options)

        mcqs.append({
            "question": question,
            "answer": answer,
            "options": options
        })

        if len(mcqs) >= num_mcqs:
            break

    return mcqs

def get_distractors(word):
    distractors = []
    synsets = wordnet.synsets(word)
    if synsets:
        for lemma in synsets[0].lemmas():
            option = lemma.name().replace('_', ' ')
            if option.lower() != word.lower() and option not in distractors:
                distractors.append(option)
            if len(distractors) >= 3:
                break
    return distractors

# --- Example usage ---
input_text = input("Enter your text: ")
mcqs = generate_mcqs(input_text)

# Display MCQs
for i, mcq in enumerate(mcqs, 1):
    print(f"\nQ{i}: {mcq['question']}")
    for opt in mcq['options']:
        print(f"- {opt}")
    print(f"(Correct answer: {mcq['answer']})")
