In [27]:
import spacy
from collections import Counter
import random

# Load English tokenizer, tagger, parser, NER, and word vectors
nlp = spacy.load("en_core_web_sm")


In [35]:
# Sample text input
text = """

he 2024 ICC Men's T20 World Cup was the ninth edition of the ICC Men's T20 World Cup. It was co-hosted by the West Indies and the United States from June 1 to 29, 2024. This was the second time the West Indies had hosted the tournament, while it marked the first major ICC tournament held in the United States.

The tournament expanded its field from 16 to 20 teams, featuring the two co-hosts, the top eight teams from the 2022 T20 World Cup, and teams determined by the ICC Men's T20I Team Rankings and regional qualifiers. Notably, Canada and Uganda qualified for the first time, while the United States automatically qualified as co-hosts.

In the semi-finals, England, the defending champions, were defeated by India, who went on to win their second T20 World Cup title, by beating South Africa in the final by 7 runs. With this victory, India equaled England and West Indies with the most titles in the T20 World Cup's history.
"""

# Generate MCQs
results = generate_mcqs(text, num_questions=5)


In [37]:
def generate_mcqs(text, num_questions=5):
    # Process the text with spaCy
    doc = nlp(text)

    # Extract sentences from the text
    sentences = [sent.text for sent in doc.sents]

    # Randomly select sentences to form questions
    selected_sentences = random.sample(sentences, min(num_questions, len(sentences)))

    # Initialize list to store generated MCQs
    mcqs = []
    all_nouns = [token.text for token in doc if token.pos_ == "NOUN"]
    
    # Ensure unique distractors
    unique_nouns = list(set(all_nouns)) # Get all unique nouns from the text

    # Generate MCQs for each selected sentence
    for sentence in selected_sentences:
        # Process the sentence with spaCy
        sent_doc = nlp(sentence)

        # Extract entities (nouns) from the sentence
        nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"]

        # Ensure there are enough nouns to generate MCQs
        if len(nouns) < 1:  # Change to ensure at least 1 noun for question
            continue

        # Count the occurrence of each noun
        noun_counts = Counter(nouns)

        # Select the most common noun as the subject of the question
        if noun_counts:
            subject = noun_counts.most_common(1)[0][0]

            # Generate the question stem
            question_stem = sentence.replace(subject, "_")

            # Generate answer choices
            answer_choices = [subject]

            # Ensure unique distractors by avoiding repeats
            distractors = list(set(unique_nouns) - set([subject]))  # Exclude the subject from distractors
            if len(distractors) >= 3:
                answer_choices += random.sample(distractors, 3)  # Ensure 3 unique distractors
            else:
                continue  # Skip if not enough distractors are available

            # Shuffle the answer choices
            random.shuffle(answer_choices)

            # Append the generated MCQ to the list
            correct_answer = chr(64 + answer_choices.index(subject) + 1)  # Convert index to letter
            mcqs.append((question_stem, answer_choices, correct_answer))

            # Stop if we've generated enough MCQs
            if len(mcqs) == num_questions:
                break

    return mcqs


In [39]:
# Print generated MCQs
for i, mcq in enumerate(results, start=1):
    question_stem, answer_choices, correct_answer = mcq
    
    print(f"Q{i}: {question_stem}")
    for j, choice in enumerate(answer_choices, start=1):
        print(f"{chr(64+j)}: {choice}")
    print(f"Correct Answer: {correct_answer}")
    print("_________________________________________________________________________________________________________________________________________")


Q1: With this _, India equaled England and West Indies with the most titles in the T20 World Cup's history.

A: victory
B: field
C: titles
D: hosts
Correct Answer: A
_________________________________________________________________________________________________________________________________________
Q2: In the _-finals, England, the defending champions, were defeated by India, who went on to win their second T20 World Cup title, by beating South Africa in the final by 7 runs.
A: edition
B: tournament
C: Rankings
D: semi
Correct Answer: D
_________________________________________________________________________________________________________________________________________
Q3: This was the second time the West Indies had hosted the _, while it marked the first major ICC _ held in the United States.


A: semi
B: Rankings
C: -
D: tournament
Correct Answer: D
________________________________________________________________________________________________________________________________