In [None]:
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('popular')

In [1]:
print("hello")

hello


In [2]:
!pip install yake

Defaulting to user installation because normal site-packages is not writeable


In [3]:
import yake

def getImportantWords(art, top_n=25):
    # Create a YAKE keyword extractor
    kw_extractor = yake.KeywordExtractor(lan="en", n=1, top=top_n)  
    # lan="en" = English language
    # n=1 = extract unigrams (single-word keywords, similar to PROPN in pke)

    # Extract keywords
    keywords = kw_extractor.extract_keywords(art)

    # Get just the words, not the scores
    result = [kw for kw, score in keywords]
    
    return result

# Example usage:
text = """
In the heart of the quaint village of Eldermere, a mysterious tree stood tall in the town square. \nIts gnarled branches bore fruits that resembled pears, but with an unusual twist: they seemed to shimmer with a golden hue. \nThe villagers affectionately named it the 'Shakespear' tree, believing it held magical properties.\nLegend had it that anyone who tasted a Shakespear would gain a glimpse into their future. Curiosity\nspread like wildfire, and soon, villagers flocked to the tree, eager for a taste of destiny. Young Emma, a\nspirited girl with dreams of becoming a writer, felt an undeniable pull toward the shimmering fruit.
"""
impWords = getImportantWords(text)
print(impWords)

['Eldermere', 'square', 'tree', 'Shakespear', 'heart', 'quaint', 'village', 'mysterious', 'stood', 'tall', 'town', 'villagers', 'Emma', 'pears', 'twist', 'hue', 'gnarled', 'branches', 'bore', 'resembled', 'unusual', 'shimmer', 'golden', 'believing', 'properties']


In [6]:
#Step 3- Split the whole text article into an array/list of individual sentences. This will help us fetch the sentences related to the keywords easily

from nltk.tokenize import sent_tokenize
def splitTextToSents(art):
    s=[sent_tokenize(art)]
    s=[y for x in s for y in x]
    s=[sent.strip() for sent in s if len(sent)>15] #Removes all the sentences that have length less than 15 so that we can ensure that our questions have enough length for context
    return s
sents=splitTextToSents(text) #Achieve a well splitted set of sentences from the text article
#print(sents)

In [7]:
sents

['In the heart of the quaint village of Eldermere, a mysterious tree stood tall in the town square.',
 'Its gnarled branches bore fruits that resembled pears, but with an unusual twist: they seemed to shimmer with a golden hue.',
 "The villagers affectionately named it the 'Shakespear' tree, believing it held magical properties.",
 'Legend had it that anyone who tasted a Shakespear would gain a glimpse into their future.',
 'Curiosity\nspread like wildfire, and soon, villagers flocked to the tree, eager for a taste of destiny.',
 'Young Emma, a\nspirited girl with dreams of becoming a writer, felt an undeniable pull toward the shimmering fruit.']

In [8]:
#Step 4- Map the sentences which contain the keywords to the related keywords so that we can easily lookup the sentences related to the keywords
!pip install flashtext
from flashtext import KeywordProcessor
def mapSents(impWords,sents):
    processor=KeywordProcessor() #Using keyword processor as our processor for this task
    keySents={}
    for word in impWords:
        keySents[word]=[]
        processor.add_keyword(word) #Adds key word to the processor
    for sent in sents:
        found=processor.extract_keywords(sent) #Extract the keywords in the sentence
        for each in found:
            keySents[each].append(sent) #For each keyword found, map the sentence to the keyword
    for key in keySents.keys():
        temp=keySents[key]
        temp=sorted(temp,key=len,reverse=True) #Sort the sentences according to their decreasing length in order to ensure the quality of question for the MCQ 
        keySents[key]=temp
    return keySents
mappedSents=mapSents(impWords,sents) #Achieve the sentences that contain the keywords and map those sentences to the keywords using this function
#print(mappedSents)

Defaulting to user installation because normal site-packages is not writeable


In [9]:
#Step 5- Get the sense of the word. In order to attain a quality set of distractors we need to get the right sense of the keyword. This is explained in detail in the seperate alogrithm documentation

from pywsd.similarity import max_similarity
from pywsd.lesk import adapted_lesk
from pywsd.lesk import simple_lesk
from pywsd.lesk import cosine_lesk
from nltk.corpus import wordnet as wn
def getWordSense(sent,word):
    word=word.lower() 
    if len(word.split())>0: #Splits the word with underscores(_) instead of spaces if there are multiple words
        word=word.replace(" ","_")
    synsets=wn.synsets(word,'n') #Sysnets from Google are invoked
    if synsets:
        wup=max_similarity(sent,word,'wup',pos='n')
        adapted_lesk_output = adapted_lesk(sent, word, pos='n')
        lowest_index=min(synsets.index(wup),synsets.index(adapted_lesk_output))
        return synsets[lowest_index]
    else:
        return None
#print("fin")

Warming up PyWSD (takes ~10 secs)... took 17.83272933959961 secs.


In [14]:
!pip install rake-nltk
import re
import random
from nltk.corpus import wordnet as wn
from rake_nltk import Rake
import nltk


# Download necessary NLTK data
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('rake_nltk')

# Input text
text = """
In the heart of the quaint village of Eldermere, a mysterious tree stood tall in the town square. Its gnarled branches bore fruits that resembled pears, but with an unusual twist: they seemed to shimmer with a golden hue. The villagers affectionately named it the 'Shakespear' tree, believing it held magical properties.
Legend had it that anyone who tasted a Shakespear would gain a glimpse into their future. Curiosity
spread like wildfire, and soon, villagers flocked to the tree, eager for a taste of destiny. Young Emma, a
spirited girl with dreams of becoming a writer, felt an undeniable pull toward the shimmering fruit.
One crisp autumn morning, she approached the tree, heart racing. With a deep breath, she plucked a
Shakespear and took a bite. Instantly, a whirlwind of visions enveloped her. She saw herself standing on a
grand stage, the applause of a thousand voices echoing in her ears. In another glimpse, she wandered
through enchanted forests, her stories coming to life.
Determined to fulfill these dreams, Emma spent every spare moment writing. The villagers, inspired by
her passion, began sharing their own tales. The square buzzed with creativity, and soon, Eldermere
became a hub of storytelling.
As the seasons changed, Emma’s words took flight. She published her first book, a collection of
enchanting stories, and it captured the hearts of many beyond Eldermere. The Shakespear tree
continued to stand, its golden pears glimmering, a reminder that dreams, when nurtured, could blossom
into reality.
And so, in the embrace of magic and creativity, the legacy of the Shakespear lived on, inspiring
generations to reach for their dreams.
"""

# Extract keywords
def extract_keywords(text, top_n=5):
    r = Rake()
    r.extract_keywords_from_text(text)
    phrases = r.get_ranked_phrases()
    return phrases[:top_n]

# Generate distractors using WordNet
def generate_distractors(word):
    distractors = set()
    synsets = wn.synsets(word, pos=wn.NOUN)
    if synsets:
        syn = synsets[0]
        hypernyms = syn.hypernyms()
        for hyper in hypernyms:
            for hyponym in hyper.hyponyms():
                for lemma in hyponym.lemmas():
                    name = lemma.name().replace('_', ' ').capitalize()
                    if name.lower() != word.lower():
                        distractors.add(name)
    return list(distractors)[:3]

# Create an MCQ
def create_mcq(keyword, text):
    # Try to create a question
    question = f"What is special about {keyword} in the story?"
    
    # Generate distractors
    distractors = generate_distractors(keyword)
    
    # If no distractors found, use generic placeholders
    if len(distractors) < 3:
        distractors += ["Magic", "Legend", "Tree"]
        distractors = distractors[:3]
    
    # Prepare options
    options = [keyword] + distractors
    random.shuffle(options)
    
    # Format options
    labels = ['A', 'B', 'C', 'D']
    option_texts = [f"{labels[i]}. {opt}" for i, opt in enumerate(options)]
    
    # Print MCQ
    print(f"Q: {question}")
    for opt in option_texts:
        print(opt)
    print(f"Answer: {keyword}\n")

# Main
keywords = extract_keywords(text)
for kw in keywords:
    # Clean keyword
    clean_kw = kw.split()[0].capitalize()  # use first word of phrase as keyword
    create_mcq(clean_kw, text)


Defaulting to user installation because normal site-packages is not writeable
Collecting rake-nltk
  Downloading rake_nltk-1.0.6-py3-none-any.whl.metadata (6.4 kB)
Downloading rake_nltk-1.0.6-py3-none-any.whl (9.1 kB)
Installing collected packages: rake-nltk
Successfully installed rake-nltk-1.0.6


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\atulm\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\atulm\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\atulm\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Error loading rake_nltk: Package 'rake_nltk' not found in
[nltk_data]     index


Q: What is special about Emma in the story?
A. Legend
B. Magic
C. Emma
D. Tree
Answer: Emma

Q: What is special about One in the story?
A. Duodecimal digit
B. One
C. Septet
D. 2
Answer: One

Q: What is special about Gnarled in the story?
A. Legend
B. Tree
C. Gnarled
D. Magic
Answer: Gnarled

Q: What is special about Curiosity in the story?
A. Muddiness
B. Curiosity
C. Inwardness
D. Consciousness
Answer: Curiosity

Q: What is special about Mysterious in the story?
A. Mysterious
B. Tree
C. Magic
D. Legend
Answer: Mysterious

