In [12]:
# Let's create a function to check if all provided words are included in the text

import random
import spacy
import pyinflect
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
import re

# Ensure the required nltk data is downloaded
# nltk.download('wordnet')
# nltk.download('averaged_perceptron_tagger')

nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()

def get_word_forms_(word):
    words = classify_words(word)

    return words

def classify_words(word):
    noun_forms = None
    verb_forms = None
    adjective_forms = None
    adverb_forms = None

    if is_adjective(word):
        adjective_forms = get_list_forms(word, 'JJ')
    
    if is_adverb(word):
        adverb_forms = get_list_forms(word, 'RB')

    if is_noun(word):
        noun_forms = get_list_noun_forms(word)
    
    if is_verb(word):
        verb_forms = get_list_verb_forms(word)

    return noun_forms, verb_forms, adjective_forms, adverb_forms

def is_noun(word):
    # Check if the word has any noun synsets in WordNet
    return any(ss.pos() == 'n' for ss in wn.synsets(word))

def is_verb(word):
    # Check if the word has any verb synsets in WordNet
    return any(ss.pos() == 'v' for ss in wn.synsets(word))

def is_adjective(word):
    # Check if the word has any adjective synsets in WordNet
    return any(ss.pos() == 'a' or ss.pos() == 's' for ss in wn.synsets(word))

def is_adverb(word):
    # Check if the word has any adverb synsets in WordNet
    return any(ss.pos() == 'r' for ss in wn.synsets(word))


def get_list_forms(word, pos):
    doc = nlp(word)
    token = doc[0]

    base_form = token.text
    comparative_form = token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') if token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') else word
    superlative_form = token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') if token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') else word
    
    return base_form, comparative_form, superlative_form
    

def get_list_noun_forms(word):
    doc = nlp(word)
    token = doc[0]

    singular_form = token._.inflect('NN') if token._.inflect('NN') else word
    plural_form = token._.inflect('NNS') if token._.inflect('NNS') else word
    
    return singular_form, plural_form
    
def get_list_verb_forms(word):
    if word.lower() == "be":
        return 'be', 'was', 'were', 'being', 'been', 'am', 'is', 'are'
        
    base_form = lemmatizer.lemmatize(word, 'v')
    
    return base_form,conjugate_verb(base_form, 'VBD'),conjugate_verb(base_form, 'VBG'),conjugate_verb(base_form, 'VBN'),base_form, conjugate_verb(base_form, 'VBZ')

def get_list_idiom_forms(word):
    
    first_word = word.split()[0]
    rest_of_words = ' '.join(word.split()[1:])

    if first_word.lower() == "be":
        return 'be'+' '+ rest_of_words, 'was'+' '+ rest_of_words, 'were'+' '+ rest_of_words, 'being'+' '+ rest_of_words, 'been'+' '+ rest_of_words, 'am'+' '+ rest_of_words, 'is'+' '+ rest_of_words, 'are'+' '+ rest_of_words

    base_form = lemmatizer.lemmatize(first_word, 'v')
    return base_form +' '+ rest_of_words,conjugate_verb(base_form, 'VBD')+' '+ rest_of_words,conjugate_verb(base_form, 'VBG')+' '+ rest_of_words,conjugate_verb(base_form, 'VBN')+' '+ rest_of_words,base_form+' '+ rest_of_words, conjugate_verb(base_form, 'VBZ')+' '+ rest_of_words

def conjugate_verb(base, tense):
    doc = nlp(base)
    token = doc[0]
    if tense == 'VBD':
        return token._.inflect("VBD")
    elif tense == 'VBG':
        return token._.inflect("VBG")
    elif tense == 'VBN':
        return token._.inflect("VBN")
    elif tense == 'VBZ':
        return token._.inflect("VBZ")
    return base


def convert_to_unique_list(input_str):
    """
    Converts a string representation of a list of tuples to a list of unique values,
    removing any 'None' values.
    
    Args:
        input_str (str): The input string to be processed.
    
    Returns:
        list: A list of unique values.
    """
    # Remove the parentheses and quotes
    cleaned_str = re.sub(r'[()"]', '', input_str)
    # Split the string into a list of tuples
    tuples = [tuple(x.strip().split(', ')) for x in cleaned_str.split('), (')]
    # print("tuples1: ", tuples)
    
    # Flatten the list of tuples and remove duplicates
    result = list(set([item for tup in tuples for item in tup]))
    
    # Remove any 'None' values
    result = [x for x in result if x != 'None']

    result = [item.strip("'") for item in result]

    return result


def string_to_random_list(input_string):
    """
    Takes a comma-separated string as input and returns a list of strings in random order.
    """
    word_list = [word.strip() for word in input_string.split(',')]
    random.shuffle(word_list)
    return list(set(word_list))

def check_words_in_text(text, words):
    if type(words) is not list:
        words = string_to_random_list(words)
        
    # Convert both text and words to lowercase
    text = text.lower()
    words = [word.lower() for word in words]    
    
    # missing_words = [word for word in words if word not in text]
    
    for word in words :
        word_forms = get_word_forms_(word) # from word
        word_forms_list = convert_to_unique_list(str(word_forms))
        if len(word_forms_list) == 0:
            idioms_forms = None
            word_forms_list.append(word)
            if is_verb(word.split()[0]): 
                idioms_forms = list(get_list_idiom_forms(word))
                for idiom in idioms_forms:
                    word_forms_list.append(idiom)
                word_forms_list= list(set(word_forms_list))
        word_flag = False
        for each_word in word_forms_list:
            if each_word in text:
                word_flag = True
        if word_flag == True: pass
        elif word_flag == False:
            missing_words.append(word) 

    return missing_words


words_list = None
story_text = ""
missing_words = []

# Provided words list
words_list = ['major/minor', 'office hours', 'graduates/post-graduates', 'participation', 'course outline/syllabus', 'getting a raise', 'temporary job', 'conditional offer', 'certificate programs', 'take-home exam', 'make-up exam', 'admissions status - regular/transfer/visitor/no credit', 'elective', 'open-book exam', 'restriction (against a course)', 'withdrawal', 'regular session/semester', 'independent study', 'permanent position', 'fees', 'extra-curricular activities', 'drop and add', 'tuition', 'job qualification', 'academic probation', 'upper-division/lower-division', 'summer session/semester', 'quarter system', 'summer job/position', 'non-credit course', 'office clerk', 'academic advisor/counselor', 'pay check', 'placement test', 'career day/job fair', 'academic session', 'fast-track class', 'academic tutor', 'honors program', 'department', 'citation', 'faculty', 'acceptance/admission', 'accreditation', 'grade point average (gpa)', 'being on contract', 'disenrollment/drop-out', 'degree requirements/graduation requirements', 'audit']

# Given story text
story_text = """
Scene 1: Emily meets Dr. Smith for Academic Advising

Emily: Hi Dr. Smith, I’m Emily. I’m here for advice on my major and minor choices.

Dr. Smith: Hi Emily. Have you decided on your major yet?

Emily: I’m leaning towards a major in Psychology and a minor in Sociology.

Dr. Smith: That sounds like a solid combination. Make sure to check the course outline and syllabus for each class. Some courses might have restrictions.

Emily: I will. I also have a question about office hours. When are yours?

Dr. Smith: My office hours are posted outside my office, but generally, they are Monday and Wednesday afternoons.

Emily: Great, I’ll make sure to visit if I have more questions. By the way, what’s the admissions status of a transfer student?

Dr. Smith: A transfer student is someone who comes from another institution with some credits already completed. They need to check if those credits meet our degree requirements.

Scene 2: John, a Recent Graduate, Talks about His Experience

Emily: Hi John, I heard you just graduated. Congratulations!

John: Thanks, Emily! It feels great to finally be done.

Emily: Can you tell me about your experience with the graduation requirements?

John: Sure. Meeting the degree requirements was tough. I had to maintain a good grade point average (GPA) and complete both upper-division and lower-division courses.

Emily: Did you take any certificate programs?

John: Yes, I took a couple of certificate programs in addition to my major. It added some fees, but it was worth it.

Emily: What was the most challenging part?

John: For me, the open-book exams and take-home exams were challenging because they required a lot of independent study. But participation in extra-curricular activities helped balance things out.

Scene 3: Lisa, the Part-time Office Clerk, Balances Work and Study

Emily: Hi Lisa, how do you manage your job as an office clerk with your studies?

Lisa: Hi Emily. It’s all about time management. I work part-time and try to schedule my classes in the morning.

Emily: Are you planning to get a permanent position after you graduate?

Lisa: Yes, I’m hoping to. Right now, I’m on a temporary job contract, but I’m gaining valuable experience.

Emily: What about getting a raise? Is that possible in your current role?

Lisa: It is, but it depends on my performance during the probationary period and the qualifications I bring to the job.

Scene 4: Discussion about Exams and Academic Policies

Emily: Dr. Smith, I have a question about make-up exams. What if I miss an exam?

Dr. Smith: If you miss an exam, you need to notify the faculty as soon as possible. We can arrange a make-up exam under certain conditions.

Emily: What about if I want to audit a class?

Dr. Smith: Auditing a class means you attend it without receiving credit. It’s a good option if you’re interested in the subject but don’t need the credit.

Emily: I’m also considering a fast-track class during the summer session. Do you think that’s a good idea?

Dr. Smith: Fast-track classes are intensive, but they can help you catch up or get ahead. Just make sure it fits into your academic session plan.

Scene 5: Career Day and Job Fair Preparation

Emily: Lisa, are you going to the career day and job fair?

Lisa: Yes, I am. It’s a great opportunity for campus recruitment. Plus, they often have mock interviews which are really helpful.

Emily: That sounds useful. I’m also looking for a summer job or position related to my major.

Lisa: Definitely attend. They have various employers looking for both temporary and permanent positions.

Scene 6: Discussing Academic Challenges

Emily: Dr. Smith, what happens if someone is on academic probation?

Dr. Smith: If a student’s GPA falls below the required level, they are placed on academic probation. They must improve their grades in the following semester to avoid disenrollment or drop-out.

Emily: That sounds serious. I’ll make sure to keep my grades up.

Dr. Smith: Good idea. Remember, you can always seek help from an academic tutor or counselor if you need it.
"""


# story_text = "The quick brown fox jumps over the lazy dog. It give off, are made up of, **sprang up** and **comprised**"
# words_list = "Spring up, Comprise, Flight, Cat, Dog, give off, be made up of"


# Check for missing words
missing_words = check_words_in_text(story_text, words_list)
print(missing_words)
print(len(missing_words))


['major/minor', 'graduates/post-graduates', 'course outline/syllabus', 'conditional offer', 'admissions status - regular/transfer/visitor/no credit', 'elective', 'restriction (against a course)', 'withdrawal', 'regular session/semester', 'drop and add', 'tuition', 'job qualification', 'upper-division/lower-division', 'summer session/semester', 'quarter system', 'summer job/position', 'non-credit course', 'academic advisor/counselor', 'pay check', 'placement test', 'career day/job fair', 'honors program', 'department', 'citation', 'acceptance/admission', 'accreditation', 'being on contract', 'disenrollment/drop-out', 'degree requirements/graduation requirements']
29


In [6]:
word='be made up of'
word_forms_list=[]
# if len(word_forms_list) ==0:
#     word_forms_list.append(word)
first_word = word.split()[0]
rest_of_words = ' '.join(word.split()[1:])
print(rest_of_words)
print(first_word + ' '+ rest_of_words)

if len(word_forms_list) == 0:
    idioms_forms = None
    word_forms_list.append(word)
    if is_verb(word.split()[0]): 
        idioms_forms = list(get_list_idiom_forms(word))
        for idiom in idioms_forms:
            word_forms_list.append(idiom)
        word_forms_list= list(set(word_forms_list))

print(word_forms_list)

made up of
be made up of
['is made up of', 'being made up of', 'was made up of', 'be made up of', 'been made up of']


In [3]:
# Let's create a function to check if all provided words are included in the text

import random


def string_to_random_list(input_string):
    """
    Takes a comma-separated string as input and returns a list of strings in random order.
    """
    word_list = [word.strip() for word in input_string.split(',')]
    random.shuffle(word_list)
    return list(set(word_list))

def check_words_in_text(text, words):
    if type(words) is not list:
        words = string_to_random_list(words)
        
    # Convert both text and words to lowercase
    text = text.lower()
    words = [word.lower() for word in words]    
    
    missing_words = [word for word in words if word not in text]

    return missing_words


words_list = None
story_text = ""
missing_words = []

# Provided words list
words_list = ['hairline', 'likely', 'be made up of', 'loyalty', 'tremulous', 'make attractive', 'intentionally', 'incitement', 'sympathize', 'calculatedly', 'diminutive', 'trembling', 'come before', 'irremediable', 'appoint', 'distribute', 'keep in check', 'snake', 'presume', 'primarily', 'accept as true', 'eccentricity', 'rule out', 'allegiance', 'coarse', 'inducement', 'accustomed', 'yearn', 'shrewd', 'be composed of', 'aberration', 'ornament', 'meander', 'rebellion', 'incident', 'come out', 'terminate', 'specify', 'pine', 'feel compassion', 'idiosyncrasy', 'devise', 'revolt', '(sudden) advance', 'trace', 'sole', 'relinquish', 'departure', 'solitary', 'wind', 'divergence', 'trustworthy', 'irreversible']




# Given story text
story_text = """
### The Evolution of Robotics and Artificial Intelligence

The field of robotics and artificial intelligence (AI) has experienced a **sudden advance** over the past few decades. This **phenomenon** can be **traced** back to numerous incremental improvements, **primarily** driven by innovations in computing power and algorithmic efficiency. Modern robots **are made up of** sophisticated sensors and AI systems that enable them to perform tasks once thought to be the **sole** domain of humans.

### Design and Structure

Robots **be composed of** various components, each playing a crucial role in their functionality. These components include **hairline** precise circuits and **diminutive** yet powerful processors that **snake** through their bodies, enabling **shrewd** decision-making processes. The design is often **ornamented** to **make attractive** their appearance, making them more appealing to users.

### Ethical and Social Implications

The deployment of AI and robotics is not without its ethical dilemmas. One must **keep in check** the potential for misuse, which could lead to **irremediable** harm. Developers must **intentionally** design systems with ethical considerations, **calculatingly** weighing the benefits against the risks. For instance, AI systems must **rule out** biases and ensure fairness to garner public **trustworthy**.

The relationship between humans and robots also **comes with** its peculiar **idiosyncrasies**. For example, there is a **yearning** among some for robots that can **sympathize** and understand human emotions. This desire highlights the **eccentricity** of human-robot interactions, where people often project their **loyalty** and **allegiance** onto machines.

### Challenges and Adaptations

One of the major challenges in the field is the potential for societal **rebellion** against widespread automation. This **rebellion** could be seen as an **aberration**, a **divergence** from the usual acceptance of technology. Incidents of **revolt** against robots, fueled by fears of job displacement, are **likely** to increase. It is essential to **devise** strategies to **distribute** the benefits of robotics evenly and ensure that the workforce is **accustomed** to new roles.

Moreover, developers must be prepared to address any **incitement** to fear through transparent communication and ethical practices. As robots **come out** of the laboratories and into everyday life, there will be a period of adjustment, where old roles are **terminated** and new ones are created.

### Future Prospects

Looking forward, the future of robotics and AI appears **irreversible** in its trajectory. As technology continues to advance, robots will become more integrated into daily life. The **calculated** steps taken now will shape how society adapts to these changes. The goal is to create a harmonious coexistence where robots are seen not as **coarse** intruders but as **trustworthy** companions.

In conclusion, the evolution of robotics and AI is a complex journey marked by innovation, ethical considerations, and societal impacts. By understanding and addressing the challenges, we can ensure a future where technology enhances human life while maintaining the delicate balance of our social fabric.

"""


# story_text = "The quick brown fox jumps over the lazy dog. It give off, **sprang up** and **comprised**"
# words_list = "Spring up, Comprise, Flight, Cat, Dog, give off"

# Check for missing words
missing_words = check_words_in_text(story_text, words_list)
print(missing_words)


['be made up of', 'tremulous', 'calculatedly', 'trembling', 'come before', 'appoint', 'presume', 'accept as true', 'inducement', 'meander', 'specify', 'pine', 'feel compassion', 'idiosyncrasy', '(sudden) advance', 'relinquish', 'departure', 'solitary', 'wind']


In [6]:
# Let's create a function to check if all provided words are included in the text

import random


def string_to_random_list(input_string):
    """
    Takes a comma-separated string as input and returns a list of strings in random order.
    """
    word_list = [word.strip() for word in input_string.split(',')]
    random.shuffle(word_list)
    return list(set(word_list))

def check_words_in_text(text, words):
    if type(words) is not list:
        words = string_to_random_list(words)

    if type(text) is list:
        text = ' '.join(text)
    print(text)
    # Convert both text and words to lowercase
    text = text.lower()
    words = [word.lower() for word in words]    
    
    missing_words = [word for word in words if word not in text]

    return missing_words


words_list = None
story_text = ""
missing_words = []

# Provided words list
words_list = ['tremulous', 'calculatedly', 'trembling', 'come before', 'appoint', 'presume', 'accept as true', 'inducement', 'meander', 'specify', 'pine', 'feel compassion', '(sudden) advance', 'relinquish', 'departure', 'solitary', 'wind']

print(len(words_list))

# Given story text
story_text = ['be made up of', 'tremulous', 'calculatedly', 'trembling', 'come before', 'appoint', 'presume', 'accept as true', 'inducement', 'meander', 'specify', 'pine', 'feel compassion', 'idiosyncrasy', '(sudden) advance', 'relinquish', 'departure', 'solitary', 'wind']

print(len(story_text))
# story_text = "The quick brown fox jumps over the lazy dog. It **sprang up** and **comprised**"
# words_list = "Spring up, Comprise, Flight, Cat, Dog"

# Check for missing words
missing_words = check_words_in_text(story_text, words_list)
print(missing_words)


17
19
be made up of tremulous calculatedly trembling come before appoint presume accept as true inducement meander specify pine feel compassion idiosyncrasy (sudden) advance relinquish departure solitary wind
[]


In [5]:
import random

def string_to_random_list(input_string):
    """
    Takes a comma-separated string as input and returns a list of strings in random order.
    """
    word_list = [word.strip() for word in input_string.split(',')]
    random.shuffle(word_list)
    return list(set(word_list))

def get_count(input_list):
    return len(input_list)

def get_count_comma(input_string):
    return input_string.count(",")

input_string =''
output_list = []

input_string = 'plausible, believable, probable, convert, alter, counterpart, complement, equivalent, nonetheless, pronounced, striking, marked, noticeable, distinct, celebrated, renowned, well-known, exceedingly, excessively, chancy, perilous, upright, up-and-down, erect, persist, endure, remain, extent, discern, discerning, astute, perceptive, substitutive, substitute, scheme, refine, valuable, dear, priceless, precise, subtle, hardly perceived, slight, conducive to, favorable to, helpful for, beneficial to, preoccupied with, absorbed in, immersed in, swift, fleet, menace, threaten, intimidate, frighten, akin to, consume, belittle, disregard, underestimate, commend, admire, praise, compliment, dictate, sue, take a strong legal action against, charge, novelty, fragmentation, destruction, disintegration, disruption, principally, primarily, chiefly, sufficient, adequate, ample, oddly, exceptionally, atypically, bar, obstruction, immense, colossal, reproduce, duplicate, imitate, breed, multiply, propagate, allow, enable, permit, let, penetrate, pierce, go through, device, apparatus, appliance, divergent, varying, dissimilar, flaw, defect, fault, blemish, buffer, cushion, refute, disprove, rebut, prove false, household, domesticate, tame, train, on the spur of the moment, without planning, on impulse, temperate, moderate, mild, clement, consistent with, in agreement with, congruent with, splendor, magnificence, grandeur, majesty, succeed, flourish, brew, loom, be on the way, persevere, carry on, persist, preserve, bias, prejudice, administer, conduct, groundless, unfounded, baseless, arouse, stimulate, provoke, incite, awake, engross, engage, virtual, unexplored, uncovered, tuned to, in agreement with'

output_list = string_to_random_list(input_string)
print(output_list)
print(get_count_comma(input_string))
print(get_count(output_list))

['immense', 'disprove', 'atypically', 'chancy', 'primarily', 'preserve', 'valuable', 'perilous', 'discerning', 'praise', 'magnificence', 'tame', 'immersed in', 'sufficient', 'clement', 'moderate', 'refine', 'pronounced', 'extent', 'admire', 'allow', 'let', 'sue', 'oddly', 'apparatus', 'reproduce', 'breed', 'household', 'bias', 'frighten', 'distinct', 'scheme', 'remain', 'perceptive', 'helpful for', 'without planning', 'brew', 'novelty', 'probable', 'persist', 'flourish', 'uncovered', 'excessively', 'train', 'incite', 'in agreement with', 'commend', 'be on the way', 'precise', 'disintegration', 'colossal', 'congruent with', 'appliance', 'marked', 'bar', 'virtual', 'propagate', 'multiply', 'akin to', 'loom', 'chiefly', 'device', 'menace', 'compliment', 'favorable to', 'complement', 'disruption', 'equivalent', 'fleet', 'conducive to', 'believable', 'on impulse', 'succeed', 'exceedingly', 'fault', 'groundless', 'prove false', 'arouse', 'striking', 'upright', 'beneficial to', 'disregard', '

In [5]:
import spacy
import pyinflect
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
import re

# Ensure the required nltk data is downloaded
# nltk.download('wordnet')
# nltk.download('averaged_perceptron_tagger')

nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()

def get_word_forms_(word):
    words = classify_words(word)
    return words

def get_word_forms(word):
    noun_forms = None
    verb_forms = None
    adjective_forms = None
    adverb_forms = None

    if is_adjective(word):
        adjective_forms = get_forms(word, 'JJ')
    
    if is_adverb(word):
        adverb_forms = get_forms(word, 'RB')

    if is_noun(word):
        noun_forms = get_noun_forms(word)
    
    if is_verb(word):
        verb_forms = get_verb_forms(word)

    return {
        'noun_forms': noun_forms,
        'verb_forms': verb_forms,
        'adjective_forms': adjective_forms,
        'adverb_forms': adverb_forms
    }


def classify_words(word):
    noun_forms = None
    verb_forms = None
    adjective_forms = None
    adverb_forms = None

    if is_adjective(word):
        adjective_forms = get_list_forms(word, 'JJ')
    
    if is_adverb(word):
        adverb_forms = get_list_forms(word, 'RB')

    if is_noun(word):
        noun_forms = get_list_noun_forms(word)
    
    if is_verb(word):
        verb_forms = get_list_verb_forms(word)

    return noun_forms, verb_forms, adjective_forms, adverb_forms

def is_noun(word):
    # Check if the word has any noun synsets in WordNet
    return any(ss.pos() == 'n' for ss in wn.synsets(word))

def is_verb(word):
    # Check if the word has any verb synsets in WordNet
    return any(ss.pos() == 'v' for ss in wn.synsets(word))

def is_adjective(word):
    # Check if the word has any adjective synsets in WordNet
    return any(ss.pos() == 'a' or ss.pos() == 's' for ss in wn.synsets(word))

def is_adverb(word):
    # Check if the word has any adverb synsets in WordNet
    return any(ss.pos() == 'r' for ss in wn.synsets(word))


def get_list_forms(word, pos):
    doc = nlp(word)
    token = doc[0]

    base_form = token.text
    comparative_form = token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') if token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') else word
    superlative_form = token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') if token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') else word
    
    return base_form, comparative_form, superlative_form
    

def get_list_noun_forms(word):
    doc = nlp(word)
    token = doc[0]

    singular_form = token._.inflect('NN') if token._.inflect('NN') else word
    plural_form = token._.inflect('NNS') if token._.inflect('NNS') else word
    
    return singular_form, plural_form
    

def get_list_verb_forms(word):
    base_form = lemmatizer.lemmatize(word, 'v')
    
    return base_form,conjugate_verb(base_form, 'VBD'),conjugate_verb(base_form, 'VBG'),conjugate_verb(base_form, 'VBN'),base_form, conjugate_verb(base_form, 'VBZ')
    



def get_forms(word, pos):
    doc = nlp(word)
    token = doc[0]

    base_form = token.text
    comparative_form = token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') if token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') else word
    superlative_form = token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') if token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') else word
    
    return {
        'base_form': base_form,
        'comparative_form': comparative_form,
        'superlative_form': superlative_form
    }

def get_noun_forms(word):
    doc = nlp(word)
    token = doc[0]

    singular_form = token._.inflect('NN') if token._.inflect('NN') else word
    plural_form = token._.inflect('NNS') if token._.inflect('NNS') else word
    
    return {
        'singular_form': singular_form,
        'plural_form': plural_form
    }

def get_verb_forms(word):
    base_form = lemmatizer.lemmatize(word, 'v')
    
    return {
        'base_form': base_form,
        'past_tense': conjugate_verb(base_form, 'VBD'),
        'gerund_or_present_participle': conjugate_verb(base_form, 'VBG'),
        'past_participle': conjugate_verb(base_form, 'VBN'),
        'non_3rd_person_singular_present': base_form,
        '3rd_person_singular_present': conjugate_verb(base_form, 'VBZ'),
    }

def conjugate_verb(base, tense):
    doc = nlp(base)
    token = doc[0]
    if tense == 'VBD':
        return token._.inflect("VBD")
    elif tense == 'VBG':
        return token._.inflect("VBG")
    elif tense == 'VBN':
        return token._.inflect("VBN")
    elif tense == 'VBZ':
        return token._.inflect("VBZ")
    return base




def convert_to_unique_list(input_str):
    """
    Converts a string representation of a list of tuples to a list of unique values,
    removing any 'None' values.
    
    Args:
        input_str (str): The input string to be processed.
    
    Returns:
        list: A list of unique values.
    """
    # Remove the parentheses and quotes
    cleaned_str = re.sub(r'[()"]', '', input_str)
    # Split the string into a list of tuples
    tuples = [tuple(x.strip().split(', ')) for x in cleaned_str.split('), (')]
    # print("tuples1: ", tuples)
    
    # Flatten the list of tuples and remove duplicates
    result = list(set([item for tup in tuples for item in tup]))
    
    # Remove any 'None' values
    result = [x for x in result if x != 'None']

    result = [item.strip("'") for item in result]

    return result



# Example usage
word = "be"
# word_forms = get_word_forms(word)
word_forms = get_word_forms_(word)
print(word_forms)
print(convert_to_unique_list(str(word_forms)))


(('be', 'be'), ('be', 'was', 'being', 'been', 'be', 'is'), None, None)
['being', 'was', 'is', 'been', 'be']


In [1]:
import random
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

def string_to_random_list(input_string):
    """
    Takes a comma-separated string as input and returns a list of strings in random order.
    """
    word_list = [word.strip() for word in input_string.split(',')]
    random.shuffle(word_list)
    return list(set(word_list))

def check_words_in_text(text, words):
    if type(words) is not list:
        words = [word.strip() for word in words.split(',')]
    
    # Convert text to lowercase
    text_words = [word.lower() for word in text.split()]
    
    missing_words = []
    for word in words:
        if word not in text_words:
            missing_words.append(word)
    
    return missing_words

# Example usage
story_text = "The quick brown fox jumps over the lazy dog. It **sprang up** and **comprised**. **oddities**"
words_list = "Spring up, Comprise, Flight, Cat, Dog, oddity"

missing_words = check_words_in_text(story_text, words_list)
print("result: ", missing_words)



result:  ['Spring up', 'Comprise', 'Flight', 'Cat', 'Dog', 'oddity']


In [3]:
# Let's create a function to check if all provided words are included in the text

import random
import spacy
import pyinflect
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
import re

# Ensure the required nltk data is downloaded
# nltk.download('wordnet')
# nltk.download('averaged_perceptron_tagger')

nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()

def get_word_forms_(word):
    words = classify_words(word)

    return words

def classify_words(word):
    noun_forms = None
    verb_forms = None
    adjective_forms = None
    adverb_forms = None

    if is_adjective(word):
        adjective_forms = get_list_forms(word, 'JJ')
    
    if is_adverb(word):
        adverb_forms = get_list_forms(word, 'RB')

    if is_noun(word):
        noun_forms = get_list_noun_forms(word)
    
    if is_verb(word):
        verb_forms = get_list_verb_forms(word)

    return noun_forms, verb_forms, adjective_forms, adverb_forms

def is_noun(word):
    # Check if the word has any noun synsets in WordNet
    return any(ss.pos() == 'n' for ss in wn.synsets(word))

def is_verb(word):
    # Check if the word has any verb synsets in WordNet
    return any(ss.pos() == 'v' for ss in wn.synsets(word))

def is_adjective(word):
    # Check if the word has any adjective synsets in WordNet
    return any(ss.pos() == 'a' or ss.pos() == 's' for ss in wn.synsets(word))

def is_adverb(word):
    # Check if the word has any adverb synsets in WordNet
    return any(ss.pos() == 'r' for ss in wn.synsets(word))


def get_list_forms(word, pos):
    doc = nlp(word)
    token = doc[0]

    base_form = token.text
    comparative_form = token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') if token._.inflect(f'JJR' if pos == 'JJ' else 'RBR') else word
    superlative_form = token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') if token._.inflect(f'JJS' if pos == 'JJ' else 'RBS') else word
    
    return base_form, comparative_form, superlative_form
    

def get_list_noun_forms(word):
    doc = nlp(word)
    token = doc[0]

    singular_form = token._.inflect('NN') if token._.inflect('NN') else word
    plural_form = token._.inflect('NNS') if token._.inflect('NNS') else word
    
    return singular_form, plural_form
    
def get_list_verb_forms(word):
    if word.lower() == "be":
        return 'be', 'was', 'were', 'being', 'been', 'am', 'is', 'are'
        
    base_form = lemmatizer.lemmatize(word, 'v')
    
    return base_form,conjugate_verb(base_form, 'VBD'),conjugate_verb(base_form, 'VBG'),conjugate_verb(base_form, 'VBN'),base_form, conjugate_verb(base_form, 'VBZ')

def get_list_idiom_forms(word):
    
    first_word = word.split()[0]
    rest_of_words = ' '.join(word.split()[1:])

    if first_word.lower() == "be":
        return 'be'+' '+ rest_of_words, 'was'+' '+ rest_of_words, 'were'+' '+ rest_of_words, 'being'+' '+ rest_of_words, 'been'+' '+ rest_of_words, 'am'+' '+ rest_of_words, 'is'+' '+ rest_of_words, 'are'+' '+ rest_of_words

    base_form = lemmatizer.lemmatize(first_word, 'v')
    return base_form +' '+ rest_of_words,conjugate_verb(base_form, 'VBD')+' '+ rest_of_words,conjugate_verb(base_form, 'VBG')+' '+ rest_of_words,conjugate_verb(base_form, 'VBN')+' '+ rest_of_words,base_form+' '+ rest_of_words, conjugate_verb(base_form, 'VBZ')+' '+ rest_of_words

def conjugate_verb(base, tense):
    doc = nlp(base)
    token = doc[0]
    if tense == 'VBD':
        return token._.inflect("VBD")
    elif tense == 'VBG':
        return token._.inflect("VBG")
    elif tense == 'VBN':
        return token._.inflect("VBN")
    elif tense == 'VBZ':
        return token._.inflect("VBZ")
    return base


def convert_to_unique_list(input_str):
    """
    Converts a string representation of a list of tuples to a list of unique values,
    removing any 'None' values.
    
    Args:
        input_str (str): The input string to be processed.
    
    Returns:
        list: A list of unique values.
    """
    # Remove the parentheses and quotes
    cleaned_str = re.sub(r'[()"]', '', input_str)
    # Split the string into a list of tuples
    tuples = [tuple(x.strip().split(', ')) for x in cleaned_str.split('), (')]
    # print("tuples1: ", tuples)
    
    # Flatten the list of tuples and remove duplicates
    result = list(set([item for tup in tuples for item in tup]))
    
    # Remove any 'None' values
    result = [x for x in result if x != 'None']

    result = [item.strip("'") for item in result]

    return result


def string_to_random_list(input_string):
    """
    Takes a comma-separated string as input and returns a list of strings in random order.
    """
    word_list = [word.strip() for word in input_string.split(',')]
    random.shuffle(word_list)
    return list(set(word_list))

def check_words_in_text(text, words):
    if type(words) is not list:
        words = string_to_random_list(words)
        
    # Convert both text and words to lowercase
    text = text.lower()
    words = [word.lower() for word in words]    
    
    # missing_words = [word for word in words if word not in text]
    
    for word in words :
        word_forms = get_word_forms_(word) # from word
        word_forms_list = convert_to_unique_list(str(word_forms))
        if len(word_forms_list) == 0:
            idioms_forms = None
            word_forms_list.append(word)
            if is_verb(word.split()[0]): 
                idioms_forms = list(get_list_idiom_forms(word))
                for idiom in idioms_forms:
                    word_forms_list.append(idiom)
                word_forms_list= list(set(word_forms_list))
        word_flag = False
        for each_word in word_forms_list:
            if each_word in text:
                word_flag = True
        if word_flag == True: pass
        elif word_flag == False:
            missing_words.append(word) 

    return missing_words


words_list = None
story_text = ""
missing_words = []

# Provided words list
# Given story text
story_text = "The quick brown fox jumps over the lazy dog. It give off, are made up of, **sprang up** and **comprised**"
words_list = "Spring up, Comprise, Flight, Cat, Dog, give off, be made up of"


# Check for missing words
missing_words = check_words_in_text(story_text, words_list)
print(missing_words)
print(len(missing_words))


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     --------------------------------------- 12.8/12.8 MB 12.1 MB/s eta 0:00:00
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')



[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: C:\Users\usabu\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
