In [35]:
def run_tests(func, test_cases):
    for i, args in enumerate(test_cases):
        result = func(*args)
        formatted_args = [f'"{arg}"' if isinstance(arg, str) else str(arg) for arg in args]
        excel_formula = f"={func.__name__.upper()}({', '.join(formatted_args)})"
        print(f"Case {i+1}: {args} -> {result} | Excel: {excel_formula}")

In [36]:
def classify_intent(text):
    """Classify the intent of the input text using NLTK's NaiveBayesClassifier.
    Args:
        text (str): Text to analyze
    Returns:
        str: Classified intent (capitalized)
    """
    import nltk
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import word_tokenize
    nltk.download('punkt')
    
    def extract_features(text):
        """Extract features from text for classification."""
        words = word_tokenize(text.lower())
        return dict([(word, True) for word in words])
    
    # Training data with labeled intents
    training_data = [
        ("what is the weather like", "question"),
        ("what time is it", "question"),
        ("where are you from", "question"),
        ("who made this", "question"),
        ("please help me", "request"),
        ("could you assist me", "request"),
        ("show me how to", "request"),
        ("i need help with", "request"),
        ("hello there", "greeting"),
        ("hi how are you", "greeting"),
        ("good morning", "greeting"),
        ("nice to meet you", "greeting"),
        ("goodbye for now", "farewell"),
        ("see you later", "farewell"),
        ("i have to go", "farewell"),
        ("thanks for your help", "farewell"),
        ("i like this product", "statement"),
        ("the weather is nice", "statement"),
        ("this works well", "statement"),
        ("interesting idea", "statement")
    ]
    
    # Prepare and train the classifier
    featuresets = [(extract_features(text), intent) for (text, intent) in training_data]
    classifier = NaiveBayesClassifier.train(featuresets)
    
    # Classify the input text
    features = extract_features(text)
    intent = classifier.classify(features)
    return intent.capitalize()

test_cases = [
    ["What's the temperature today?"],
    ["Can you help me find my files?"],
    ["Hi, nice to meet you!"],
    ["Bye, thanks for all your help!"],
    ["The system is working perfectly."],
    ["Where did you put the documents?"],
    ["Please show me the way."]
]

run_tests(classify_intent, test_cases)

Case 1: ["What's the temperature today?"] -> Question | Excel: =CLASSIFY_INTENT("What's the temperature today?")
Case 2: ['Can you help me find my files?'] -> Request | Excel: =CLASSIFY_INTENT("Can you help me find my files?")
Case 3: ['Hi, nice to meet you!'] -> Greeting | Excel: =CLASSIFY_INTENT("Hi, nice to meet you!")
Case 4: ['Bye, thanks for all your help!'] -> Farewell | Excel: =CLASSIFY_INTENT("Bye, thanks for all your help!")
Case 5: ['The system is working perfectly.'] -> Question | Excel: =CLASSIFY_INTENT("The system is working perfectly.")
Case 6: ['Where did you put the documents?'] -> Question | Excel: =CLASSIFY_INTENT("Where did you put the documents?")
Case 7: ['Please show me the way.'] -> Request | Excel: =CLASSIFY_INTENT("Please show me the way.")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\

In [37]:
import nltk
import numpy

# Download all required NLTK data
for package in ['punkt', 'averaged_perceptron_tagger_eng', 'maxent_ne_chunker_tab', 'words']:
    nltk.download(package)

def named_entity_recognition(text):
    """Extract named entities from text.
    Args:
        text (str): Text to analyze
    Returns:
        list: List of named entities found
    """
    # Tokenize and tag the text
    tokens = nltk.word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    
    # Extract named entities
    named_entities = nltk.ne_chunk(pos_tags)
    entities = []
    
    # Process each chunk
    for chunk in named_entities:
        if hasattr(chunk, 'label'):
            entity = ' '.join(c[0] for c in chunk.leaves())
            entity_type = chunk.label()
            entities.append(f"{entity} ({entity_type})")
    
    return entities if entities else ['No named entities found']

test_cases = [
    ["John works at Microsoft in Seattle."],
    ["The United States and Canada signed a trade agreement."],
    ["Tesla CEO Elon Musk announced new plans."],
    ["Mount Everest is in Nepal."],
    ["Sarah visited Paris last summer."]
]

run_tests(named_entity_recognition, test_cases)
# Ignore dowload error in VS Code

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker_tab is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


Case 1: ['John works at Microsoft in Seattle.'] -> ['John (PERSON)', 'Microsoft (ORGANIZATION)', 'Seattle (GPE)'] | Excel: =NAMED_ENTITY_RECOGNITION("John works at Microsoft in Seattle.")
Case 2: ['The United States and Canada signed a trade agreement.'] -> ['United States (GPE)', 'Canada (GPE)'] | Excel: =NAMED_ENTITY_RECOGNITION("The United States and Canada signed a trade agreement.")
Case 3: ['Tesla CEO Elon Musk announced new plans.'] -> ['Tesla (PERSON)', 'CEO Elon Musk (ORGANIZATION)'] | Excel: =NAMED_ENTITY_RECOGNITION("Tesla CEO Elon Musk announced new plans.")
Case 4: ['Mount Everest is in Nepal.'] -> ['Mount (PERSON)', 'Everest (ORGANIZATION)', 'Nepal (GPE)'] | Excel: =NAMED_ENTITY_RECOGNITION("Mount Everest is in Nepal.")
Case 5: ['Sarah visited Paris last summer.'] -> ['Sarah (PERSON)', 'Paris (GPE)'] | Excel: =NAMED_ENTITY_RECOGNITION("Sarah visited Paris last summer.")


In [38]:
def tokenize_text(text):
    """Split text into words and sentences.
    Args:
        text (str): Input text
    Returns:
        dict: Dictionary with word and sentence tokens
    """
    from nltk.tokenize import word_tokenize, sent_tokenize
    import nltk
    nltk.download('punkt')
    
    return {
        'words': word_tokenize(text),
        'sentences': sent_tokenize(text)
    }

test_cases = [
    ["Natural language processing helps computers understand and work with human language."],
    ["The researchers are developing better models. They're making progress daily."]
]

run_tests(tokenize_text, test_cases)

Case 1: ['Natural language processing helps computers understand and work with human language. '] -> {'words': ['Natural', 'language', 'processing', 'helps', 'computers', 'understand', 'and', 'work', 'with', 'human', 'language', '.'], 'sentences': ['Natural language processing helps computers understand and work with human language.']} | Excel: =TOKENIZE_TEXT("Natural language processing helps computers understand and work with human language. ")
Case 2: ["The researchers are developing better models. They're making progress daily."] -> {'words': ['The', 'researchers', 'are', 'developing', 'better', 'models', '.', 'They', "'re", 'making', 'progress', 'daily', '.'], 'sentences': ['The researchers are developing better models.', "They're making progress daily."]} | Excel: =TOKENIZE_TEXT("The researchers are developing better models. They're making progress daily.")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [39]:
def reduce_words(text):
    """Perform stemming and lemmatization on text.
    Args:
        text (str): Input text
    Returns:
        dict: Dictionary with stemmed and lemmatized words
    """
    from nltk.tokenize import word_tokenize
    from nltk.stem import PorterStemmer, WordNetLemmatizer
    import nltk
    nltk.download('punkt')
    nltk.download('wordnet')
    
    words = word_tokenize(text)
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()
    return {
        'stemmed': [stemmer.stem(word) for word in words],
        'lemmatized': [lemmatizer.lemmatize(word) for word in words]
    }

test_cases = [
    ["The researchers are developing better models. They're making progress daily."]
]

run_tests(reduce_words, test_cases)

Case 1: ["The researchers are developing better models. They're making progress daily."] -> {'stemmed': ['the', 'research', 'are', 'develop', 'better', 'model', '.', 'they', "'re", 'make', 'progress', 'daili', '.'], 'lemmatized': ['The', 'researcher', 'are', 'developing', 'better', 'model', '.', 'They', "'re", 'making', 'progress', 'daily', '.']} | Excel: =REDUCE_WORDS("The researchers are developing better models. They're making progress daily.")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [40]:
def analyze_pos(text):
    """Tag parts of speech in text.
    Args:
        text (str): Input text
    Returns:
        list: List of (word, POS) tuples
    """
    from nltk import pos_tag, word_tokenize
    import nltk
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    
    words = word_tokenize(text)
    return pos_tag(words)

test_cases = [
    ["The quick brown fox jumps over the lazy dog."]
]

run_tests(analyze_pos, test_cases)

Case 1: ['The quick brown fox jumps over the lazy dog.'] -> [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')] | Excel: =ANALYZE_POS("The quick brown fox jumps over the lazy dog.")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [41]:
def process_text(text):
    """Clean and process text.
    Args:
        text (str): Input text
    Returns:
        list: Processed words
    """
    from nltk.tokenize import word_tokenize
    from nltk.corpus import stopwords
    import nltk
    nltk.download('punkt')
    nltk.download('stopwords')
    
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text.lower())
    return [word for word in words if word.isalnum() and word not in stop_words]

test_cases = [
    ["Natural language processing helps computers understand and work with human language!"]
]

run_tests(process_text, test_cases)

Case 1: ['Natural language processing helps computers understand and work with human language!'] -> ['natural', 'language', 'processing', 'helps', 'computers', 'understand', 'work', 'human', 'language'] | Excel: =PROCESS_TEXT("Natural language processing helps computers understand and work with human language!")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [42]:
def find_collocations(text):
    """Find common word pairs and trigrams.
    Args:
        text (str): Input text
    Returns:
        dict: Dictionary with bigrams and trigrams
    """
    from nltk.tokenize import word_tokenize
    from nltk import ngrams
    import nltk
    nltk.download('punkt')
    
    words = word_tokenize(text)
    return {
        'bigrams': list(ngrams(words, 2)),
        'trigrams': list(ngrams(words, 3))
    }

test_cases = [
    ["Natural language processing helps computers understand human language."]
]

run_tests(find_collocations, test_cases)

Case 1: ['Natural language processing helps computers understand human language.'] -> {'bigrams': [('Natural', 'language'), ('language', 'processing'), ('processing', 'helps'), ('helps', 'computers'), ('computers', 'understand'), ('understand', 'human'), ('human', 'language'), ('language', '.')], 'trigrams': [('Natural', 'language', 'processing'), ('language', 'processing', 'helps'), ('processing', 'helps', 'computers'), ('helps', 'computers', 'understand'), ('computers', 'understand', 'human'), ('understand', 'human', 'language'), ('human', 'language', '.')]} | Excel: =FIND_COLLOCATIONS("Natural language processing helps computers understand human language.")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\brent\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
