In [1]:
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string

# Download the NLTK data packages used by this notebook. Each call is safe to
# re-run: packages that are already installed are reported as up to date.
# presumably 'punkt' / 'punkt_tab' are the tokenizer data behind word_tokenize — verify against the NLTK docs
nltk.download('punkt')
nltk.download('punkt_tab')
# NOTE(review): 'stopwords' is not referenced by any cell shown here — confirm it is still needed.
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sthaa\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\sthaa\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sthaa\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
# Reference text searched by simple_qa. simple_qa splits this string on
# newlines, so every non-blank line below (the title, each *heading*, and each
# paragraph) is one candidate answer.
corpus = """The Sunshine-Filled Afternoon of Timmy and Fido

The sky was a brilliant blue, with just a few wispy clouds scattered about, as Timmy and his trusty companion Fido prepared for their adventure to the park. Fido, a playful and energetic golden retriever, had been waiting eagerly by the door, his tail wagging excitedly as he clutched his favorite toy, a bright red ball.

As they strolled through the park's gates, the fresh air and lush green grass filled their senses. Children's laughter and the chirping of birds created a lively atmosphere, setting the tone for a fun-filled afternoon. Timmy, with his messy brown hair and wide smile, couldn't wait to spend quality time with his best friend.

*Fido's Joy is Contagious*

As they made their way to the open field, Fido's enthusiasm became contagious. Timmy couldn't help but giggle at Fido's antics, as the dog bounded ahead, his red ball held tightly in his jaws. The sun cast a warm glow over the scene, illuminating the vibrant colors of the ball and Fido's gleaming coat.

With a burst of energy, Fido suddenly darted off, racing across the field with his prized ball. Timmy followed close behind, his eyes shining with excitement. The wind ruffled Timmy's hair as he chased after Fido, the sound of their laughter and Fido's barks filling the air.

*A Game of Catch*

As they reached the center of the field, Timmy called out to Fido, "Hey buddy, want to play catch?" Fido, understanding the cue, promptly dropped the ball at Timmy's feet. With a grin, Timmy picked up the ball and tossed it high into the air. Fido, with lightning-fast reflexes, leapt into action, catching the ball in mid-air.

The game of catch continued, with Timmy and Fido taking turns throwing and catching the ball. Their joy was palpable, as they reveled in the simple pleasure of playing together. As the afternoon wore on, the sun began to dip, casting a warm orange glow over the park.

*A Heartwarming Conclusion*

As the sun began to set, Timmy and Fido settled down on a nearby bench, exhausted but content. Fido, still clutching his beloved red ball, nuzzled Timmy's hand, his tail wagging softly. Timmy smiled, scratching Fido behind the ears, "You're the best buddy a guy could ask for, Fido."

As they sat together, watching the stars begin to twinkle in the evening sky, Timmy and Fido shared a special moment, one that would be etched in their memories forever. The red ball, now worn and faded, had brought them even closer together, a symbol of the unbreakable bond between a boy and his loyal companion."""

In [3]:
# Helper functions for text processing and question answering.

# Lowercase the text, strip ASCII punctuation, then tokenize it
def preprocess_text(text):
    """Normalize ``text`` and split it into tokens.

    The text is lowercased, every character listed in ``string.punctuation``
    is deleted (ASCII punctuation only), and the remainder is passed to
    ``word_tokenize``.

    Args:
        text: The raw string to process.

    Returns:
        The tokens produced by ``word_tokenize`` for the cleaned text.
    """
    # A translation table whose third argument lists characters to delete.
    drop_punctuation = str.maketrans('', '', string.punctuation)
    cleaned = text.lower().translate(drop_punctuation)
    return word_tokenize(cleaned)

# Simple function to process a question and find the best-matching line (title, heading, or paragraph) of the corpus
def simple_qa(question, corpus):
    """Return the line of ``corpus`` that is most similar to ``question``.

    The corpus is split on newlines, so each non-blank line (a title, heading,
    or paragraph) is one candidate answer. The candidates and the question are
    tokenized with ``preprocess_text``, weighted with a TF-IDF model fitted on
    the candidates, and ranked by cosine similarity to the question.

    Args:
        question: The question text.
        corpus: The reference text, one candidate answer per line.

    Returns:
        The original (unprocessed) candidate line with the highest similarity
        score. When every score is zero (the question shares no term with the
        corpus) the first candidate is returned. Returns an empty string when
        the corpus contains no non-blank lines.
    """
    # Materialize the filtered candidates once. The winning index must be
    # looked up in this same list; indexing the unfiltered split (which still
    # contains the blank separator lines) would return the wrong line.
    candidates = [line for line in corpus.split('\n') if line.strip()]
    if not candidates:
        return ""

    # Preprocess both the candidates and the question
    corpus_tokens = [preprocess_text(line) for line in candidates]
    question_tokens = preprocess_text(question)

    # The inputs are already token lists, so the vectorizer's own preprocessing
    # and tokenizing are replaced by identity functions. token_pattern=None
    # avoids the "token_pattern will not be used" warning.
    vectorizer = TfidfVectorizer(
        tokenizer=lambda tokens: tokens,
        preprocessor=lambda tokens: tokens,
        token_pattern=None,
    )
    corpus_vectors = vectorizer.fit_transform(corpus_tokens)
    question_vector = vectorizer.transform([question_tokens])

    # Pick the candidate with the highest cosine similarity to the question
    similarity_scores = cosine_similarity(question_vector, corpus_vectors).flatten()
    best_match_index = int(np.argmax(similarity_scores))

    return candidates[best_match_index]

# Part 2: Testing the Model

# Sample questions; the last one deliberately asks about something the corpus does not cover.
questions = [
    "Who is Timmy?",
    "Who is Fido’s best friend?",
    "what do the friends like to  do?",
    "What are the applications of a ball?",
    "How many parks are there in Toronto for the pals to play in?",  # testing questions beyond the text
]

# Show the corpus first, then each question next to the line the model selects for it.
print("The Corpus used for this example is : \n", corpus)
print("\n-------------------------------------\n")
for question in questions:
    answer = simple_qa(question, corpus)
    # One call with a newline separator prints the same two lines (plus the blank line) as before.
    print(f"Question: {question}", f"Answer: {answer}\n", sep="\n")

# Part 3: Experiment and Reflect (Assignment)
# - Try different questions and observe how well the model does and where it fails.
# - Add more text to the corpus and repeat the tests, does the model's accuracy change?
# - Think about what is required to have a robust question answering system.

The Corpus used for this example is : 
 The Sunshine-Filled Afternoon of Timmy and Fido

The sky was a brilliant blue, with just a few wispy clouds scattered about, as Timmy and his trusty companion Fido prepared for their adventure to the park. Fido, a playful and energetic golden retriever, had been waiting eagerly by the door, his tail wagging excitedly as he clutched his favorite toy, a bright red ball.

As they strolled through the park's gates, the fresh air and lush green grass filled their senses. Children's laughter and the chirping of birds created a lively atmosphere, setting the tone for a fun-filled afternoon. Timmy, with his messy brown hair and wide smile, couldn't wait to spend quality time with his best friend.

*Fido's Joy is Contagious*

As they made their way to the open field, Fido's enthusiasm became contagious. Timmy couldn't help but giggle at Fido's antics, as the dog bounded ahead, his red ball held tightly in his jaws. The sun cast a warm glow over the scene,

