In [1]:
import spacy
from pos_tag_dict import pos_tag_dict

In [2]:
# !python -m spacy download en_core_web_md

In [242]:
import re
def is_valid_time_format(input_str):
    """Report whether ``input_str`` looks like a clock time such as ``8:00AM``.

    The accepted shape is one or two digits, a colon, exactly two digits and
    an optional upper-case ``AM``/``PM`` suffix with no separating space.

    Args:
        input_str: The token to test.

    Returns:
        True when the token has that shape, otherwise False.
    """
    time_pattern = r'^\d{1,2}:\d{2}(AM|PM)?$'
    return re.match(time_pattern, input_str) is not None
    
def get_pos_tag(word):
    """Return the part-of-speech tag used for ``word``.

    Tokens accepted by ``is_valid_time_format`` are tagged ``"TIME"``; any
    other word is lower-cased and looked up in ``pos_tag_dict``.

    Args:
        word: A single token.

    Returns:
        The tag string for the token.

    Raises:
        KeyError: presumably, when the lower-cased word has no entry
            (assuming ``pos_tag_dict`` is a plain dict -- TODO confirm).
    """
    if not is_valid_time_format(word):
        return pos_tag_dict[word.lower()]
    return "TIME"

def get_word_list(text):
    """Split ``text`` into space-separated words, dropping one trailing symbol.

    If the last character is not alphanumeric (for example ``?`` or ``.``) it
    is removed before splitting. Only one trailing character is stripped, and
    the split is on single spaces, so consecutive spaces yield empty strings.

    Args:
        text: The sentence or question to tokenise.

    Returns:
        The list of words; ``['']`` when ``text`` is empty.
    """
    # Check for emptiness first: text[-1] on "" would raise IndexError.
    if text and not text[-1].isalnum():
        text = text[:-1]
    return text.split(' ')
    

def split_question(question):
    """Derive the answer's target POS and the hint words from a question.

    Two shortcuts are tried first:

    * A question that starts with ``'What'`` and whose last two characters
      are ``'do'`` (so no trailing punctuation) targets ``"VERB"``; the hint
      words are every word after the first, in their original case.
    * A question containing ``'How many'`` targets ``'NUM'``; the hint words
      are every word from the third onward, in their original case.

    Otherwise the question is split at its first word tagged ``VERB`` or
    ``AUX``. The words before it form the question type, which
    ``get_target_pos`` maps to the target POS. The hint words are the
    lower-cased words from the split word onward, excluding the split word
    itself when it is ``does``/``do``/``did``.

    Args:
        question: The question text.

    Returns:
        A ``(target_pos, hint_words)`` tuple, or ``None`` when no VERB/AUX
        word is found.
    """
    word_list = get_word_list(question)

    if question[:4] == 'What' and question[-2:] == 'do':
        return "VERB", word_list[1:]

    if 'How many' in question:
        return 'NUM', word_list[2:]

    split_index = None
    for index, word in enumerate(word_list):
        if get_pos_tag(word) in ["VERB", "AUX"]:
            split_index = index
            break

    # Compare with None explicitly: index 0 is a legitimate split point, and a
    # plain truthiness test would wrongly report it as "no verb found".
    if split_index is None:
        return None

    question_type = ' '.join(word_list[:split_index])
    target_pos = get_target_pos(question_type)

    # The helper verbs does/do/did carry no hint value, so skip past them.
    split_word = word_list[split_index]
    start = split_index + 1 if split_word in ['does', 'do', 'did'] else split_index
    hint_words = [hint.lower() for hint in word_list[start:]]

    return target_pos, hint_words

def get_target_pos(question_type):
    """Map a question type (e.g. ``"Who"``) to the POS tags of its answer.

    Args:
        question_type: The words of the question that precede its first
            verb, joined by spaces.

    Returns:
        The ``lookup_dict`` entry for ``question_type``, or the string
        ``"NOUN"`` when the type is not listed.
    """
    return lookup_dict.get(question_type, "NOUN")
    

def get_word_index(word, word_list):
    """List every position in ``word_list`` whose lower-cased entry is ``word``.

    Only the list entries are lower-cased; ``word`` is compared as given.

    Args:
        word: The word to look for.
        word_list: The words to search.

    Returns:
        The matching indices in ascending order (empty when there are none).
    """
    return [
        position
        for position, entry in enumerate(word_list)
        if entry.lower() == word
    ]

def search_sentence(sentence, target_pos, hint_words, question):
    """Pick the sentence word with a target POS that lies closest to a hint.

    Only the first hint word that occurs in the sentence (compared in lower
    case) and is not in ``ignore_list`` is used: the function prints that
    hint and returns straight after scanning the sentence for it.

    A sentence word is a candidate unless it

    * occurs in ``question`` (a substring test on the raw question text),
    * is tagged ``PROPN`` and is itself one of ``hint_words``, or
    * has a tag that is not in ``target_pos``.

    Among the candidates, the one at the smallest index distance from any
    occurrence of the hint wins; on a tie the later word is kept.

    Args:
        sentence: The sentence that holds the answer.
        target_pos: The acceptable POS tag(s) for the answer.
        hint_words: Words taken from the question.
        question: The raw question text.

    Returns:
        The chosen word, or ``None`` when no hint is usable or no candidate
        qualifies.
    """
    tokens = get_word_list(sentence)
    lowered_tokens = [token.lower() for token in tokens]
    best_dist = len(tokens)
    answer = None

    for hint_word in hint_words:
        if hint_word not in lowered_tokens or hint_word in ignore_list:
            continue

        anchors = get_word_index(hint_word, tokens)
        for position, candidate in enumerate(tokens):
            if candidate in question:
                continue
            tag = get_pos_tag(candidate)
            if tag == "PROPN" and candidate in hint_words:
                continue
            if tag not in target_pos:
                continue
            dist = min(abs(anchor - position) for anchor in anchors)
            # "<=" so that, on equal distance, the later word wins.
            if dist <= best_dist:
                best_dist = dist
                answer = candidate

        print("HINT: "+hint_word)
        return answer

    return answer



In [None]:
    
def solve(sentence, question):
    """Answer ``question`` with a single word taken from ``sentence``.

    The question is analysed by ``split_question``, the resulting target POS
    and hint words are printed, and ``search_sentence`` picks the answer.

    Args:
        sentence: The sentence that holds the answer.
        question: The question about that sentence.

    Returns:
        The answer word, or ``None`` when the question cannot be analysed or
        no suitable word is found.
    """
    parsed = split_question(question)
    # split_question returns None when it finds no verb/auxiliary; unpacking
    # that directly would raise TypeError.
    if parsed is None:
        return None

    target_pos, hint_words = parsed
    print(target_pos, hint_words)
    return search_sentence(sentence, target_pos, hint_words, question)

In [None]:
import re

# Helper function to check if the input string is a valid time format.
def is_valid_time_format(input_str):
    """Tell whether ``input_str`` is a time token like ``8:00`` or ``8:00AM``.

    A match needs one or two hour digits, a colon, two minute digits and,
    optionally, an upper-case ``AM`` or ``PM`` directly after the minutes.

    Args:
        input_str: The token to check.

    Returns:
        True for a match, False otherwise.
    """
    found = re.match(r'^\d{1,2}:\d{2}(AM|PM)?$', input_str)
    return found is not None

# Function to get the POS tag of a word.
def get_pos_tag(word):
    """Look up the POS tag for ``word``, treating time tokens specially.

    Args:
        word: A single token.

    Returns:
        ``"TIME"`` when ``is_valid_time_format`` accepts the token; otherwise
        the ``pos_tag_dict`` entry for the lower-cased token.
    """
    is_time = is_valid_time_format(word)
    return "TIME" if is_time else pos_tag_dict[word.lower()]

# Function to convert a text into a list of words.
def get_word_list(text):
    """Convert ``text`` into a list of words split on single spaces.

    One trailing non-alphanumeric character (such as ``?`` or ``.``) is
    removed before splitting; punctuation elsewhere is left in place.

    Args:
        text: The sentence or question to split.

    Returns:
        The list of words; ``['']`` when ``text`` is empty.
    """
    # The "text and" guard keeps an empty string from raising IndexError on
    # text[-1].
    has_trailing_symbol = bool(text) and not text[-1].isalnum()
    trimmed = text[:-1] if has_trailing_symbol else text
    return trimmed.split(' ')

# Function to split a question into its type and hint words.
def split_question(question):
    """Split a question into the answer's target POS and its hint words.

    Args:
        question: The question text.

    Returns:
        A ``(target_pos, hint_words)`` tuple, or ``None`` when the question
        contains no word tagged ``VERB`` or ``AUX``. In the two special cases
        below ``target_pos`` is a plain string and the hint words keep their
        original case; otherwise ``target_pos`` comes from
        ``get_target_pos`` and the hint words are lower-cased.
    """
    word_list = get_word_list(question)

    # Special handling for questions starting with "What" and ending in "do"
    # (the raw text is checked, so trailing punctuation prevents a match).
    if question[:4] == 'What' and question[-2:] == 'do':
        return "VERB", word_list[1:]

    # Special handling for questions with "How many".
    if 'How many' in question:
        return 'NUM', word_list[2:]

    # Position of the first VERB or AUX word. The None default covers
    # questions without one; previously split_index was left unassigned in
    # that case and the check below raised UnboundLocalError.
    split_index = next(
        (
            index
            for index, candidate in enumerate(word_list)
            if get_pos_tag(candidate) in ["VERB", "AUX"]
        ),
        None,
    )

    # "is None" rather than "not split_index": a verb at index 0 is valid.
    if split_index is None:
        return None

    # The words before the verb name the question type, which determines the
    # target POS for the answer.
    question_type = ' '.join(word_list[:split_index])
    target_pos = get_target_pos(question_type)

    # Hint words start at the verb, or just after it for does/do/did.
    verb = word_list[split_index]
    if verb in ['does', 'do', 'did']:
        raw_hints = word_list[split_index+1:]
    else:
        raw_hints = word_list[split_index:]
    hint_words = [hint.lower() for hint in raw_hints]

    return target_pos, hint_words

# Get target POS tags based on the question type.
def get_target_pos(question_type):
    """Return the answer POS tags registered for ``question_type``.

    Args:
        question_type: The leading words of a question, e.g. ``"How long"``.

    Returns:
        ``lookup_dict[question_type]`` when the type is listed, otherwise the
        string ``"NOUN"``.
    """
    return lookup_dict[question_type] if question_type in lookup_dict else "NOUN"

# Helper function to get the indices of a word in a list.
def get_word_index(word, word_list):
    """Collect the indices at which ``word`` occurs in ``word_list``.

    Entries of ``word_list`` are lower-cased before comparison; ``word`` is
    used as passed in.

    Args:
        word: The word to find.
        word_list: The list of words to search.

    Returns:
        The indices of all matches, in ascending order.
    """
    lowered = [entry.lower() for entry in word_list]
    return [index for index, entry in enumerate(lowered) if entry == word]

# Function to search the sentence for the answer based on the hints.
def search_sentence(sentence, target_pos, hint_words, question):
    """Search ``sentence`` for the answer word nearest to a hint word.

    The first hint word that occurs in the sentence (case-insensitively) and
    is not in ``ignore_list`` is the only one used. Sentence words are then
    filtered: a word is dropped if it occurs in ``question`` (a substring
    test on the raw text), if it is a ``PROPN`` that is also a hint word, or
    if its tag is not in ``target_pos``. The remaining word closest to any
    occurrence of the hint is returned, with ties going to the later word.

    Args:
        sentence: The sentence that holds the answer.
        target_pos: The acceptable POS tag(s) for the answer.
        hint_words: Words taken from the question.
        question: The raw question text.

    Returns:
        The selected word, or ``None`` if no hint is usable or nothing
        passes the filters.
    """
    sentence_words = get_word_list(sentence)
    lowered_words = [entry.lower() for entry in sentence_words]

    # Only the first usable hint matters: the search returns right after it.
    chosen_hint = next(
        (
            hint
            for hint in hint_words
            if hint in lowered_words and hint not in ignore_list
        ),
        None,
    )
    if chosen_hint is None:
        return None

    hint_positions = get_word_index(chosen_hint, sentence_words)
    closest = len(sentence_words)
    answer = None
    for word_index, word in enumerate(sentence_words):
        # Ignore words that are present in the question.
        if word in question:
            continue

        tag = get_pos_tag(word)
        # Ignore proper nouns that are also hint words, and words whose tag
        # does not match the target.
        if (tag == "PROPN" and word in hint_words) or tag not in target_pos:
            continue

        # Distance to the nearest occurrence of the hint word.
        dist = min(abs(position - word_index) for position in hint_positions)
        if dist <= closest:
            closest = dist
            answer = word

    return answer


In [176]:
# Hint words that search_sentence skips when choosing a hint to anchor on.
ignore_list = ['and', 'the']

In [130]:
# Maps a question type (the words before the question's first verb) to the
# POS tags an answer may carry. get_target_pos falls back to "NOUN" for any
# type that is not listed here.
lookup_dict = {
    "Where": ["NOUN"],
    "Who": ["PROPN", "PRON"],
    "What": ["NOUN"],
    "How long": ["ADJ"],
    "How far": ["UNIT"],
    "How big": ["ADJ"],
    "How": ["VERB"],
    "At what time": ["TIME"],
    "When": ["TIME"],
    "What color": ["ADJ"],
    # split_question handles 'How many' with its own shortcut before this
    # table is consulted.
    "How many": ["NUM"],
}

In [8]:
# First sample sentence and the questions asked about it.
sentence_1 = "Ada brought a short note to Irene."
question_1 = "Who brought the note?"
question_2 = "What did Ada bring?"
question_3 = "Who did Ada bring the note to?"
question_4 = "How long was the note?"

# Second sample sentence and the questions asked about it.
sentence_2 = "David and Lucy walk one mile to go to school every day at 8:00AM when there is no snow."
question_5 = "Who does Lucy go to school with?"
question_6 = "Where do David and Lucy go?"
question_7 = "How far do David and Lucy walk?"
question_8 = "How do David and Lucy get to school?"
question_9 = "At what time do David and Lucy walk to school?"

In [310]:
# Load the vocabulary file (presumably one word per line -- confirm against
# the data). The context manager closes the handle as soon as it is read.
with open('mostcommon.txt', 'r') as f:
    vocab = f.read().split('\n')

# Drop only the empty piece that a trailing newline leaves behind; slicing
# off the last element unconditionally would lose the final word whenever
# the file does not end with a newline.
if vocab[-1] == '':
    vocab.pop()

In [314]:
# Print each vocabulary word with its tag as a '"word":"TAG",' line, i.e. in
# the shape of Python dict entries.
for word in vocab:
    print(f'"{word}":"{get_pos_tag(word)}",')

"Serena":"VERB",
"Andrew":"PROPN",
"Bobbie":"PROPN",
"Cason":"NOUN",
"David":"PROPN",
"Farzana":"PROPN",
"Frank":"PROPN",
"Hannah":"PROPN",
"Ida":"PROPN",
"Irene":"PROPN",
"Jim":"PROPN",
"Jose":"PROPN",
"Keith":"PROPN",
"Laura":"PROPN",
"Lucy":"PROPN",
"Meredith":"NOUN",
"Nick":"PROPN",
"Ada":"PROPN",
"Yeeling":"VERB",
"Yan":"NOUN",
"the":"PRON",
"of":"ADP",
"to":"ADP",
"and":"CCONJ",
"a":"X",
"in":"ADP",
"is":"AUX",
"it":"PRON",
"you":"PRON",
"that":"SCONJ",
"he":"PRON",
"was":"AUX",
"for":"ADP",
"on":"ADP",
"are":"AUX",
"with":"ADP",
"as":"ADP",
"I":"PRON",
"his":"PRON",
"they":"PRON",
"be":"AUX",
"at":"ADP",
"one":"NUM",
"have":"VERB",
"this":"PRON",
"from":"ADP",
"or":"CCONJ",
"had":"VERB",
"by":"ADP",
"hot":"ADJ",
"but":"CCONJ",
"some":"PRON",
"what":"PRON",
"there":"ADV",
"we":"PRON",
"can":"AUX",
"out":"ADV",
"other":"ADJ",
"were":"AUX",
"all":"PRON",
"your":"PRON",
"when":"SCONJ",
"up":"ADV",
"use":"VERB",
"word":"NOUN",
"how":"SCONJ",
"said":"VERB",
"an":"PRON",
"each":"PRON",

In [232]:
# Manual tag override: PRON is one of the tags lookup_dict accepts for "Who"
# questions, so 'children' can be returned as a "Who" answer.
pos_tag_dict['children'] = "PRON"

In [233]:
# Ad-hoc check of a "Who" question; the sentence ends with a period that
# get_word_list strips.
sentence_test = 'There are a thousand children in this town.'
question_test = 'Who is in this town'

solve(sentence_test, question_test)

['PROPN', 'PRON'] ['is', 'in', 'this', 'town']
HINT: in


'children'

In [170]:
# Manual tag override for 'her' -- presumably so it is not picked as a
# PRON answer to "Who" questions (TODO confirm the original tag).
pos_tag_dict['her'] = "NOUN"

In [225]:
# Ad-hoc check of a "Who" question whose hint word 'with' sits next to the
# answer in the sentence.
sentence_test = 'Serena saw a home last night with her friend'
question_test = 'Who was with Serena'

solve(sentence_test, question_test)

['PROPN', 'PRON'] ['was', 'with', 'serena']
HINT: with


'friend'

In [238]:
# Sanity check of the [-2:] slice that split_question uses to detect
# questions ending in 'do'.
'What will Lucy do'[-2:]

'do'

In [243]:
# Ad-hoc check of the "What ... do" shortcut in split_question, which
# targets a VERB answer.
sentence_test = 'Lucy will write a book'
question_test = 'What will Lucy do'

solve(sentence_test, question_test)

VERB ['will', 'Lucy', 'do']
HINT: will


'write'

In [223]:
# Ad-hoc check of a "What color" question (ADJ target) with two adjectives
# in the sentence; the one nearest the hint word should win.
sentence_test = 'The white dog and the blue horse play together'
question_test = 'What color is the horse'

solve(sentence_test, question_test)

['ADJ'] ['is', 'the', 'horse']
HINT: horse


'blue'

In [218]:
# Ad-hoc check of a plain "What" question (NOUN target).
sentence_test = 'The blue bird will sing in the morning'
question_test = 'What will sing in the morning'

solve(sentence_test, question_test)

['NOUN'] ['will', 'sing', 'in', 'the', 'morning']
HINT: will


'bird'

In [217]:
# Ad-hoc check of a "Who" question that ends with '?', which get_word_list
# strips from the question.
sentence_test = 'Give us all your money'
question_test = 'Who should you give your money to?'

solve(sentence_test, question_test)

['PROPN', 'PRON'] ['should', 'you', 'give', 'your', 'money', 'to']
HINT: give


'us'

In [216]:
# Ad-hoc check of a "How big" question (ADJ target).
sentence_test = 'It is a small world after all'
question_test = 'How big is the world?'

solve(sentence_test, question_test)

['ADJ'] ['is', 'the', 'world']
HINT: is


'small'

In [215]:
# Manual tag overrides. TIME is the tag lookup_dict targets for "When" and
# "At what time" questions, so 'morning' can be returned for those.
pos_tag_dict['morning'] = "TIME"
pos_tag_dict['house'] = "NOUN"

In [214]:
# Ad-hoc check of the 'How many' shortcut in split_question (NUM target).
sentence_test = 'There are a thousand children in this town'
question_test = 'How many children are in this town'

solve(sentence_test, question_test)

NUM ['children', 'are', 'in', 'this', 'town']
HINT: children


'thousand'

In [213]:
# Ad-hoc check of a "Who" question; same sentence and question as the
# other 'Serena' cell in this notebook.
sentence_test = 'Serena saw a home last night with her friend'
question_test = 'Who was with Serena'

solve(sentence_test, question_test)

['PROPN', 'PRON'] ['was', 'with', 'serena']
HINT: with


'friend'

In [212]:
# Run every sample question; the trailing comment on each line is the
# expected answer. solve() also prints its own diagnostics before each one.
print(solve(sentence_1, question_1))  # "Ada"
print(solve(sentence_1, question_2))  # "note" or "a note"
print(solve(sentence_1, question_3))  # "Irene"
print(solve(sentence_1, question_4))  # "short"

print(solve(sentence_2, question_5))  # "David"
print(solve(sentence_2, question_6))  # "school"
print(solve(sentence_2, question_7))  # "mile" or "a mile"
print(solve(sentence_2, question_8))  # "walk"
print(solve(sentence_2, question_9))  # "8:00AM"

['PROPN', 'PRON'] ['brought', 'the', 'note']
HINT: brought
Ada
['NOUN'] ['ada', 'bring']
HINT: ada
note
['PROPN', 'PRON'] ['ada', 'bring', 'the', 'note', 'to']
HINT: ada
Irene
['ADJ'] ['was', 'the', 'note']
HINT: note
short
['PROPN', 'PRON'] ['lucy', 'go', 'to', 'school', 'with']
HINT: lucy
David
['NOUN'] ['david', 'and', 'lucy', 'go']
HINT: david
school
['UNIT'] ['david', 'and', 'lucy', 'walk']
HINT: david
mile
['VERB'] ['david', 'and', 'lucy', 'get', 'to', 'school']
HINT: david
walk
['TIME'] ['david', 'and', 'lucy', 'walk', 'to', 'school']
HINT: david
8:00AM
