# Intro to Information Retrieval

## some examples

### the most basic info retrieval algorithm

In [3]:
# Flat list of single-word entries; some words repeat, so one query can match several times.
docs_animals = ['cat', 'dog', 'zebra', 'monkey', 'cow', 'dog', 'zebra', 'cat', 'cat']

In [2]:
def quick_search(doc, query):
    """Return every entry of ``doc`` that is equal to ``query``.

    Args:
        doc: Iterable of keywords to scan from start to end.
        query: Value each keyword is compared against with ``==``.

    Returns:
        A new list with one item per matching keyword, in the order they
        were encountered; empty when nothing matches.
    """
    matches = []
    for term in doc:
        if term == query:
            matches.append(term)
    return matches

In [4]:
# 'cat' appears three times in docs_animals, so three matches are expected.
print(quick_search(docs_animals, query='cat'))

['cat', 'cat', 'cat']


In [5]:
# 'dog' appears twice in docs_animals, so two matches are expected.
print(quick_search(docs_animals, query='dog'))

['dog', 'dog']


## What if our docs contain more than one word?

In [6]:
# Each document is a list of words; a document's position in this outer list
# (0, 1, 2, 3) is what the index-based lookup further down uses as its id.
documents = [
    ['word1', 'word2', 'word3'],
    ['word1', 'word2'],
    ['word1', 'word3'],
    ['word1']
]

In [7]:
def quick_search_multi_words(docs, query):
    """Return the documents that contain ``query``.

    Args:
        docs: Iterable of documents; each document must support the ``in``
            operator (the notebook uses lists of words).
        query: Word to look for inside each document.

    Returns:
        A new list of the matching documents (the same objects, not copies),
        in their original order; empty when no document contains ``query``.
    """
    return list(filter(lambda words: query in words, docs))

In [8]:
# Every document contains 'word1', so all four documents are returned.
print(quick_search_multi_words(documents, query='word1'))

[['word1', 'word2', 'word3'], ['word1', 'word2'], ['word1', 'word3'], ['word1']]


In [9]:
# Only the first and third documents contain 'word3'.
print(quick_search_multi_words(documents, query='word3'))

[['word1', 'word2', 'word3'], ['word1', 'word3']]


## Inverted Index

In [10]:
# Hand-built inverted index: each word maps to the positions (within `documents`)
# of the documents that contain it, so a lookup needs no scan over the documents.
inverted_index = {
    'word1': [0, 1, 2, 3],
    'word2': [0, 1],
    'word3': [0, 2]
}

In [11]:
# Document ids recorded for 'word1'.
print(inverted_index['word1'])

[0, 1, 2, 3]


In [13]:
# Document ids recorded for 'word3'.
print(inverted_index['word3'])

[0, 2]


In [14]:
def indexed_search(docs, index, query):
    """Return the documents listed under ``query`` in an inverted index.

    Args:
        docs: Sequence of documents, addressable by integer position.
        index: Dict-like mapping from a term to the list of positions in
            ``docs`` whose documents contain that term.
        query: Term to look up.

    Returns:
        The documents whose ids are stored under ``query``, in the order the
        ids appear in the index. An empty list when ``query`` is not a key of
        ``index``.

    Raises:
        IndexError: If the index holds an id that is out of range for ``docs``.
    """
    # A term absent from the index means "no matching documents", not an
    # error, so fall back to an empty id list instead of raising KeyError.
    doc_ids = index.get(query, [])
    return [docs[doc_id] for doc_id in doc_ids]

In [15]:
# Turn the ids stored under 'word2' back into the documents themselves.
print(indexed_search(docs=documents, index=inverted_index, query='word2'))

[['word1', 'word2', 'word3'], ['word1', 'word2']]
