In this walkthrough we will build an application with full-text search capabilities by incrementally adding features that are exposed via a simple REST API.

## Setup

1. Create a cluster in Atlas:

<img src="assets/create_cluster.png" style="width: 400px;"/>

2. Import data into the cluster or use existing data.

<img src="assets/import_data.png" style="width: 400px;"/>

3. Create the full-text search (FTS) index:

<img src="assets/create_index.png" style="width: 400px;"/>


## Install Prerequisites

In [9]:
# Install PyMongo, the MongoDB driver imported by the connection cell below.
! pip install pymongo

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8 -m pip install --upgrade pip' command.


In [10]:
# Just making sure everything prints cleanly

from pygments.style import Style
from pygments.token import Token
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import Terminal256Formatter


class MyStyle(Style):
    """Colour scheme for highlighted output: Pygments token type -> ANSI colour."""

    # Each key is a Pygments token type; each value is the ANSI colour name
    # applied to tokens of that type.
    styles = {
        Token.String: "ansigreen",
        Token.Literal: "ansibrightyellow",
        Token.Keyword: "ansimagenta",
        Token.Operator: "ansibrightmagenta",
    }

def pp(doc):
    """Pretty-print *doc* as indented, colourised JSON.

    The value is serialised with ``json_util.dumps``, re-parsed with the
    standard ``json`` module and dumped again with a four-space indent.  The
    resulting text is highlighted with ``JsonLexer`` using ``MyStyle`` and
    written to standard output.
    """
    # Serialise with json_util first, then round-trip through the json
    # module to obtain the indented layout.
    serialised = json_util.dumps(doc)
    parsed = json.loads(serialised)
    indented = json.dumps(parsed, indent=4)

    formatter = Terminal256Formatter(style=MyStyle)
    print(highlight(indented, JsonLexer(), formatter))

In [11]:
import pymongo
from bson import json_util
import ssl
from config import mongo_uri
import json
    
# SECURITY NOTE: TLS certificate verification is disabled here, exactly as in
# the original walkthrough.  That is acceptable only for a local demo; for any
# real deployment remove the option so the server certificate is verified.
#
# The legacy ``ssl_cert_reqs=ssl.CERT_NONE`` keyword was removed in PyMongo 4,
# so the equivalent ``tlsAllowInvalidCertificates=True`` option is used; it is
# also accepted by PyMongo 3.9+.
conn = pymongo.MongoClient(mongo_uri, tlsAllowInvalidCertificates=True)

# Collection handle shared by the search cells in this notebook.
movies_collection = conn['sample_mflix']['movies']

## Basic Search

Run a simple text search.

In [12]:
# Basic full-text search: match "fight club" against the `title` field and
# return only the title together with its relevance score.
pipeline = [
    {
        '$search': {
            'text': {
                'query': "fight club",
                'path': "title",
            },
        },
    },
    {
        '$project': {
            'title': 1,
            '_id': 0,
            # Expose the relevance score computed by the $search stage.
            'score': {
                '$meta': 'searchScore',
            },
        },
    },
]
docs = movies_collection.aggregate(pipeline)
pp(docs)

# The shared client is deliberately left open: `movies_collection` is reused
# by the following cells, and PyMongo 4 raises InvalidOperation when a closed
# MongoClient is used again.  Close `conn` once, at the end of the notebook.

[
    {
        "title": [32m"Fight Club"[39m,
        "score": [93m7.341153144836426[39m
    },
    {
        "title": [32m"Fight, Zatoichi, Fight"[39m,
        "score": [93m4.634571075439453[39m
    },
    {
        "title": [32m"Fight, Zatoichi, Fight"[39m,
        "score": [93m4.634571075439453[39m
    },
    {
        "title": [32m"Girl Fight"[39m,
        "score": [93m3.8967673778533936[39m
    },
    {
        "title": [32m"Street Fight"[39m,
        "score": [93m3.8967673778533936[39m
    },
    {
        "title": [32m"The Club"[39m,
        "score": [93m3.4443857669830322[39m
    },
    {
        "title": [32m"Typhoon Club"[39m,
        "score": [93m3.4443857669830322[39m
    },
    {
        "title": [32m"Suicide Club"[39m,
        "score": [93m3.4443857669830322[39m
    },
    {
        "title": [32m"Typhoon Club"[39m,
        "score": [93m3.4443857669830322[39m
    },
    {
        "title": [32m"Club Paradise"[39m,
        "score": [93

## Fuzzy

Often referred to as approximate string matching, fuzzy matching is a technique for finding strings that match a pattern approximately rather than exactly. Its common use cases include misspellings and typing errors made by users ("fat fingering").

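**maxEdits** is the maximum Levenshtein (edit) distance — the number of single-character insertions, deletions, or substitutions needed to turn one string into another — allowed between a query term and a matching term.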

In [13]:
# Fuzzy full-text search: the misspelled query "might cub" is matched against
# the `title` field, tolerating up to two single-character edits per term.
pipeline = [
    {
        '$search': {
            'text': {
                'query': "might cub",
                'path': "title",
                'fuzzy': {
                    'maxEdits': 2,
                },
            },
        },
    },
    {
        '$project': {
            'title': 1,
            '_id': 0,
            # Expose the relevance score computed by the $search stage.
            'score': {
                '$meta': 'searchScore',
            },
        },
    },
]
docs = movies_collection.aggregate(pipeline)
pp(docs)

# The shared client is deliberately left open: `movies_collection` is reused
# by the following cells, and PyMongo 4 raises InvalidOperation when a closed
# MongoClient is used again.  Close `conn` once, at the end of the notebook.

[
    {
        "title": [32m"Fight Club"[39m,
        "score": [93m4.288863182067871[39m
    },
    {
        "title": [32m"Fright Night"[39m,
        "score": [93m3.5497400760650635[39m
    },
    {
        "title": [32m"Fright Night"[39m,
        "score": [93m3.5497400760650635[39m
    },
    {
        "title": [32m"Friday Night Lights"[39m,
        "score": [93m3.0529441833496094[39m
    },
    {
        "title": [32m"Eight Crazy Nights"[39m,
        "score": [93m3.0529441833496094[39m
    },
    {
        "title": [32m"Eight Miles High"[39m,
        "score": [93m2.834876775741577[39m
    },
    {
        "title": [32m"Fight for Your Right Revisited"[39m,
        "score": [93m2.7260446548461914[39m
    },
    {
        "title": [32m"Cube"[39m,
        "score": [93m2.6997640132904053[39m
    },
    {
        "title": [32m"Cut!"[39m,
        "score": [93m2.6997640132904053[39m
    },
    {
        "title": [32m"Fright Night Part 2"[39m,
        "

## Highlighting

Add a relevance score and hit highlights to the results.

In [14]:
# Full-text search with hit highlighting: match "fight" against the `title`
# field and return the title, its relevance score and the highlighted hits.
pipeline = [
    {
        '$search': {
            'text': {
                'query': "fight",
                'path': "title",
            },
            # text highlighting
            'highlight': {"path": "title"},
        },
    },
    {
        '$project': {
            'title': 1,
            '_id': 0,
            # 'score' is listed once only; the original literal repeated the
            # key, and the later duplicate silently overwrote the first.
            'score': {
                '$meta': 'searchScore',
            },
            'highlights': {"$meta": "searchHighlights"},
        },
    },
]

docs = movies_collection.aggregate(pipeline)
pp(docs)

# The shared client is deliberately left open so that any later cells can keep
# using `movies_collection`; PyMongo 4 raises InvalidOperation when a closed
# MongoClient is used again.  Close `conn` once, at the end of the notebook.

[
    {
        "title": [32m"Fight, Zatoichi, Fight"[39m,
        "score": [93m4.634571075439453[39m,
        "highlights": [
            {
                "path": [32m"title"[39m,
                "texts": [
                    {
                        "value": [32m"Fight"[39m,
                        "type": [32m"hit"[39m
                    },
                    {
                        "value": [32m", Zatoichi, "[39m,
                        "type": [32m"text"[39m
                    },
                    {
                        "value": [32m"Fight"[39m,
                        "type": [32m"hit"[39m
                    }
                ],
                "score": [93m1.1319897174835205[39m
            }
        ]
    },
    {
        "title": [32m"Fight, Zatoichi, Fight"[39m,
        "score": [93m4.634571075439453[39m,
        "highlights": [
            {
                "path": [32m"title"[39m,
                "texts": [
                    {
    