## We can trigger whenever new document added into collection or update any document from Atlas

### Create a event trigger

In [None]:
exports = async function(changeEvent) {

    const doc = changeEvent.fullDocument;

    const url = 'https://api.openai.com/v1/embeddings';
    
    const openai_key = context.values.get("openAI_secret");
    try {
        console.log(`Processing document with id: ${doc._id}`);

        
        let response = await context.http.post({
            url: url,
             headers: {
                'Authorization': [`Bearer ${openai_key}`],
                'Content-Type': ['application/json']
            },
            body: JSON.stringify({
                
                input: doc.plot,
                model: context.values.get("model")
            })
        });

        
        let responseData = EJSON.parse(response.body.text());

        if(response.statusCode === 200) {
            console.log("Successfully received embedding.");

            const embedding = responseData.data[0].embedding;

            const collection = context.services.get("cluster0").db("sample_mflix").collection("movies");

            const result = await collection.updateOne(
                { _id: doc._id },
                { $set: { plot_embedding: embedding }}
            );

            if(result.modifiedCount === 1) {
                console.log("Successfully updated the document.");
            } else {
                console.log("Failed to update the document.");
            }
        } else {
            console.log(`Failed to receive embedding. Status code: ${response.statusCode}`);
        }

    } catch(err) {
        console.error(err);
    }
};

### Create Vector Embeddings
To create vector embeddings, use a function that makes an API request to the text embedding model of your choice. The text embedding model will create embeddings based on the text it receives.

In [None]:
def get_embeddings(text, model, api_key):
    url = "https://api.openai.com/v1/embeddings"
    headers = {"Content-Type": "application/json", "Authorization": "Bearer " + api_key}
    data = {"input": text, "model": model, "options": {"wait_for_model": True}}

    response = requests.post(url, headers=headers, data=json.dumps(data))
    responseData = response.json()

    return responseData["data"][0]["embedding"]

In [1]:
# example in Atlas to create serach index
db.movies.createSearchIndex(
  "vectorPlotIndex",
  "vectorSearch",
  {
     "fields": [
        {
           "type": "vector",
           "path": "plot_embedding",
           "numDimensions": 1536,
           "similarity": "cosine"
        }
     ]
  }
);

### Create a Vector Search Index with a Pre-filter Field
To create a vector search index, use createSearchIndex() method, which expects the name, type, and definition of the index. In this example, we use the type filter so that we can pre-filter on the year field when we use $vectorSearch.

In [2]:
db.movies.createSearchIndex(
  "vectorPlotIndex",
  "vectorSearch",
  {
     "fields": [
        {
           "type": "vector",
           "path": "plot_embedding",
           "numDimensions": 1536,
           "similarity": "cosine"
        },
        {
          "type": "filter",
          "path": "year"
        }
     ]
  }
);

NameError: name 'db' is not defined

In [None]:
pipeline = [
    {
        "$vectorSearch": {
            "index": "vectorPlotIndex",
            "path": "plot_embedding",
            "queryVector": embedding,
            "numCandidates": 100,
            "limit": 10
        }
    },
    {
        "$project": {
            "title": 1,
            "plot": 1,
            "score": {"$meta": "vectorSearchScore"}
        }
    }
]

x = collection.aggregate(pipeline)

### Create a Vector Search Query with a Filter
The following code uses $vectorSearch with a filter on the year field to pre-filter the data before performing a vector search.

In [None]:
pipeline = [
    {
        "$vectorSearch": {
            "index": "vectorPlotIndex",
            "path": "plot_embedding",
            "queryVector": embedding,
            "numCandidates": 100,
            "filter": {"year": {"$gt": 2010}},
            "limit": 10
        }
    },
    {
        "$project": {
            "title": 1,
            "plot": 1,
            "year": 1,
            "score": {"$meta": "vectorSearchScore"}
        }
    }
]

x = collection.aggregate(pipeline)

### Using vecotor search for semantic searches

In [None]:

limit_results = {
    "$limit" : limit
}

make_array = {
    "$group":{"_id": None, "docs": {"$push": "$$ROOT"}}
}

add_rank = 
    "$unwind": { "path": "$docs", "includeArrayIndex": "rank" }
}

def make_compute_score_doc (priority, score_field_name):
    return {
        "$addFields": {
            score_field_name: {
                "$divide": [
                    1.0,
                    { "$add": [ "$rank", priority, 1] }
                ]
        }
    }
}

In [None]:

combine_search_results = {
    "$group": {
        "_id":
        "$_id",
        "vs_score": {"$max": "$vs_score"},
        "ts_score": {"$max": "$ts_score"},
        "title": {"$first": "$title"},
        "plot": {"$first": "$plot"},
        "year": {"$first": "$year"}
    }
}

In [None]:

project_combined_results = {
    "$project": {
        "_id": 1,
        "title": 1,
        "plot": 1,
        "year": 1,
        "score": {
            "$let": {
                "vars": {
                    "vs_score": { "$ifNull": ["$vs_score", 0] },
                    "ts_score": { "$ifNull": ["$ts_score", 0] }
                },
                "in": { "$add": ["$$vs_score", "$$ts_score"] }
            }
        }
}

In [None]:
sort_results = {
    "$sort": {"score: -1}
}

In [None]:
limit_results = {
    "$limit": limit
}

In [None]:

pipeline = [
    vector_search,
    make_array,
    add_rank,
    make_compute_score_doc (vector_priority, "vs_score"),
    make_projection_doc("vs_score"),
    {
        "$unionWith": { "coll": "movies",
            "pipeline": [
                text_search,
                limit_results,
                make_array,
                add_rank,
                make_compute_score_doc(text_priority, "ts_score"),
                make_projection_doc("ts_score")
            ]
        ]
    }
},
combine_search_results,
project_combined_results,
sort_results,
limit_results

In [None]:
x = collection.aggregate(pipelinei)

### expected results

````
[
    {
        "_id": ObjectId("573a13c1f29313caabd649a8"),
        "title": "Escape Plan",
        "plot": "When a structural-security authority finds himself set up and incarcerated in the world's most secret and secure prison..",
    }, 
    {
        "year": 2013,
        "score": 0.5,
        "_id": ObjectId("573a1398f29313caabceb851"),
        "title": "Lock Up",
        "plot": "...transferred from minimum security to maximum security by a vindictive warden.",
        "year": 1989,
        "score": 0.5,
    }
];

### with score results

```

[
{
"_id": ObjectId("573a13c1f29313caabd649a8"),
"title": "Escape Plan",
"plot": "When a structural-security authority finds himself set up and incarcerated in the world's most secret and secure prison..",
}, {
"year": 2013,
"score": 0.5,
"_id": ObjectId("573a1398f29313caabceb851"),
"title": "Lock Up",
"plot": "...transferred from minimum security to maximum security by a vindictive warden.",
"year": 1989,
"score": 0.5,
];
},

In [None]:
# all together

vector_search = {
    "$vectorSearch": {
        "index":          "vectorPlotIndex",
        "path":           "plot_embedding",
        "queryVector":    embedding,
        "numCandidates":  num_candidates,
        "limit":          limit
    }
}

make_array = {
    "$group": { "_id": None, "docs": {"$push": "$$ROOT"} }
}

add_rank = {
    "$unwind": { "path": "$docs", "includeArrayIndex": "rank" }
}

def make_compute_score_doc(priority, score_field_name):
    return {
        "$addFields": {
            score_field_name: {
                "$divide": [
                    1.0,
                    { "$add": ["$rank", priority, 1] }
                ]
            }
        }
    }

def make_projection_doc(score_field_name):
    return  {
        "$project": {
            score_field_name:  1,
            "_id":             "$docs._id",
            "title":           "$docs.title",
            "plot":            "$docs.plot",
            "year":            "$docs.year",
        }
    }


text_search = {
    "$search": {
        "index":  "plotIndex",
        "text":   { "query": query, "path": "plot" },
    }
}

limit_results = {
    "$limit" : limit
}

combine_search_results = {
    "$group": {
        "_id":        "$_id",
        "vs_score":   {"$max":    "$vs_score"},
        "ts_score":   {"$max":    "$ts_score"},
        "title":      {"$first":  "$title"},
        "plot":       {"$first":  "$plot"},
        "year":       {"$first":  "$year"}
    }
}

project_combined_results = {
    "$project": {
        "_id":        1,
        "title":      1,
        "plot":       1,
        "year":       1,
        "score": {
            "$let": {
                "vars": {
                    "vs_score":  { "$ifNull":  ["$vs_score", 0] },
                    "ts_score":  { "$ifNull":  ["$ts_score", 0] }
                },
                "in": { "$add": ["$$vs_score", "$$ts_score"] }
            }
        }
    }
}

sort_results = {
    "$sort": { "score": -1}
}

pipeline = [
    vector_search,
    make_array,
    add_rank,
    make_compute_score_doc(vector_priority, "vs_score"),
    make_projection_doc("vs_score"),
    {
        "$unionWith": { "coll": "movies",
            "pipeline": [
                text_search,
                limit_results,
                make_array,
                add_rank,
                make_compute_score_doc(text_priority, "ts_score"),
                make_projection_doc("ts_score")
            ]
        }
    },
    combine_search_results,
    project_combined_results,
    sort_results,
    limit_results
]

x = collection.aggregate(pipeline)