### DESCRIPTION:
    This example shows how to generate MQL (MongoDB Query Language) queries from user input using the OpenAI GPT-3.5 completion model.
### REQUIREMENTS:
    Create a .env file with your OpenAI settings and save it in the root directory of this project with the following entries:
    OPENAI_DEPLOYMENT_ENDPOINT ="<your openai endpoint>" 
    OPENAI_API_KEY = "<your openai api key>"
    OPENAI_DEPLOYMENT_NAME = "<your gpt35 deployment name>"
    OPENAI_DEPLOYMENT_VERSION = "<gpt35 api version>"
    OPENAI_MODEL_NAME="<gpt35 model name>"


In [79]:
from dotenv import load_dotenv
import pandas as pd
import utils
from azure.cosmos import CosmosClient
import pymongo

In [80]:
# Open a client for the Cosmos DB for MongoDB account named in utils
client = pymongo.MongoClient(utils.COSMOS_MONGO_DB_CONN_STRING)

# Validate the connection string up front instead of failing on the first query
try:
    client.server_info()
except pymongo.errors.ServerSelectionTimeoutError:
    raise TimeoutError("Invalid API for MongoDB connection string or timed out when attempting to connect")

# Database and collection handles used by the cells below
db = client.music
songsCollection = db.songs

In [None]:
def convert_to_json_array(arr):
    """Split a bracketed, comma-separated string into a list of strings.

    Every '[' and ']' character is removed and the remainder is split on ','.
    Items are not stripped or unquoted.

    Args:
        arr: The raw cell value, e.g. "[rock,pop]". May be None or a
            non-string placeholder for a missing value.

    Returns:
        A list of the comma-separated pieces, or None when ``arr`` is not a
        string.
    """
    # pandas.read_csv yields float NaN (not None) for empty cells; NaN has no
    # str methods, so treat every non-string value as "no data".
    if not isinstance(arr, str):
        return None
    return arr.replace('[', '').replace(']', '').split(',')
    
# Load the songs CSV and insert one MongoDB document per row.
# Columns copied as-is into the document:
scalar_columns = ("track_id", "artist_id", "album_id", "path", "duration")
# Document key -> CSV column for the fields that are parsed into lists:
list_columns = {"moods": "mood", "instruments": "instrument", "genres": "genre"}

df = pd.read_csv('./data/music/final.csv')
for index, row in df.iterrows():
    song = {name: row[name] for name in scalar_columns}
    song.update(
        {key: convert_to_json_array(row[source]) for key, source in list_columns.items()}
    )
    obj = songsCollection.insert_one(song)
    # Echo each stored document together with the id returned by the insert
    print(f'{song}  {obj.inserted_id}')

In [81]:
# Report how many documents the songs collection holds (empty filter = all)
total_songs = songsCollection.count_documents({})
print(total_songs)

7214


In [82]:
# Look up a single song that has "happy" among its moods
happy_song = songsCollection.find_one({"moods": "happy"})
print(happy_song)

{'_id': ObjectId('64b225762a1a1cc5b121d46b'), 'track_id': 'track_0021492', 'artist_id': 'artist_003177', 'album_id': 'album_003005', 'path': '92/21492.mp3', 'duration': 80.0, 'moods': ['funny', 'happy', 'positive', 'relaxing'], 'instruments': ['guitar'], 'genres': ['alternative', 'electronic', 'experimental']}


In [83]:
import json
import os
import re

import openai
import pandas as pd
from dotenv import load_dotenv

import utils

# Read the .env file into the process environment, then pick up the settings
load_dotenv()
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")

# Point the openai module at the Azure endpoint described by those settings
openai.api_type = "azure"
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_key = OPENAI_API_KEY
openai.api_version = OPENAI_DEPLOYMENT_VERSION

In [84]:
def call_openai(template_prefix, text):
    """Send a prompt to the completion deployment and return the cleaned answer.

    The prompt is ``template_prefix + text + template_sufix``; ``template_sufix``
    is a module-level global that must be defined before this function is called.

    Args:
        template_prefix: Prompt text placed before the user input.
        text: The user's request.

    Returns:
        The text of the first completion choice after passing it through
        ``utils.remove_chars``, ``utils.start_after_string`` and
        ``utils.remove_tail_tags`` (presumably: newlines dropped, everything up
        to "Answer:" cut, and a trailing "<|im_end|>" removed — confirm in utils).
    """
    prompt = template_prefix + text + template_sufix
    request = {
        "engine": utils.OPENAI_DEPLOYMENT_NAME,
        "prompt": prompt,
        "temperature": 0,
        "max_tokens": 4096,
        "top_p": 0.95,
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stop": ["<|im_end|>"],
    }
    completion = openai.Completion.create(**request)

    # Only the first choice is used
    raw_text = completion['choices'][0]['text']
    flattened = utils.remove_chars("\n", raw_text)
    answer = utils.start_after_string("Answer:", flattened)
    return utils.remove_tail_tags("<|im_end|>", answer)

# Building blocks of the key-quoting pattern: quoted string literals are matched
# first so that nothing inside them is ever rewritten.
_DOUBLE_QUOTED = r'"(?:\\.|[^"\\])*"'
_SINGLE_QUOTED = r"'(?:\\.|[^'\\])*'"
# A bare key: optional leading '$', an identifier (dots allowed), then a colon.
_BARE_KEY = r'(?<![\w$.])(\$?[A-Za-z_][\w.]*)(?=\s*:)'
_MQL_STRING_OR_BARE_KEY = re.compile(
    f'({_DOUBLE_QUOTED}|{_SINGLE_QUOTED})|{_BARE_KEY}'
)


def _quote_bare_keys(query):
    """Wrap every unquoted key of a shell-style MQL document in double quotes."""
    def _quote(match):
        literal, bare_key = match.groups()
        # String literals (including already-quoted keys) pass through unchanged
        return literal if literal is not None else f'"{bare_key}"'
    return _MQL_STRING_OR_BARE_KEY.sub(_quote, query)


def call_openai_mql_response(template_prefix, text):
    """Ask the model for an MQL query and return its filter as JSON-style text.

    The model is expected to answer with something like
    ``db.songs.find({$or: [...], moods: "happy"})``. The text between the first
    "(" and the next ")" is taken as the filter document; if the answer has no
    "(" at all, the whole answer is treated as the filter. Unquoted keys
    (operators such as ``$gte`` and field names such as ``moods``) are then
    wrapped in double quotes so the result can be fed to ``json.loads``.

    Keys are quoted as whole tokens, so ``$gte``/``$lte`` are not mangled by the
    shorter ``$gt``/``$lt``, and keys or values that are already quoted are left
    alone. Single-quoted strings are not converted to double quotes.

    Args:
        template_prefix: Prompt text placed before the user input.
        text: The user's request.

    Returns:
        The filter document as a string with its keys double-quoted.
    """
    response = call_openai(template_prefix, text)
    _, open_paren, after_paren = response.partition("(")
    if open_paren:
        query = after_paren.partition(")")[0]
    else:
        # No call syntax in the answer: assume it is the bare filter document
        query = response
    return _quote_bare_keys(query)

In [85]:
# Prompt text placed before the user's request. call_openai() concatenates
# this prefix, the request text and template_sufix, in that order.
mongoquery_template_prefix = """
<|im_start|>system
I have a mongoDB table containing the following columns: 
{\"moods\": [],\"instruments\": [],\"genres\": []}
Write an MQL query based on the user input below:

Answer in a concise MQL query format
user input: 
"""
# Appended after the user text: ends the section opened by "<|im_start|>system"
# in the prefix and opens the assistant section. call_openai() reads this global
# by this exact (misspelled) name, so it must not be renamed on its own.
template_sufix = "<|im_end|>\n<|im_start|>assistant"

In [86]:
# Turn a plain-English request into an MQL filter, then run it against the collection
user_request = "I would like to find music for guitar or piano, for a happy mood"
response = call_openai_mql_response(mongoquery_template_prefix, user_request)
mql_filter = json.loads(response)
print(songsCollection.find_one(mql_filter))

{'_id': ObjectId('64b225762a1a1cc5b121d46b'), 'track_id': 'track_0021492', 'artist_id': 'artist_003177', 'album_id': 'album_003005', 'path': '92/21492.mp3', 'duration': 80.0, 'moods': ['funny', 'happy', 'positive', 'relaxing'], 'instruments': ['guitar'], 'genres': ['alternative', 'electronic', 'experimental']}
