In [14]:
import pandas as pd
import json
import minsearch

# Ingestion

In [15]:
# Read the article records from the JSON file; UTF-8 is set explicitly
# rather than relying on the platform default encoding.
with open('../data/arsonor_data.json', encoding='utf-8') as json_file:
    documents = json.load(json_file)

In [16]:
# Index configuration: 'title', 'text' and 'tags' are registered as text fields,
# 'category' as a keyword field.
index = minsearch.Index(
    keyword_fields=['category'],
    text_fields=['title', 'text', 'tags'],
)

In [17]:
# Fit the index on the loaded documents. The call is left as the cell's last
# expression, so the notebook echoes the returned Index object.
index.fit(documents)

<minsearch.Index at 0x28caf0ad7f0>

# RAG flow

In [19]:
import os

In [21]:
from openai import OpenAI

# Shared OpenAI client used by the cells below. It is created with no arguments,
# so credentials must come from the environment (presumably OPENAI_API_KEY —
# confirm against the openai package documentation).
client = OpenAI()

In [40]:
# Sample question reused by the baseline LLM call and the retrieval cells below.
query = 'How to create a good, punchy beat for my productions?'

In [23]:
# Baseline: send the raw question to the model without any retrieved context.
baseline_messages = [{"role": "user", "content": query}]
response = client.chat.completions.create(
    model='gpt-4o-mini',
    messages=baseline_messages,
)

# Last expression of the cell: show the text of the first returned choice.
response.choices[0].message.content

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [26]:
def search(query, filter_dict=None, boost_dict=None, num_results=5):
    """Query the module-level ``index`` and return its results.

    Args:
        query: Free-text search string.
        filter_dict: Optional mapping forwarded to ``index.search`` as
            ``filter_dict``. Defaults to an empty dict.
        boost_dict: Optional mapping forwarded to ``index.search`` as
            ``boost_dict``. Defaults to an empty dict.
        num_results: Value forwarded to ``index.search`` as ``num_results``.
            Defaults to 5.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # None sentinels instead of mutable default arguments: each call gets its
    # own fresh dict, so nothing is shared between calls.
    return index.search(
        query=query,
        filter_dict={} if filter_dict is None else filter_dict,
        boost_dict={} if boost_dict is None else boost_dict,
        num_results=num_results,
    )

In [37]:
# Instruction template sent to the model; {question} and {context} are filled
# in by build_prompt().
prompt_template = """
You're an audio engineer and sound designer instructor for beginners.
You're particularly specialized in audio home-studio set-up, computer music production and audio post-production in general (editing, mixing and mastering). 
Answer the QUESTION based on the CONTEXT from our arsonor knowledge database (articles).
Use only the facts from the CONTEXT when answering the QUESTION.
Finally add in your response the top 5 articles of arsonor that are the best to read for answering this question.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# How a single retrieved document is rendered inside the CONTEXT section.
entry_template = """
article_title: {title}
article_content: {text}
article_keywords: {tags}
""".strip()

# Document fields referenced by entry_template. Keep in sync with the template.
_ENTRY_FIELDS = ("title", "text", "tags")


def build_prompt(query, search_results):
    """Assemble the full LLM prompt for ``query`` from retrieved documents.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dict-like documents. Each one is rendered
            with ``entry_template``; a field the document does not have is
            rendered as an empty string instead of raising ``KeyError``.

    Returns:
        The prompt string, stripped of leading and trailing whitespace.
        Entries are separated by one blank line.
    """
    entries = [
        entry_template.format(
            **{field: doc.get(field, "") for field in _ENTRY_FIELDS}
        )
        for doc in search_results
    ]
    context = "\n\n".join(entries)

    return prompt_template.format(question=query, context=context).strip()

In [41]:
# Retrieve matches for the sample query, then assemble the prompt from them.
# Both intermediate values are kept as notebook variables for inspection.
search_results = search(query)
prompt = build_prompt(query, search_results)

In [42]:
# Print the assembled prompt to check how the retrieved articles were rendered.
print(prompt)

You're an audio engineer and sound designer instructor for beginners.
You're particularly specialized in audio home-studio set-up, computer music production and audio post-production in general (editing, mixing and mastering). 
Answer the QUESTION based on the CONTEXT from our arsonor knowledge database (articles).
Use only the facts from the CONTEXT when answering the QUESTION.
Finally add in your response the top 5 articles of arsonor that are the best to read for answering this question.

QUESTION: How to create a good, punchy beat for my productions?

CONTEXT:
article_title: Ecouter les sons du quotidien pour améliorer vos productions
article_content: Voici le premier épisode du podcast Arsonor! Je t ‘y explique notamment: l’importance de savoir écouter les subtilités du son pour améliorer tes compétences en sound design et mixage audio. en quoi consiste l’écoute analytique/active et les bonnes questions qu’il faut se poser quand on écoute une musique. la relation entre ces questio

In [None]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        model: Model name passed to the chat completions call.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def rag(query, model='gpt-4o-mini'):
    """Answer ``query`` with retrieval-augmented generation.

    Runs ``search`` on the query, builds a prompt from the results with
    ``build_prompt``, and returns what ``llm`` produces for that prompt.

    Args:
        query: The user's question.
        model: Model name forwarded to ``llm``.

    Returns:
        The value returned by ``llm``.
    """
    retrieved_docs = search(query)
    grounded_prompt = build_prompt(query, retrieved_docs)
    return llm(grounded_prompt, model=model)

In [None]:
# End-to-end check of the RAG pipeline, this time with a French-language question.
question = 'De quel matériel ai-je besoin pour mon home studio?'
answer = rag(question)
print(answer)

# Retrieval evaluation

# RAG evaluation