# 1. Packages

In [38]:
import os 
from getpass import getpass 
import sys

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings

from operator import itemgetter
from typing import Dict, List

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import Runnable, RunnableParallel, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_core.vectorstores import VectorStore
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from langchain_core.runnables import chain
from langchain_core.messages import AIMessage, HumanMessage
from pinecone import Pinecone

from pinecone.data.index import Index
from dotenv import load_dotenv

# 2. Setup

In [39]:
# Load the variables read below through os.environ (API keys) from the .env
# file located two directories above; the path is relative, so this
# presumably expects the notebook's own folder as working directory — confirm.
load_dotenv(dotenv_path="../../.env")

True

In [40]:
# LangChain / LangSmith settings for this notebook, applied in one call.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        "LANGCHAIN_PROJECT": "kn-eval",
    }
)

In [41]:
# Indexing os.environ directly raises KeyError right here when the key is
# missing, instead of failing later inside a model call.
MISTRAL_API_KEY = os.environ["MISTRAL_API_KEY"]

In [42]:
# Chat model and embedding model clients, both authenticated with the same key.
mistral = ChatMistralAI(
    model="open-mistral-nemo",
    api_key=MISTRAL_API_KEY,
)
embeddings = MistralAIEmbeddings(
    model="mistral-embed",
    api_key=MISTRAL_API_KEY,
)



# 3. KN Setup

In [43]:
# Add the knowledgenest path to allow for imports in the notebook
current_dir = os.getcwd()

# The directory two levels above the working directory is the parent
# directory of knowledgenest; that is the entry sys.path needs.
kn_parent_dir = os.path.abspath(os.path.join(current_dir, '..', '..'))

# Guard against duplicates: notebook cells get re-run, and an unconditional
# append would add the same entry to sys.path on every execution.
if kn_parent_dir not in sys.path:
    sys.path.append(kn_parent_dir)

from knowledgenest.vector_database import init_pinecone, EMBEDDING_MODEL_DIM, SIM_METRIC
from knowledgenest.chat.utils import KNRag

In [44]:
# Name of the Pinecone index used for this evaluation; the same name is
# passed to delete_index in the optional clean-up cell at the end.
eval_idx_name = "knowledgenest-eval"
# Raises KeyError immediately if the Pinecone key is not in the environment.
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]

In [45]:
# Handle on the evaluation index, used below for stats, ingestion and KNRag.
# NOTE(review): presumably init_pinecone creates the index when it does not
# exist yet — confirm in knowledgenest.vector_database.
idx = init_pinecone(PINECONE_API_KEY, eval_idx_name, EMBEDDING_MODEL_DIM, SIM_METRIC)

### Ingest evaluation sources for RAG if necessary

In [46]:
from utils import embed_and_ingest_article
from utils import embed_and_ingest_video

In [47]:
# One-off snapshot of the index size; `is_empty` gates the ingestion cell
# below and is not refreshed unless this cell is run again.
index_stats = idx.describe_index_stats()
vector_count = index_stats["total_vector_count"]
is_empty = (vector_count == 0)

In [48]:
# Articles ingested into the evaluation index. The order matches the
# "First article" ... "Sixth article" comments of the QA dataset below.
articles_sources_links = [
    "https://paulgraham.com/foundermode.html",
    "https://www.paulgraham.com/persistence.html",
    "https://www.paulgraham.com/reddits.html",
    "https://www.paulgraham.com/google.html",
    "https://www.hopsworks.ai/post/mlops-to-ml-systems-with-fti-pipelines",
    "https://www.palladiummag.com/2024/08/30/when-the-mismanagerial-class-destroys-great-companies/",
]

# Collection of short news videos that the LLM could have had access to when trained.
# NOTE(review): confirm whether the intent is that the model could or could
# not have seen this content during training.
video_sources_links = [
    "https://www.youtube.com/watch?v=8QLVX9A7hqI",
    "https://www.youtube.com/watch?v=TNc14W8YOuI",
    "https://www.youtube.com/watch?v=sic0OJyyeZ0",
    "https://www.youtube.com/watch?v=2HGWuflXCUY",
    "https://www.youtube.com/watch?v=EDgD7NMY60U",
    "https://www.youtube.com/watch?v=GUr2AA6ljeU",
    "https://www.youtube.com/watch?v=DUPH2n3g5bg",
    "https://www.youtube.com/watch?v=0kOu4GLZRo0",
    "https://www.youtube.com/watch?v=SsH23u6XiGY",
    "https://www.youtube.com/watch?v=rvu8N6bA3PI"
]

In [49]:
# Populate the evaluation index only when the earlier stats call reported it empty.
if is_empty:
    # Each ingestion helper paired with the links it handles; articles first,
    # then videos.
    ingestion_plan = (
        (embed_and_ingest_article, articles_sources_links),
        (embed_and_ingest_video, video_sources_links),
    )

    # TODO put that in ThreadExecutor
    for ingest_source, source_links in ingestion_plan:
        for source_url in source_links:
            ingest_source(source_url, idx)

In [50]:
#from concurrent.futures import ThreadPoolExecutor
#
#if is_empty:
#    # Embed articles
#    with ThreadPoolExecutor(max_workers=5) as executor:
#        futures = [executor.submit(embed_and_ingest_article, url, idx) for url in articles_sources_links]
#        executor.shutdown(wait=True)
#
#    # Embed videos
#    with ThreadPoolExecutor(max_workers=5) as executor:
#        futures = [executor.submit(embed_and_ingest_video, url, idx) for url in video_sources_links]
#        executor.shutdown(wait=True)
#

# 4. Evals

### A. Create Dataset

In [51]:
from langsmith import Client
from langsmith.evaluation import evaluate

In [52]:
# LangSmith client built with no explicit arguments.
# NOTE(review): presumably picks up its credentials and endpoint from the
# environment configured above — confirm.
client = Client()
# Base name of the QA dataset; a version suffix is appended where the dataset is created.
base_dataset_name = "kn-eval-qa"

In [53]:
# Test QA
# Questions of the evaluation dataset. They are paired with `outputs` by
# position when the dataset is created, so both lists must keep the same
# length and order. Ten videos are listed for ingestion, but only the first
# nine have questions here.
inputs = [
    # First article
    "What is founder mode ?",

    # Second article
    "What are the main behaviours that set apart persistent from obstinate people ?",

    # Third article
    "Why did YC did not fund the first idea of Reddit's founders ?",

    # Fourth article
    "What should young people do if they want to start a startup ?",

    # Fifth article
    "What are the responsabilities of the three main components of machine learning systems?",

    # Sixth article
    "Explain the concept of 'portfolio theory of the firm' ?",

    # First video (two questions)
    "What are the main threats to the Schenghen Area ?",
    "Which countries are supporting the most the reestablishment of EU national border controls ?",

    # Second video (two questions)
    "According to Draghi's report, what are the main explanations for Europe's productivity slow down ?",
    "Give an overview of Draghi's proposed plan to revitalize Europe's economy",

    # Third video
    "On which grounds did president Macron choose its new prime minister ?",

    # Fourth video
    "Why was Spain traditionnaly migrant-friendly ?",

    # Fifth video
    "What are the main driving forces behind the surge of the AFD ?",

    # Sixth video
    "What are the measures that Orban took to revive Hungary's birth rate ?",

    # Seventh video
    "Explain what prevented Belgium to form a goverment ?",

    # Eighth video (can also include information from third video)
    "Why did'nt president Macron nominate a leftwing prime minister",

    # Ninth video
    "What recent events provoked tensions between Germany and Poland ?"
]

# Reference answers, paired with `inputs` by position (same length, same order).
#
# Every entry must be a single `str`. Multi-line answers rely on implicit
# string-literal concatenation inside parentheses, which means:
#   * each fragment except the last ends with a space, otherwise words get
#     glued together across line breaks;
#   * there must be NO comma between fragments, otherwise the parentheses
#     build a tuple of strings instead of one string.
#
# NOTE: the dataset-creation cell only uploads these answers when the dataset
# does not exist yet, so an already-created dataset keeps its old answers.
outputs = [
    # First article
    ("According to Paul Graham, founder mode is how company should be ran when they are still led by "
     "its founders, as opposed as when it's led by professional managers which is the 'manager mode'. "
     "Founder mode is not very well known, not teached in business school but we know it differs from "
     "manager mode as lots of founders have tried to mimic manager mode without success, as opposed "
     "to founders acting differently and achieving great success (as is the case with Steve Jobs)"),

    # Second article
    ("Persistent people keep listening to others and trying new things and they tend to be more "
     "focused on the most important things, the overall picture and goal and not too muched "
     "attached to details"),

    # Third article
    ("YC did not fund the first idea of Steve and Alexis - Reddit's founders - because they thought "
     "the idea was bad and they were still focusing on funding idea at this time."),

    # Fourth article
    ("There are three main things that young people should do in order to optimize their chances "
     "of founding a successfull startup: learning a technology, follow their interests and build "
     "projects"),

    # Fifth article
    ("The three main parts of efficient machine learning systems are the feature pipeline, the training "
     "pipeline and the inference pipeline. The feature pipeline computes and updates features from the "
     "data sources, the training pipelines regularly train machine learning systems and version their "
     "weights, and the inference pipeline is responsible for answering to client requests"),

    # Sixth article
    ("The portofolio of the firm is the phenomenon by which companies are not treated as human organizations "
     "but only as a package of financial products which parts can be traded or new parts can be added in order "
     "to maximize the financial figures of the balance sheet"),

    # First video (two answers)
    "The main threats to the Schenghen Area is the reestablishment of national border control aiming to fight illegal immigration and cross-border crime",
    ("The main countries supporting the reestablishment of national border controls are Germany, Poland, Hungary and Denmark. "
     "Other countries like France could follow suit as well"),

    # Second video (two answers)
    "According to Draghi, demographic decline, global markets fragmentation and industrial stagnation accounts for Europe's economical slow down",
    ("Draghi's proposes three main transformations to revive Europe's economy : "
     "Invest in new technologies in software and AI; "
     "Invest in decarbonizing the economy; "
     "Diversify supply chains and deepen its internal integration"),

    # Third video
    ("Because Barnier is very experienced, especially when it comes to EU politics "
     "and because the national rally did not oppose him as strongly as others."),

    # Fourth video
    ("Because of the legacy of Franco's dictatorship that brought skepticism towards "
     "nationalism, because Spain's welfare state offer very little to immigrants and "
     "because the Spanish press has been sober on the subject"),

    # Fifth video
    ("The Afd has capitalized on the increasing anti-immigrant sentiment that followed "
     "the 2010's migration, economic slow down, war in Ukraine and anti-green sentiments"),

    # Sixth video
    ("Family tax reduction, lifetime tax exemption for women with more "
     "than four children, loans that don't have to be repaid for couple with children "
     "and housing subsidies."),

    # Seventh video
    ("Because the different parties could not get along when it comes to fiscal policies "
     "and because there were lack of trust between the different parties."),

    # Eighth video
    ("Because a left government would be very influenced by Melenchon's party "
     "and because it would lack a sustainable majority in Parliament"),

    # Ninth video
    ("The two main issues are the destruction of the Nord Stream pipeline "
     "for which Poland is suspected to have played a part and the waning German "
     "support for the war in Ukraine")
]

In [54]:
# Versioned dataset name; examples are uploaded only when no dataset with
# that name is found.
dataset_name = f"{base_dataset_name}-v0"
datasets = client.list_datasets(dataset_name=dataset_name)

# Pull the first match (if any) from the listing; None means nothing was found.
existing_dataset = next(datasets, None)

if existing_dataset is not None:
    print(f"Dataset {dataset_name} already exists")
else:
    # Create the dataset, then upload the question/answer pairs, which are
    # matched by position in `inputs` and `outputs`.
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Input question of RAG KN",
    )
    question_records = [{"question": q} for q in inputs]
    answer_records = [{"answer": a} for a in outputs]
    client.create_examples(
        inputs=question_records,
        outputs=answer_records,
        dataset_id=dataset.id,
    )

### B. Evaluate RAG pipeline

In [55]:
# We test the pipeline with mistral.
# This value is read as a notebook-level global by predict_rag_answer, which
# passes it to KNRag as `provider`.
provider = "mistral" 

In [56]:
from langsmith import trace

def predict_rag_answer(example: dict):
    """Target function for answer evaluation: run one question through KNRag.

    Args:
        example: Inputs of one dataset example; must contain a "question" key.

    Returns:
        A dict with a single "answer" key holding the "output" field of the
        KNRag response.
    """
    # A new KNRag is built for every example, bound to the notebook-level
    # `provider` and Pinecone index `idx`.
    rag_pipeline = KNRag(provider=provider, pc_idx=idx)
    question = example["question"]
    with trace("KNRag", inputs={"message": question}):
        chat_payload = {"messages": [HumanMessage(question)]}
        rag_response = rag_pipeline.answer(chat_payload, stream=False)
    answer_text = rag_response["output"]
    return {"answer": answer_text}

In [57]:
from langsmith.evaluation import evaluate, LangChainStringEvaluator

# Evaluators applied to every prediction of the target function.
# NOTE(review): "cot_qa" presumably grades the prediction against the
# dataset's reference answer — confirm in the LangChain evaluator docs.
qa_evalulator = [LangChainStringEvaluator("cot_qa")]

test_results = evaluate(
    predict_rag_answer,
    data=dataset_name,
    evaluators=qa_evalulator,
    experiment_prefix="MistralNemoKNRag",
    num_repetitions=1,
    # Run the examples one at a time. With the default concurrency the
    # recorded run hit Mistral's "Requests rate limit exceeded" (HTTP 429)
    # and several target calls failed, so those examples were never answered.
    # If the limit is still reached, add retry/backoff inside the target.
    max_concurrency=1,
)

                seed was transferred to model_kwargs.
                Please confirm that seed is what you intended.


View the evaluation results for experiment: 'MistralNemoKNRag-5675a162' at:
https://smith.langchain.com/o/a700c4b6-5caf-57dc-a929-900e043ce283/datasets/b0dac92b-d193-4c82-8fa9-5665cc078be0/compare?selectedSessions=ffccee93-3d7e-4a36-acf3-42dea73a92fc




Error running target function: Error response 429 while fetching https://api.mistral.ai/v1/chat/completions: {"message":"Requests rate limit exceeded"}
An error occurred with MistralAI: 'data'
Error running target function: 'data'
An error occurred with MistralAI: 'data'
Error running target function: 'data'
17it [00:22,  1.32s/it]


### C. Clean up indexes (optional)

In [25]:
from utils import delete_index

# Optional and presumably destructive: this looks like it removes the
# evaluation index named by eval_idx_name — confirm in utils.delete_index.
# Skip this cell when the ingested sources should be kept for later runs,
# and keep it out of a "Run All" unless deletion is intended.
delete_index(PINECONE_API_KEY, eval_idx_name)