In [1]:
%pip install llama-index transformers torch

Note: you may need to restart the kernel to use updated packages.


In [2]:
from llama_index.embeddings import HuggingFaceEmbedding

# Build the embedding model from the BAAI/bge-small-en-v1.5 checkpoint.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from llama_index.llms import Ollama
from llama_index import SimpleDirectoryReader, ServiceContext, KeywordTableIndex

In [4]:
# LLM handle: the "llama2" model through Ollama, with temperature set to 0.
llm = Ollama(
    model="llama2",
    temperature=0,
)

In [21]:
%pip install mlflow

Collecting mlflow
  Downloading mlflow-2.9.1-py3-none-any.whl.metadata (13 kB)
Collecting cloudpickle<4 (from mlflow)
  Downloading cloudpickle-3.0.0-py3-none-any.whl.metadata (7.0 kB)
Collecting databricks-cli<1,>=0.8.7 (from mlflow)
  Downloading databricks_cli-0.18.0-py2.py3-none-any.whl.metadata (4.0 kB)
Collecting entrypoints<1 (from mlflow)
  Downloading entrypoints-0.4-py3-none-any.whl (5.3 kB)
Collecting gitpython<4,>=2.1.0 (from mlflow)
  Using cached GitPython-3.1.40-py3-none-any.whl.metadata (12 kB)
Collecting protobuf<5,>=3.12.0 (from mlflow)
  Using cached protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)
Collecting importlib-metadata!=4.7.0,<8,>=3.7.0 (from mlflow)
  Downloading importlib_metadata-7.0.0-py3-none-any.whl.metadata (4.9 kB)
Collecting sqlparse<1,>=0.4.0 (from mlflow)
  Downloading sqlparse-0.4.4-py3-none-any.whl (41 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m3.3 MB/s[0m eta 

In [None]:
# NOTE(review): this cell was never executed and no `import mlflow` is visible in
# the notebook; the call passes only `model`, so it looks like an unfinished
# draft. TODO confirm the intended evaluation data/targets before running it.
mlflow.evaluate(model=llm, )

In [5]:
# Load the documents found in the local "data" directory.
docs = (
    SimpleDirectoryReader("data")
    .load_data()
)

In [6]:
# Bundle the LLM, the embedding model and the chunk size into one service
# context that the index and the evaluators share.
service_cxt = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=512,
)

In [7]:
# Build a keyword-table index over the loaded documents.
index = KeywordTableIndex.from_documents(
    docs,
    service_context=service_cxt,
)

In [8]:
# Query engine created from the index with no extra arguments; it answers the
# prompts issued in the evaluation cells.
query_engine = index.as_query_engine()

In [9]:
# response = query_engine.query("summarize in a sentence the contribution of streaming llm")

In [10]:
# print(response)

In [11]:
import pandas as pd

In [12]:
# Load the question/answer sheet.
# NOTE(review): the preview of this frame shows a leading `Unnamed: 0` column, so
# positional column offsets are shifted by one relative to the named fields —
# prefer column names when selecting from `df`.
df = pd.read_csv("qna.csv")

In [13]:
# Preview the first rows to check the column names and contents.
df.head()

Unnamed: 0,Question #,Question Prompt,Answer Choices,Correct Answer Choice
0,1.1,Archaea are the third domain of life and have ...,Choice 1 of 2:prokaryotes\n\nChoice 2 of 2:euk...,1- prokaryotes
1,1.2,"Archaea contain histone proteins, a feature co...",Choice 1 of 2:prokaryotes\n\nChoice 2 of 2:euk...,2- eukaryotes
2,1.3,"Archaea contain their genome in the cytoplasm,...",Choice 1 of 2:prokaryotes\n\nChoice 2 of 2:euk...,1- prokaryotes
3,1.4,Which of the following statements about telome...,Choice 1 of 4:Telomerase extends the 5’ ends o...,2 - Since telomeres are repeats of the sequenc...
4,1.5,Which of the following features apply to proka...,Choice 1 of 5:occurs in the cytoplasm\nChoice ...,1- occurs in the cytoplasm; 4- transcribed mRN...


In [15]:
from llama_index.evaluation import CorrectnessEvaluator, FaithfulnessEvaluator, RelevancyEvaluator

In [16]:
# Create the three evaluators in one pass; each one is constructed with the
# same service context, in the order correctness -> faithfulness -> relevancy.
correctness_evaluator, faithfulness_evaluator, relevancy_evaluator = (
    evaluator_cls(service_context=service_cxt)
    for evaluator_cls in (CorrectnessEvaluator, FaithfulnessEvaluator, RelevancyEvaluator)
)

In [20]:
question = df.iloc[0, 1]
choices = df.iloc[0, 2]
qna = f"Give the correct answer choice number to the following question\n{question}\n {choices}"
response = query_engine.query(qna)

# correctness evaluation
correct_answer = df.iloc[0, 3]
correctness_eval = correctness_evaluator.evaluate(query=qna, response=response,
                                                    reference=correct_answer)
correctness_score = correctness_eval.score
print(f"Correctness score: {correctness_score}")

RuntimeError: asyncio.run() cannot be called from a running event loop

In [19]:
data = []
for i in range(len(df)):
    question = df.iloc[i, 1]
    choices = df.iloc[i, 2]
    qna = f"Give the correct answer choice number to the following question\n{question}\n {choices}"
    response = query_engine.query(qna)

    # correctness evaluation
    correct_answer = df.iloc[i, 3]
    correctness_eval = await correctness_evaluator.evaluate(query=qna, response=response,
                                                      reference=correct_answer)
    correctness_score = correctness_eval.score
    # faithfulness evaluation
    faithfulness_eval = await faithfulness_evaluator.evaluate_response(query=qna, response=response)
    faithfulness_flag = faithfulness_eval.passing

    # relevancy evaluation
    relevancy_eval = await relevancy_evaluator.evaluate_response(query=qna, response=response)
    relevancy_flag = relevancy_eval.passing

    data.append({"question": question, "choices": choices, "qna": qna, "response": response,
                          "correctness_score": correctness_score, "faithfulness_flag": faithfulness_flag,
                          "relevancy_flag": relevancy_flag}, index=[0])
df = pd.DataFrame(data)
df.to_csv("results.csv", mode='a', header=False)

RuntimeError: asyncio.run() cannot be called from a running event loop