In [34]:
%pip install llama-index transformers torch

Note: you may need to restart the kernel to use updated packages.


In [35]:
from llama_index.embeddings import HuggingFaceEmbedding

# Hugging Face model id of the embedding model; the resulting `embed_model`
# is handed to the ServiceContext further down.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"

# loads BAAI/bge-small-en-v1.5
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

In [36]:
from llama_index.llms import Ollama
from llama_index import SimpleDirectoryReader, ServiceContext, KeywordTableIndex

In [1]:
# LLM client passed to the ServiceContext below.
# NOTE: the `from llama_index.llms import Ollama` cell must be executed before
# this one; running it first on a fresh kernel raises
# `NameError: name 'Ollama' is not defined`.
llm = Ollama(
    model="llama2",
    temperature=0,
)

NameError: name 'Ollama' is not defined

In [38]:
# Load the source documents from the local "data" directory (path is relative
# to the notebook's working directory).
data_reader = SimpleDirectoryReader("data")
docs = data_reader.load_data()

In [39]:
# Bundle the LLM, the embedding model and the chunk size into one context
# object that is shared by the index and by the evaluators below.
service_cxt = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=512,
)

In [40]:
# Build the keyword-table index over the loaded documents, configured through
# the shared service context.
index = KeywordTableIndex.from_documents(
    docs,
    service_context=service_cxt,
)

In [41]:
# Query engine over the keyword-table index with default settings; every
# question in the cells below is sent through this object.
query_engine = index.as_query_engine()

In [42]:
# response = query_engine.query("summarize in a sentence the contribution of streaming llm")

In [43]:
# print(response)

In [44]:
import pandas as pd

In [45]:
# Load the multiple-choice question bank (path is relative to the notebook's
# working directory). The preview in the next cell shows the columns
# "Question #", "Question Prompt", "Answer Choices" and "Correct Answer Choice".
df = pd.read_csv("qna.csv")

In [46]:
# Preview the first rows to confirm the column layout used by the cells below.
df.head()

Unnamed: 0,Question #,Question Prompt,Answer Choices,Correct Answer Choice
0,1.1,Archaea are the third domain of life and have ...,Choice 1 of 2:prokaryotes\n\nChoice 2 of 2:euk...,1
1,1.2,"Archaea contain histone proteins, a feature co...",Choice 1 of 2:prokaryotes\n\nChoice 2 of 2:euk...,2
2,1.3,"Archaea contain their genome in the cytoplasm,...",Choice 1 of 2:prokaryotes\n\nChoice 2 of 2:euk...,1
3,1.4,Which of the following statements about telome...,Choice 1 of 4:Telomerase extends the 5’ ends o...,"2, 3"
4,1.5,Which of the following features apply to proka...,Choice 1 of 5:occurs in the cytoplasm\nChoice ...,"1, 4"


In [54]:
# Spot-check a single question before running the full evaluation loop.
# Fields are looked up by column label instead of by position, so the cell does
# not silently pick up the wrong field if qna.csv gains or reorders columns.
SAMPLE_ROW = 3  # positional row in df (question 1.4 in the df.head() preview)

sample = df.iloc[SAMPLE_ROW]
question = sample["Question Prompt"]
choices = sample["Answer Choices"]

# Prompt text is kept exactly as used in the evaluation loop.
qna = f"Give the correct answer choice number to the following question\n{question}\n {choices}"
response = query_engine.query(qna)
print(response)

 Choice 1 of 4: Telomerase extends the 3' ends of DNA. (Correction: The question states that telomerase extends the 5' ends, but it actually extends the 3' ends.)
Choice 3 of 4: Telomeres serve as an important buffer between genes and chromosomal ends, preventing loss of genetic information.

The context provided does not directly relate to the given question about telomeres. However, I will provide some additional information that may be helpful in understanding epigenetics and its relation to gene expression.

Epigenetics refers to changes in gene expression that occur without modifications to the underlying DNA sequence. Two primary mechanisms for epigenetic regulation are DNA methylation and histone modification.

1. DNA methylation: In the context of telomeres, telomeric DNA is usually hypomethylated compared to other regions of the genome. However, in some cases, telomeric DNA can become hypermethylated, leading to silencing of telomerase expression and shorter telomeres. This ma

In [57]:
from llama_index.evaluation import CorrectnessEvaluator, FaithfulnessEvaluator, RelevancyEvaluator

In [58]:
# Instantiate the three evaluators in one pass; each receives the same shared
# service context (and therefore the same LLM configuration).
correctness_evaluator, faithfulness_evaluator, relevancy_evaluator = (
    evaluator_cls(service_context=service_cxt)
    for evaluator_cls in (CorrectnessEvaluator, FaithfulnessEvaluator, RelevancyEvaluator)
)

In [49]:
# Evaluate the query engine on the question bank and append one result row per
# question to results.csv.
#
# Changes relative to the earlier version of this cell:
#   * the per-question result frame is no longer assigned to `df`; doing so
#     replaced the question bank with a one-row results table, which broke the
#     second loop iteration and every re-run of the cell;
#   * fields are read by column label rather than by position;
#   * the correctness evaluator receives the answer text (a string) instead of
#     the Response object; the faithfulness/relevancy evaluators still receive
#     the Response object through `evaluate_response`;
#   * a header row is written when results.csv is first created.
from pathlib import Path

RESULTS_PATH = Path("results.csv")
N_QUESTIONS = 1  # smoke-test size; set to len(df) to evaluate every question

for i in range(min(N_QUESTIONS, len(df))):
    row = df.iloc[i]
    question = row["Question Prompt"]
    choices = row["Answer Choices"]
    qna = f"Give the correct answer choice number to the following question\n{question}\n {choices}"
    response = query_engine.query(qna)
    answer_text = str(response)

    # correctness evaluation (compares the answer text with the reference answer)
    correct_answer = str(row["Correct Answer Choice"])
    correctness_eval = correctness_evaluator.evaluate(query=qna, response=answer_text,
                                                      reference=correct_answer)
    correctness_score = correctness_eval.score

    # faithfulness evaluation
    faithfulness_eval = faithfulness_evaluator.evaluate_response(query=qna, response=response)
    faithfulness_flag = faithfulness_eval.passing

    # relevancy evaluation
    relevancy_eval = relevancy_evaluator.evaluate_response(query=qna, response=response)
    relevancy_flag = relevancy_eval.passing

    # One-row frame for this question; deliberately NOT named `df`.
    result_row = pd.DataFrame({"question": question, "choices": choices, "qna": qna,
                               "response": answer_text,
                               "correctness_score": correctness_score,
                               "faithfulness_flag": faithfulness_flag,
                               "relevancy_flag": relevancy_flag}, index=[0])
    # Append after every question so partial results survive an interrupted run.
    # The header is emitted only when the file does not exist yet, so appending
    # to a file produced by earlier runs keeps its existing layout.
    result_row.to_csv(RESULTS_PATH, mode='a', header=not RESULTS_PATH.exists())

    print(response)