In [None]:
import os

# Hugging Face credentials and cache locations. These assignments are kept ahead
# of the Hugging Face imports that follow in this cell.
# Replace the placeholder locally; do not commit a real token with the notebook.
hf_token = "HUGGINGFACE TOKEN HERE"
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
os.environ["HF_TOKEN"] = hf_token

# Both cache variables point at the same directory.
os.environ['HF_HOME'] = 'YOUR_HOME_DIR/.cache/huggingface/'
# Fixed spelling: this key was previously written as 'TRANSFOMERS_CACHE', so the
# intended variable was never actually set.
os.environ['TRANSFORMERS_CACHE'] = 'YOUR_HOME_DIR/.cache/huggingface/'
import transformers
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import openai

In [None]:
# Load the embedding model and register it as the llama_index default.
EMBED_MODEL = HuggingFaceEmbedding(model_name="abhinand/MedEmbed-large-v0.1")
Settings.embed_model = EMBED_MODEL

# Embed a throwaway string once and measure the vector length; the result is
# used as the vector dimension in later cells.
probe_vector = EMBED_MODEL.get_text_embedding("hi")
embed_dim = len(probe_vector)

In [None]:
## LOAD VECTOR INDEX FROM DISK (NOT WORKING PROPERLY)

from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore
from docling.document_converter import DocumentConverter
from llama_index.node_parser.docling import DoclingNodeParser
from docling.chunking import HybridChunker
from pathlib import Path

# Directory holding the persisted docstore / graph store / index store.
persist_dir = "YOUR_HOME_DIR/datasets/persistent_vector_store"

# Open the existing Milvus database file; overwrite=False is passed so the
# stored collection is reused rather than replaced.
milvus_db_path = Path("YOUR_HOME_DIR/datasets/docling_md_vectordb.db")
vector_store = MilvusVectorStore(uri=str(milvus_db_path), dim=embed_dim, overwrite=False)

# Restore each persisted store from the same directory, then bundle them with
# the vector store into one storage context.
persisted_docstore = SimpleDocumentStore.from_persist_dir(persist_dir=persist_dir)
persisted_graph_store = SimpleGraphStore.from_persist_dir(persist_dir=persist_dir)
persisted_index_store = SimpleIndexStore.from_persist_dir(persist_dir=persist_dir)

storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    docstore=persisted_docstore,
    graph_store=persisted_graph_store,
    index_store=persisted_index_store,
)

# NOTE(review): the cell header marks this loading path as not working
# properly; the cause is not visible here, so the logic is left as it was.
index = load_index_from_storage(storage_context)

In [None]:
import requests
import json

# Module-level retriever over the loaded index, requesting 5 results per query
# (similarity_top_k=5); generate_response() below reads it as a global.
retriever = index.as_retriever(similarity_top_k=5)

def _join_stream_chunks(raw_body):
    """Concatenate the ``response`` fragments of a newline-delimited JSON body.

    Lines that are not valid JSON objects, or that carry no ``done`` key, are
    skipped. Reading stops at the first chunk whose ``done`` flag is true.

    Raises:
        RuntimeError: if a chunk carries an ``error`` key, or if the body ends
            without any chunk reporting ``done`` as true.
    """
    pieces = []
    # Split on "\n" only: the stream is one JSON document per line, and
    # str.splitlines() would also break on characters that may occur inside
    # a JSON string value.
    for line in raw_body.split("\n"):
        try:
            chunk = json.loads(line)
        except json.JSONDecodeError:
            # Blank separator lines and partial fragments are expected.
            continue
        if not isinstance(chunk, dict):
            continue
        if "error" in chunk:
            raise RuntimeError(f"Generation failed: {chunk['error']}")
        if "done" not in chunk:
            continue
        pieces.append(chunk.get("response") or "")
        if chunk["done"]:
            return "".join(pieces)
    # Previously this case surfaced as an UnboundLocalError on ``text``.
    raise RuntimeError("Generation stream ended without a chunk marked done")


def generate_response(
    question,
    model_name="qwen-32b-COT",
    url="http://leinevmgpu001.mh-hannover.local:11434/api/generate",
    timeout=120,
):
    """Retrieve context for ``question`` and ask the generation endpoint for an answer.

    Args:
        question: The question text; it is used both as the retrieval query and
            inside the prompt.
        model_name: Value sent as the ``model`` field of the request.
        url: Generation endpoint that receives the POST request.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        A ``(text, sources)`` tuple: the concatenated generated text and the
        list of retrieved context strings that were placed in the prompt.

    Raises:
        requests.exceptions.RequestException: on connection problems, timeouts,
            or a non-success HTTP status.
        RuntimeError: if the stream reports an error or never signals completion.
    """
    retrieved_docs = retriever.retrieve(question)
    print(f"Retrieved {len(retrieved_docs)} docs")
    # NOTE(review): the node's metadata dict is passed as get_content's argument,
    # unchanged from the original; that parameter looks like it is meant to be a
    # MetadataMode value - confirm which metadata should be rendered here.
    sources = [s.get_content(s.metadata) for s in retrieved_docs]
    sourcesStr = "\n\n".join(sources)

    QUERY = f"### Input:\n{question}\nContext:\n{sourcesStr}\n"

    data = {
        "model": model_name,
        "prompt": QUERY,
        "stream": True,
        "options": {"num_predict": 4096},
    }
    response = requests.post(url, json=data, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    # The body is JSON lines, so decode as UTF-8 rather than letting requests
    # guess a charset (a wrong guess would corrupt non-ASCII text).
    response.encoding = "utf-8"
    return _join_stream_chunks(response.text), sources

In [None]:
from pyarrow.parquet import ParquetDataset
import time
from datasets import Dataset

# Models to evaluate; one dataset is generated and saved per entry.
modelNames = ["llama3.1:8b-instruct-fp16"]

# The evaluation questions do not depend on the model, so read them once
# instead of re-reading the parquet file on every loop iteration.
dataset = ParquetDataset("YOUR_HOME_DIR/datasets/evaluation/human_eval/base.parquet")
pds = dataset.read().to_pandas()

for usedModel in modelNames:
    print(f"Generating for model {usedModel}")
    usedDataset = "deepseek_COT_raft"  # not referenced anywhere in this cell
    # ':' and '/' in the model name are replaced so it forms a single path component.
    savePath = f"YOUR_HOME_DIR/datasets/evaluation/human_eval/{usedModel.replace(':', '-').replace('/', '-')}"

    test_cases = []
    for idx, row in pds.iterrows():
        d = row.to_dict()
        question = d["question"]
        output = d["answer"]
        try:
            generated_output, used_sources = generate_response(question, model_name=usedModel)
        except Exception as err:
            # Best effort: skip this row, but report why it failed rather than
            # only where.
            print("error at ", idx)
            print(f"  {type(err).__name__}: {err}")
            continue
        test_cases.append(
            {
                "input": question,
                "expected_output": output,
                "actual_output": generated_output,
                "retrieval_context": used_sources,
            }
        )

    print(f"Saving to {savePath}")

    # Dump every collected case for manual inspection.
    for case in test_cases:
        for k in case.keys():
            print(k)
            print(case[k])
        print("#" * 50)

    if not test_cases:
        # Nothing was generated, so there are no rows to build a dataset from.
        print(f"No test cases generated for {usedModel}; nothing saved")
        continue

    def saveDS():
        """Yield the collected test cases one by one for Dataset.from_generator."""
        for tc in test_cases:
            yield tc

    tc_dataset = Dataset.from_generator(saveDS)
    tc_dataset.save_to_disk(savePath)

In [None]:
# Close the client held by the Milvus vector store created in the loading cell.
# NOTE(review): presumably this releases the local database file - confirm.
vector_store.client.close()

In [None]:
import requests

# Define the base URL for the Ollama server
hostname = "leinevmgpu001.mh-hannover.local"
url = f"http://{hostname}:11434/api/tags"  # Adjust the URL if needed based on your server setup

try:
    # Send a GET request to the server to fetch installed models.
    # A timeout keeps the cell from hanging forever if the host is unreachable.
    response = requests.get(url, timeout=10)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        models = response.json()  # Expected shape: {"models": [{"name": ...}, ...]}
        print("Installed models on the Ollama server:")
        print([m["name"] for m in models["models"]])
    else:
        print(f"Failed to fetch models. HTTP Status Code: {response.status_code}")
except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")
except (ValueError, KeyError, TypeError) as e:
    # The server answered, but not with the expected JSON structure.
    print(f"Unexpected response format: {e!r}")
