In [1]:
import os
import shutil
import pandas as pd
import numpy as np
import requests

from langchain_chroma import Chroma
from langchain_community.document_loaders import DataFrameLoader
from langchain_huggingface import HuggingFaceEmbeddings
import torch

In [2]:
# Load the sample sentences and keep only the rows whose first column starts
# with the Halle prefix.
df = pd.read_csv('input_data/soc_sample.csv', header=0)
# na=False: a missing value in the first column would otherwise put NaN into
# the mask, and boolean indexing with a NaN-containing mask raises.
df = df[df.iloc[:, 0].str.startswith('in the city of Halle', na=False)]

In [3]:
# Build (or reopen) the persistent Chroma collection "statbel" in the current
# directory, embedded with a HuggingFace sentence-transformers model.
# No encode_kwargs are passed, so the embedding library defaults apply (the
# original author notes these embeddings are unnormalized).
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
collection_name = "statbel"

# Set to True to delete the on-disk store and re-index `df` from scratch.
recreate_vectorstore = False

if recreate_vectorstore:
    try:
        os.remove('chroma.sqlite3')
    except FileNotFoundError:
        # Nothing to delete on a first run. Any other failure (e.g. a locked
        # or read-only file) must surface: silently continuing would append
        # the documents to the old store and duplicate every entry.
        pass
    # NOTE(review): only the sqlite file is removed here; confirm whether
    # Chroma leaves other index files under the persist directory that
    # should be cleaned up as well.

# The store is opened the same way whether or not it is being rebuilt.
vector_store = Chroma(
    collection_name=collection_name,
    embedding_function=embedding_function,
    persist_directory=".",
)

if recreate_vectorstore:
    # Each row of `df` becomes one document; the "text" column is the content.
    loader = DataFrameLoader(df, page_content_column="text")
    docs = loader.load()
    vector_store.add_documents(docs)

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Ask a question: retrieve the 10 stored sentences most similar to the query.
query = "in the city of Halle how many males of age 65 which are born in bel and married?"

results = vector_store.similarity_search(query, k=10)

# Combine the retrieved documents (one per line) to use as LLM context.
context = "\n".join(result.page_content.strip() for result in results)

# Show what was retrieved, in the order returned by the search.
for result in results:
    print(result.page_content)

in the city of Halle the number of males born in bel married and age 85 is 57
in the city of Halle the number of males born in bel married and age 84 is 57
in the city of Halle the number of males born in bel married and age 35 is 77
in the city of Halle the number of males born in bel married and age 82 is 50
in the city of Halle the number of males born in bel married and age 81 is 75
in the city of Halle the number of males born in bel married and age 83 is 59
in the city of Halle the number of males born in bel married and age 65 is 120
in the city of Halle the number of males born in bel married and age 77 is 95
in the city of Halle the number of males born in bel married and age 38 is 95
in the city of Halle the number of males born in bel married and age 86 is 46


In [5]:
# Assemble the LLM prompt: an instruction followed by the retrieved context,
# a blank line, and then the user's question.
query_w_context = (
    "Answer the question based only on the following context: {}\n\nQuestion: {}"
    .format(context, query)
)

# Function to query local flask server llm (mistral-7b-orca)
def query_llm(prompt, url="http://192.168.0.23:8000/generate", timeout=300):
    """Send a prompt to the LLM HTTP endpoint and return its decoded JSON reply.

    Args:
        prompt: Text posted to the server as the JSON body {"prompt": prompt}.
        url: Endpoint to POST to; defaults to the server address this notebook
            has always used.
        timeout: Seconds to wait for the server before giving up, so that an
            unreachable or stalled server cannot hang the notebook forever.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within `timeout` seconds.
    """
    response = requests.post(url, json={"prompt": prompt}, timeout=timeout)
    # Fail here with the HTTP status instead of later with a confusing
    # JSON-decoding or KeyError on an error page.
    response.raise_for_status()
    return response.json()

# Send the assembled prompt to the LLM server and keep its JSON reply, then
# echo the exact prompt that was sent.
response = query_llm(prompt=query_w_context)
print(query_w_context)

Answer the question based only on the following context: in the city of Halle the number of males born in bel married and age 85 is 57
in the city of Halle the number of males born in bel married and age 84 is 57
in the city of Halle the number of males born in bel married and age 35 is 77
in the city of Halle the number of males born in bel married and age 82 is 50
in the city of Halle the number of males born in bel married and age 81 is 75
in the city of Halle the number of males born in bel married and age 83 is 59
in the city of Halle the number of males born in bel married and age 65 is 120
in the city of Halle the number of males born in bel married and age 77 is 95
in the city of Halle the number of males born in bel married and age 38 is 95
in the city of Halle the number of males born in bel married and age 86 is 46

Question: in the city of Halle how many males of age 65 which are born in bel and married?


In [6]:
# Pull the two fields of interest out of the server reply: the device the
# server reports, and the whitespace-trimmed text of the first choice.
processing_type = response["device"]
first_choice = response["response"]["choices"][0]
answer = first_choice["text"].strip()

# Render a short report: device line, blank line, then the answer.
output = "\nGenerated on {}.\n\nResponse: {}\n\n".format(processing_type, answer)

print(output)


Generated on cuda.

Response: Based on the context, there are 120 males born in Halle who are males born in bel married and age 65.


