In [1]:
import os
import pandas as pd
import numpy as np
import requests

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader

import torch

In [2]:
# Build (or reopen) the Chroma collection "astro_hf", embedded with the
# sentence-transformers/all-MiniLM-L6-v2 model and persisted in the current
# working directory.
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
collection_name = "astro_hf"

# Set to True to delete the persisted database and re-index
# input_data/astronomy.txt; leave False to reuse what is already on disk.
recreate_vectorstore = False

if recreate_vectorstore:
    # Remove the old database file *before* Chroma opens it. Only "file does
    # not exist" is ignored: any other failure (permissions, file locked, ...)
    # must surface, otherwise the documents below would be appended to the
    # stale store instead of replacing it.
    try:
        os.remove('chroma.sqlite3')
    except FileNotFoundError:
        pass

# The store is opened the same way whether or not it is being rebuilt.
vector_store = Chroma(collection_name=collection_name,
                      embedding_function=embedding_function,
                      persist_directory=".")

if recreate_vectorstore:
    # Load the source text document.
    loader = TextLoader("input_data/astronomy.txt")
    documents = loader.load()

    # Split the text into chunks with LangChain's text splitter, preferring
    # paragraph, line and sentence boundaries (in that order).
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                   chunk_overlap=40,
                                                   separators=["\n\n", "\n", ". ", "! ", "? "],
                                                   keep_separator=True)
    docs = text_splitter.split_documents(documents)

    # Index only the chunks that meet the minimum size requirement.
    min_chunk_size = 200
    filtered_chunks = [chunk for chunk in docs if len(chunk.page_content) >= min_chunk_size]
    vector_store.add_documents(filtered_chunks)

    # Check out the first few chunks created by RecursiveCharacterTextSplitter.
    # This previews the unfiltered splitter output, so chunks shorter than
    # min_chunk_size may be shown even though they were not indexed.
    for chunk_number, chunk in enumerate(docs[:5], start=1):
        print(f"Chunk {chunk_number}:")
        print(f"Length: {len(chunk.page_content)}")
        print(chunk.page_content)
        print("-" * 50)

  from tqdm.autonotebook import tqdm, trange


In [3]:
query = "What is astronomy?"
results = vector_store.similarity_search(query, k=2)

# Show the retrieved passages (never more than five), each followed by a rule.
for hit in results[:5]:
    print(hit.page_content)
    print("-" * 50)

# Join the stripped passages, separated by blank lines, to use as LLM context.
context = "\n\n".join(doc.page_content.strip() for doc in results)

Astronomy is a natural science that studies celestial objects and the phenomena that occur in the cosmos. It uses mathematics, physics, and chemistry in order to explain their origin and their overall evolution. Objects of interest include planets, moons, stars, nebulae, galaxies, meteoroids, asteroids, and comets. Relevant phenomena include supernova explosions, gamma ray bursts, quasars, blazars, pulsars, and cosmic microwave background radiation. More generally, astronomy studies everything that originates beyond Earth's atmosphere. Cosmology is a branch of astronomy that studies the universe as a whole.
--------------------------------------------------
Astronomy is one of the oldest natural sciences. The early civilizations in recorded history made methodical observations of the night sky. These include the Egyptians, Babylonians, Greeks, Indians, Chinese, Maya, and many ancient indigenous peoples of the Americas. In the past, astronomy included disciplines as diverse as astrometr

In [6]:
# Build the LLM prompt: the retrieved context first, then the user's question.
# The instruction line asks the model to answer from that context only.
query_w_context = f"""Answer the question based only on the following context: {context}

Question: {query}"""

# Function to query local flask server llm (mistral-7b-orca)
def query_llm(prompt, url="http://192.168.0.23:8000/generate", timeout=120):
    """Send a prompt to the LLM HTTP endpoint and return its decoded JSON reply.

    Args:
        prompt: Text posted to the server as the JSON body ``{"prompt": prompt}``.
        url: Endpoint to POST to. Defaults to the local generation server.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unreachable server would block the notebook forever.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.exceptions.Timeout: The server did not answer within ``timeout``.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx status.
    """
    response = requests.post(url, json={"prompt": prompt}, timeout=timeout)
    # Fail here with the HTTP status instead of later with a confusing
    # JSON-decoding or missing-key error on an error page.
    response.raise_for_status()
    return response.json()

# Send the prompt to the LLM server; `response` holds the decoded JSON reply.
response = query_llm(query_w_context)
# Echo the exact prompt that was sent so the retrieved context can be inspected.
print(query_w_context)

Answer the question based only on the following context: Astronomy is a natural science that studies celestial objects and the phenomena that occur in the cosmos. It uses mathematics, physics, and chemistry in order to explain their origin and their overall evolution. Objects of interest include planets, moons, stars, nebulae, galaxies, meteoroids, asteroids, and comets. Relevant phenomena include supernova explosions, gamma ray bursts, quasars, blazars, pulsars, and cosmic microwave background radiation. More generally, astronomy studies everything that originates beyond Earth's atmosphere. Cosmology is a branch of astronomy that studies the universe as a whole.

Astronomy is one of the oldest natural sciences. The early civilizations in recorded history made methodical observations of the night sky. These include the Egyptians, Babylonians, Greeks, Indians, Chinese, Maya, and many ancient indigenous peoples of the Americas. In the past, astronomy included disciplines as diverse as as

In [7]:
# Unpack the server reply: the device it reports, and the text of the first
# generated choice with surrounding whitespace trimmed.
processing_type = response["device"]
first_choice = response["response"]['choices'][0]
answer = first_choice['text'].strip()

# Human-readable summary of where the answer was generated and what it says.
output = f'''
Generated on {processing_type}.

Response: {answer}

'''

print(output)


Generated on cuda.

Response: Answer: Astronomy is a natural science that studies celestial objects and the phenomena that occur in the cosmos.


