In [1]:
import numpy as np
import faiss
import litellm
from litellm import completion
from dotenv import load_dotenv
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ArxivLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Load API keys from the local .env file into the process environment
load_dotenv(".env")
apikey = os.getenv('OPENAI_API_KEY')

# Fail fast with a clear message instead of passing None to every later API call
if not apikey:
    raise RuntimeError("OPENAI_API_KEY is not set; add it to .env or the environment")

# Never print the key itself: cell outputs are saved with the notebook and
# would leak the secret to anyone the file is shared with.
print("OPENAI_API_KEY loaded")

[REDACTED: API key removed from saved output]


In [2]:
import requests

# Example query to the arXiv API directly (connectivity smoke test)
url = "http://export.arxiv.org/api/query"
params = {
    "search_query": "all:Retrieval Augmented Generation",
    "start": 0,
    "max_results": 5
}

# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; bound the wait explicitly.
response = requests.get(url, params=params, timeout=30)

if response.status_code == 200:
    print("API Request Successful")
    print("Response:", response.text[:500])  # Print first 500 characters of the response
else:
    print("Failed to fetch data from arXiv:", response.status_code)

API Request Successful
Response: <?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <link href="http://arxiv.org/api/query?search_query%3Dall%3ARetrieval%20Augmented%20Generation%26id_list%3D%26start%3D0%26max_results%3D5" rel="self" type="application/atom+xml"/>
  <title type="html">ArXiv Query: search_query=all:Retrieval Augmented Generation&amp;id_list=&amp;start=0&amp;max_results=5</title>
  <id>http://arxiv.org/api/UOKvU43HASfzk48jVrFyBsmll1A</id>
  <updated>2024-05-23T00:00:00-04:00</updat


In [3]:
# Fetch arXiv papers for a hand-written query (load_max_docs=5).
# The spaces just inside the triple quotes are part of the query string.
arxiv_query = """ all:"attention mechanisms" AND (all:"convolutional neural networks" OR all:"CNN") AND NOT all:"transformer" """
arxiv_loader = ArxivLoader(query=arxiv_query, load_max_docs=5)
docs = arxiv_loader.load()

# Split the documents into smaller, overlapping chunks
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=350,
)
chunked_documents = text_splitter.split_documents(docs)

In [4]:
# Sanity-check the loader output: container type, count, and content type
print(type(docs), len(docs), sep="\n")
print(type(docs[0].page_content))

# Same checks for the chunked documents, then show the first chunk
print(type(chunked_documents), len(chunked_documents), sep="\n")
print(chunked_documents[0])




<class 'list'>
5
<class 'str'>
<class 'list'>
199
page_content='1\nPulmonary Disease Classiﬁcation Using Globally\nCorrelated Maximum Likelihood:\nan Auxiliary Attention mechanism for\nConvolutional Neural Networks\nEdward Verenich, Tobias Martin, Alvaro Velasquez, Nazar Khan, and Faraz Hussain\nAbstract—Convolutional neural networks (CNN) are now being\nwidely used for classiﬁying and detecting pulmonary abnormal-\nities in chest radiographs. Two complementary generalization\nproperties of CNNs, translation invariance and equivariance,\nare particularly useful in detecting manifested abnormalities\nassociated with pulmonary disease, regardless of their spatial\nlocations within the image. However, these properties also come\nwith the loss of exact spatial information and global relative\npositions of abnormalities detected in local regions. Global\nrelative positions of such abnormalities may help distinguish\nsimilar conditions, such as COVID-19 and viral pneumonia. In\nsuch instance

In [5]:
# Smoke test: send one trivial prompt through litellm to confirm the key
# and endpoint work before running the real pipeline.
test_messages = [{"content": "What is a cat?", "role": "user"}]
response = completion(
    model="gpt-3.5-turbo-16k",
    messages=test_messages,
    temperature=0.5,
    custom_llm_provider="openai",
    base_url="https://drchat.xyz",
    api_key=apikey,
)
first_choice = response.choices[0]
print(first_choice.message.content)

A cat is a small carnivorous mammal that is often kept as a pet. It belongs to the Felidae family and is known for its furry coat, retractable claws, and sharp teeth. Cats are known for their agility, flexibility, and hunting skills. They come in various breeds and sizes, with domestic cats being the most common. Cats are popular pets due to their independent nature, companionship, and ability to catch pests.


In [6]:
# Ask the user for the research question; user_query is reused by later cells
# for the similarity search and the final answer prompt.
user_query = input("Ask a research question!")

# Build a few-shot prompt that asks the model to turn the question into an
# arXiv search query. The prompt requests a single query and nothing else;
# user_query is embedded verbatim.
search_split_prompt = f"""
Your role is that of a researcher attempting to answer a question. Given a question from the user,
your job is to come up with an ArXiv query that searches for the exact information needed to answer the question.

You can include all syntax that involves including multiple terms, search by abstract, title, etc.

Example 1:
Given question: What are the ethical concerns associated with the use of facial recognition technology?
Your Answer: ("facial recognition technology" OR "facial recognition systems" OR "facial recognition software") AND ("ethical concerns" OR "ethical implications" OR "ethical issues")

Example 2:
Given question: What are some prominent attention mechanisms for convolutional neural networks, and how are they used in the autonomous vehicle industry?
Your Answer: all:"attention mechanisms" AND ("convolutional neural networks" OR "CNN") AND all:"attention mechanisms" AND ("autonomous vehicles" OR "self-driving cars")

Question: {user_query},

As shown above, your response should solely be an ArXiv Query, and nothing else.

"""
# Ask the model to translate the user's question into arXiv search syntax.
# NOTE(review): the model id lacks the hyphen of OpenAI's "gpt-4-1106-preview";
# presumably an alias defined by this endpoint - confirm.
response = completion(
    api_key=apikey,
    base_url="https://drchat.xyz",
    model = "gpt4-1106-preview",
    custom_llm_provider="openai",
    messages = [{ "content": search_split_prompt,"role": "user"}],
    temperature=0.5
)
print("Response recieved")
print(response.choices[0].message.content)

# The message content can be None; fall back to "" so parsing cannot raise.
raw_queries = response.choices[0].message.content or ""

# Split on "|" in case several queries come back, trimming surrounding
# whitespace/newlines and dropping empty pieces so that no blank or padded
# query is ever sent to arXiv.
arxiv_queries_list = [
    query.strip() for query in raw_queries.split("|") if query.strip()
]


# The splitter settings are identical for every query, so build it once
# instead of re-creating it on each loop iteration.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=350, chunk_overlap=50
)

# Each element holds the list of document chunks for one search query
# developed by the LLM (vector stores are built from these in a later cell).
chunks_for_queries = []
for q in arxiv_queries_list:
    # NOTE: `docs` and `chunked_documents` are rebound here on every
    # iteration; after this cell they refer to the last query's results.
    docs = ArxivLoader(query=q, load_max_docs=5).load()
    chunked_documents = text_splitter.split_documents(docs)
    chunks_for_queries.append(chunked_documents)

Response recieved
title:("TVM" OR "tensor virtual machine") OR abstract:("TVM" OR "tensor virtual machine")


In [7]:
# Quick look at the result: container type, then chunk count for the first query
print(type(chunks_for_queries))
first_query_chunks = chunks_for_queries[0]
print(len(first_query_chunks))

<class 'list'>
247


In [8]:
# Instantiate the embedding model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small",openai_api_key=apikey, base_url="https://drchat.xyz")

# Create one FAISS index per search query from that query's document chunks
vectorstore_list = []
for query_chunks in chunks_for_queries:
    if not query_chunks:
        # A query that matched no papers yields an empty chunk list, and
        # FAISS.from_documents cannot build an index from zero documents.
        print("Skipping a query that returned no document chunks")
        continue
    faiss_vectorstore = FAISS.from_documents(
        documents=query_chunks,
        embedding=embeddings,
    )
    print(type(faiss_vectorstore))
    vectorstore_list.append(faiss_vectorstore)


# Query every vector store with the user's question and keep the k=5
# results returned by each similarity search
relevant_documents_list = []
for store in vectorstore_list:
    relevant_documents = store.similarity_search(user_query, k=5)
    print(type(relevant_documents))
    relevant_documents_list += [relevant_documents]

<class 'langchain_community.vectorstores.faiss.FAISS'>
<class 'list'>


In [9]:
# Dump the raw retrieval results: one inner list of search hits per vector store
print(relevant_documents_list)

[[Document(page_content='months to execute on tens and even hundreds of thousands of com-\npute nodes with CPUs. TVM provides an opportunity to improve\nthe performance of these dense matrix factorizations on GPUs and\nAI accelerators. In this paper, we propose a new autotuning frame-\nwork using Bayesian Optimization in ytopt [9, 10] and use the TVM\ntensor expression language to implement linear algebra kernels\nsuch as LU, Cholesky, and 3mm from PolyBench 4.2 [12]. We use\nthese scientific kernels to evaluate the effectiveness of our methods\non a GPU cluster, called Swing [8], at Argonne National Laboratory.\nIn this paper, we make the following contributions:\n• We propose a new autotuning framework for TVM-based\nscientific tensor applications using Bayesian Optimization.\n• We use TVM to implement scientific kernels such as LU,\nCholesky, and 3mm.\n• We evaluate the effectiveness of the proposed autotuning\nframework and compare its performance with AutoTVM.\nThe remainder of th

In [10]:
# Compose the final prompt: retrieved context first, then the question.
# NOTE: the context is interpolated as the Python repr of the nested list of
# retrieved documents, not as plain text.
question_prompt = f"""
Given the following context: {relevant_documents_list}

Answer the following question: {user_query}

Only answer the question if the answer is in the context. Otherwise, say that you don't know.
"""

# Send the grounded prompt to the chat model and show its answer
answer_messages = [{"content": question_prompt, "role": "user"}]
response = completion(
    model="gpt-3.5-turbo-16k",
    messages=answer_messages,
    temperature=0.5,
    custom_llm_provider="openai",
    base_url="https://drchat.xyz",
    api_key=apikey,
)

final_choice = response.choices[0]
print(final_choice.message.content)

TVM stands for Tensor Virtual Machine. It is an open-source machine learning compiler framework designed to optimize computations across various hardware platforms. It provides an opportunity to improve the performance of dense matrix factorizations on GPUs and AI accelerators.
