In [1]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

Settings.llm = Ollama(model="llama2", request_timeout=60.0)



In [2]:
import requests
import feedparser
import os

#url = "https://connect.biorxiv.org/biorxiv_xml.php?subject=developmental_biology"

url = "http://arxiv.org/rss/cs.CL"

# Make a GET request to fetch the content of the page
response = requests.get(url)

# Parse the content as XML using feedparser
feed = feedparser.parse(response.content)

# Create the 'abstracts' folder if it doesn't exist
if not os.path.exists('abstracts'):
    os.makedirs('abstracts')
# else, remove all files in the folder
else:
    for file in os.listdir('abstracts'):
        file_path = os.path.join('abstracts', file)
        os.remove(file_path)

# Access individual entries and save them as separate documents
for i, entry in enumerate(feed.entries, start=1):
    title = entry.title
    summary = entry.summary
    link = entry.link
    # Create a file path for the abstract
    file_path = os.path.join('abstracts', f'abstract_{i}.txt')

    # Write the title and summary to the file
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(f'Title: {title}\n\n')
        file.write(f'Link: {link}\n\n')
        file.write(f'Summary: {summary}\n')

print("All abstracts saved successfully.")

All abstracts saved successfully.


In [3]:
# Load documents from a directory
from llama_index.core import SimpleDirectoryReader,ServiceContext,VectorStoreIndex

documents = SimpleDirectoryReader('abstracts').load_data()
print("Number of Input documents:", len(documents))

Number of Input documents: 149


In [4]:
service_context = ServiceContext.from_defaults(llm=Settings.llm,embed_model="local")
index = VectorStoreIndex.from_documents(documents,service_context=service_context)
query_engine = index.as_query_engine(llm=Settings.llm)

print("Number of nodes:", len(index.docstore.docs))

  service_context = ServiceContext.from_defaults(llm=Settings.llm,embed_model="local")


Number of nodes: 149


In [5]:
# Setup index query engine using LLM 
query_engine = index.as_query_engine(
    streaming=True,similarity_top_k=50)

qq = "Are there any papers about large language modeling? If so, could you group them by topic, give each paper a novelty score (0-10) and provide me the titles, links and a short summary?"

streaming_response = query_engine.query(qq)
streaming_response.print_response_stream()


Sure! Here are the grouped papers you provided, along with their novelty scores (on a scale of 0-10):

**Sparse Logistic Regression with High-order Features for Automatic Grammar Rule Extraction from Treebanks**

* Title: Sparse Logistic Regression with High-order Features for Automatic Grammar Rule Extraction from Treebanks
* Link: <https://arxiv.org/abs/2403.17534>
* Abstract: 8/10

**Large Language Modeling for Scientific Hypothesis Testing: A Survey**

* Title: Large Language Modeling for Scientific Hypothesis Testing: A Survey
* Link: <https://arxiv.org/abs/2309.06578v3>
* Abstract: 7/10

**EthioLLM: Multilingual Large Language Models for Ethiopian Languages with Task Evaluation**

* Title: EthioLLM: Multilingual Large Language Models for Ethiopian Languages with Task Evaluation
* Link: <https://arxiv.org/abs/2403.13737>
* Abstract: 9/10

**Hyacinth6B: A Large Language Model for Traditional Chinese**

* Title: Hyacinth6B: A Large Language Model for Traditional Chinese
* Link: <htt