In [None]:
# Import transformer classes for generation
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
# Import torch for datatype attributes 
import torch
import os
# Hugging Face access token read from the environment (None when AUTH_TOKEN is unset)
auth_token = os.getenv("AUTH_TOKEN")

# Select the compute device. `device` must always be bound because later cells
# call `.to(device)`; previously it stayed undefined whenever MPS was missing,
# which surfaced later as a NameError instead of a clear message.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    print("There might be a problem! No MPS or CUDA device found; falling back to CPU.")

# Define variable to hold llama2 weights naming
name = "meta-llama/Llama-2-7b-chat-hf"

In [None]:
# Build the tokenizer for the checkpoint in `name`, caching downloads under ./model/
tokenizer = AutoTokenizer.from_pretrained(
    name,
    return_token_type_ids=False,
    cache_dir="./model/",
)

In [None]:
# Load the causal LM weights as float16 (cached under ./model/) ...
model = AutoModelForCausalLM.from_pretrained(
    name,
    cache_dir="./model/",
    torch_dtype=torch.float16,
    rope_scaling={"type": "dynamic", "factor": 2},
    load_in_8bit=False,
)
# ... then move the model onto the selected device
model = model.to(device)

In [None]:
# As a test, generate a sentence

# Setup a prompt.
# Adjacent string literals are joined by the parser, so the source indentation
# does not end up inside the prompt. (A backslash continuation *inside* the
# quotes kept the next line's leading spaces as part of the text.)
prompt = (
    "### User:Which pizza toppings are generally not allowed by Italians? "
    "### Assistant:"
)

# Pass the prompt to the tokenizer and move the resulting tensors to the model's device
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Setup the text streamer (configured to skip the prompt and special tokens)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Actually run the thing: generate at most 200 new tokens, streaming them as they arrive
output = model.generate(**inputs, streamer=streamer,
                        use_cache=True, max_new_tokens=200)

In [None]:
# Import the prompt wrapper...but for llama index
from llama_index.prompts.prompts import SimpleInputPrompt
# Create a system prompt.
# Llama 2 chat models expect the system message to be wrapped in
# <<SYS>> ... <</SYS>> inside the opening [INST] block; the bare "<>" markers
# that were here are not delimiters the model was trained on.
system_prompt = """[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as 
helpfully as possible, while being safe. Your answers should not include
any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain 
why instead of answering something not correct. If you don't know the answer 
to a question, please don't share false information.

Your goal is to provide answers relating to a set of research papers.
<</SYS>>
"""
# Template applied to every user query: the query text followed by the
# closing [/INST] tag
query_wrapper_prompt = SimpleInputPrompt(
    "{query_str} [/INST]",
    skip_on_failure=True,
)

In [None]:
# Complete the query prompt: fill the {query_str} placeholder with a sample value.
# Kept as the last expression so the notebook displays the formatted result.
query_wrapper_prompt.format(query_str='hello')

In [None]:
# Import the llama index HF Wrapper
from llama_index.llms import HuggingFaceLLM
# Wrap the already-loaded model and tokenizer in the llama index Hugging Face LLM,
# together with the system prompt and the per-query template
llm = HuggingFaceLLM(
    model=model,
    tokenizer=tokenizer,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    context_window=4096,
    max_new_tokens=256,
)

In [None]:
# Bring in embeddings wrapper
from llama_index.embeddings import LangchainEmbedding
# Bring in HF embeddings - need these to represent document chunks
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [None]:
# Instantiate the Hugging Face embedding model first ...
sentence_encoder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# ... then adapt it to the llama index embedding interface
embeddings = LangchainEmbedding(sentence_encoder)

In [None]:
# Bring in stuff to change service context
from llama_index import set_global_service_context
from llama_index import ServiceContext

In [None]:
# Bundle the LLM and the embedding model into one service context (chunk_size=1024)
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embeddings,
    chunk_size=1024,
)

# Install it as the global service context
set_global_service_context(service_context)

In [None]:
import requests
import feedparser
import os

# Alternative feed:
#url = "https://connect.biorxiv.org/biorxiv_xml.php?subject=developmental_biology"

url = "http://arxiv.org/rss/cs.CL"

# Make a GET request to fetch the content of the page.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were a feed.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the content as XML using feedparser
feed = feedparser.parse(response.content)

# Create the 'abstracts' folder if it doesn't exist (no separate existence check needed)
os.makedirs('abstracts', exist_ok=True)

# Access individual entries and save them as separate documents
for i, entry in enumerate(feed.entries, start=1):
    # Use .get() so an entry lacking a title or summary is written with an
    # empty field instead of aborting the whole loop with an AttributeError
    title = entry.get('title', '')
    summary = entry.get('summary', '')

    # Create a file path for the abstract
    file_path = os.path.join('abstracts', f'abstract_{i}.txt')

    # Write the title and summary to the file
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(f'Title: {title}\n\n')
        file.write(f'Summary: {summary}\n')

# Report what actually happened; an empty feed must not look like a success
if feed.entries:
    print(f"All abstracts saved successfully ({len(feed.entries)} files).")
else:
    print("The feed contained no entries; no abstracts were saved.")

In [None]:
# Import deps to load documents 
from llama_index import VectorStoreIndex, SimpleDirectoryReader

# Read every file in the 'abstracts' directory into document objects
abstract_reader = SimpleDirectoryReader('abstracts')
documents = abstract_reader.load_data()

# Build the vector store index over those documents
index = VectorStoreIndex.from_documents(documents)

In [None]:
# Build a query engine on the index: streaming responses, similarity_top_k of 5
query_engine = index.as_query_engine(
    similarity_top_k=5,
    streaming=True,
)

In [None]:
# Ask a question against the indexed abstracts. The query text is what gets
# matched against the documents, so the misspelling "knoledge" is corrected.
streaming_response = query_engine.query(
    "Are there papers about knowledge graphs?"
)
# Print the answer as it streams in
streaming_response.print_response_stream()