In [None]:
# SetList api saved to file
%reload_ext autoreload
%autoreload 2

from setlist_app import SetListApp

app = SetListApp("tmG3-KHsciHD1mS5y58b1FIv5NMPccWTKN8E")

for iter in range(10, 11): 
    r = await app.get_setList(artistName="Taylor Swift", p=iter)
    data = await r.text()
    fileName = "./data/get_data" + str(iter) + ".json"
    with open(fileName, "w") as f:
        # Write the JSON data to the file
        f.write(data)
#print(f"{await r.text()}")


In [None]:
# Print json generated
import json
from pathlib import Path
from pprint import pprint 

# Parse one of the saved API responses from disk and pretty-print it so the
# structure of the JSON can be inspected.
file_path='./data/get_data1.json'
with open(file_path) as json_file:
    data = json.load(json_file)
pprint(data)

In [1]:
# Create document loader(s)

from langchain.document_loaders import JSONLoader
from llama_index import Document
from pprint import pprint 

def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy event details from one setlist record into a document's metadata.

    Adds four keys to ``metadata``: ``eventDate``, ``venue`` (the venue's
    name), ``tour`` (the tour's name) and ``songs`` (the non-empty song names
    of every set, in order, joined with ", ").

    Missing or null ``venue``, ``tour``, ``sets``, ``set`` and ``song``
    entries are tolerated: the name fields become ``None`` and absent sets
    contribute no songs, so a single sparse record does not abort loading of
    the whole file.

    Args:
        record: One setlist entry parsed from the JSON file.
        metadata: Metadata dict to extend; it is modified in place.

    Returns:
        The same ``metadata`` dict with the keys above added.

    Raises:
        KeyError: If ``record`` has no ``eventDate`` key.
    """
    metadata["eventDate"] = record["eventDate"]
    # `or {}` covers both an absent key and an explicit null value.
    metadata["venue"] = (record.get("venue") or {}).get("name")
    metadata["tour"] = (record.get("tour") or {}).get("name")

    sets = (record.get("sets") or {}).get("set") or []
    # Entries without a (non-empty) name are skipped.
    songs = [
        song["name"]
        for one_set in sets
        for song in (one_set.get("song") or [])
        if song.get("name")
    ]

    metadata["songs"] = ", ".join(songs)
    return metadata

# Load the saved response with LangChain's JSONLoader: the jq expression
# selects the entries of the top-level "setlist" array, each entry's "id"
# becomes the document content, and metadata_func fills in the metadata.
loader = JSONLoader(
    file_path='./data/get_data1.json',
    jq_schema='.setlist[]',
    content_key='id',
    text_content=False,
    metadata_func=metadata_func,
)
lcDocuments = loader.load()

# Convert every LangChain document into a llama_index Document.
lDocs = [Document.from_langchain_format(lc_doc) for lc_doc in lcDocuments]

pprint(lDocs)

[Document(text='13a6b159',
          doc_id='3d6e4928-a208-4f9f-93a5-e20cc42c2d27',
          embedding=None,
          doc_hash='b3500390fff3d5b4f12cff4a6032fe477489767bb45bc9c7e13b64c3f78a21a5',
          extra_info={'eventDate': '04-06-2023',
                      'seq_num': 1,
                      'songs': 'Miss Americana & the Heartbreak Prince, Cruel '
                               'Summer, The Man, You Need to Calm Down, Lover, '
                               'The Archer, Fearless, You Belong With Me, Love '
                               "Story, 'tis the damn season, willow, marjorie, "
                               'champagne problems, tolerate it, ...Ready for '
                               "It?, Delicate, Don't Blame Me, Look What You "
                               'Made Me Do, Enchanted, Red - Intro, 22, We Are '
                               'Never Ever Getting Back Together, I Knew You '
                               'Were Trouble, All Too Well (10 Minute '
    

In [None]:
# Create index using loader
%reload_ext autoreload
%autoreload 2

from langchain.agents import initialize_agent, Tool
#from langchain.chains.conversation.memory import ConversationBufferMemory
#from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
#from langchain.cache import SQLiteCache
from langchain import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import logging, sys, certifi, os

from llama_index.node_parser import SimpleNodeParser
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
from dotenv import load_dotenv

#load_dotenv()
#os.environ['REQUESTS_CA_BUNDLE'] = os.path.join(os.path.dirname(sys.argv[0]), certifi.where())

from llama_index import (
    GPTVectorStoreIndex,
    LLMPredictor,
    PromptHelper,
    StorageContext,
    ServiceContext
)

# Send DEBUG-level logging to stdout through exactly one root handler.
# force=True replaces handlers left over from an earlier run of this cell, so
# re-running it no longer prints every log record several times (previously a
# second StreamHandler was attached on each run).
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

# langchain.llm_cache = SQLiteCache(database_path=".langchain.db")

# LangChain tool that answers questions from the index. It is only needed by
# the agent that is commented out further down. `index` is looked up when the
# tool is called, i.e. after the index has been built below; the response is
# converted to str because a Tool callback is expected to return text.
tools = [
    Tool(
        name="Swifty chat bot",
        func=lambda q: str(index.as_query_engine().query(q)),
        description="Useful when you want answer questions about the set list Documents."
    )
]
model_path = '../models/ggml-gpt4all-j-v1.3-groovy.bin'
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])

# Local GPT4All model used as the LLM, with tokens streamed to stdout.
llm = GPT4All(model=model_path, backend='gptj', callbacks=[StreamingStdOutCallbackHandler()], streaming=True, n_ctx=512, n_threads=8, verbose=True)
gpt4all_lm_predictor = LLMPredictor(llm=llm)
# Embeddings come from a HuggingFace sentence-transformers model wrapped for llama_index.
embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))
#prompt_helper = PromptHelper(max_input_size=512, num_output=256, max_chunk_overlap=-1000)
service_context = ServiceContext.from_defaults(
    llm_predictor=gpt4all_lm_predictor,
    embed_model=embed_model,
    node_parser=SimpleNodeParser(text_splitter=TokenTextSplitter(chunk_size=300, chunk_overlap=20))
)

persist_dir = "./storage"
# Passing persist_dir to from_defaults loads the stores persisted there, which
# presumably fails when nothing has been persisted yet (first run) - so only
# do that when the directory exists and start from empty stores otherwise.
# NOTE(review): when the directory exists, each run adds another index to the
# same storage; confirm that this accumulation is intended.
if os.path.isdir(persist_dir):
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
else:
    storage_context = StorageContext.from_defaults()
index = GPTVectorStoreIndex.from_documents(lDocs, storage_context=storage_context, service_context=service_context)
index.storage_context.persist(persist_dir=persist_dir)
query_engine = index.as_query_engine(streaming=True, similarity_top_k=1, service_context=service_context)

#
#agent_chain = initialize_agent(
#    tools, 
#    llm, 
#    agent="zero-shot-react-description", 
#    memory=ConversationBufferMemory(memory_key="chat_history")
#)

# Ask a sample question and stream the answer as it is generated.
response_stream = query_engine.query("How many times was 'Cruel Summer' played in a set list?")
response_stream.print_response_stream()

In [None]:
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import logging, sys

from llama_index.node_parser import SimpleNodeParser
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
from langchain.cache import SQLiteCache
import langchain
from llama_index import (
    GPTVectorStoreIndex,
    load_index_from_storage,
    LLMPredictor,
    PromptHelper,
    StorageContext,
    ServiceContext
)

# Send DEBUG-level logging to stdout through exactly one root handler.
# force=True replaces handlers installed by earlier cells or earlier runs of
# this cell, so log records are not printed several times (previously an extra
# StreamHandler was attached on every run).
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

# Cache LLM calls made through LangChain in a local SQLite database.
langchain.llm_cache = SQLiteCache(database_path=".langchain.db")

# Same model, embedding and chunking settings as the cell that built the index.
model_path = '../models/ggml-gpt4all-j-v1.3-groovy.bin'
gpt4all_lm_predictor = LLMPredictor(
    llm=GPT4All(model=model_path, backend='gptj', callbacks=[StreamingStdOutCallbackHandler()], streaming=True, n_ctx=512, n_threads=8, verbose=True))
service_context = ServiceContext.from_defaults(
    llm_predictor=gpt4all_lm_predictor,
    embed_model=LangchainEmbedding(HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")),
    node_parser=SimpleNodeParser(text_splitter=TokenTextSplitter(chunk_size=300, chunk_overlap=20))
)

# Re-open the stores persisted under ./storage and load one index from them.
# NOTE(review): the index id below is hard-coded and presumably belongs to one
# particular earlier build; it has to be updated when the index is rebuilt.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context, "6eda28f7-f7da-4ce8-aaa6-5514d4668d5f", service_context=service_context) 
query_engine = index.as_query_engine(streaming=True, similarity_top_k=1, service_context=service_context)

# Ask a sample question and stream the answer as it is generated.
response_stream = query_engine.query("How many times was the song 'Cruel Summer' played on the Eras Tour?")
response_stream.print_response_stream()