In [None]:
import logging
import sys
import os

def configure_stdout_logging(level=logging.INFO):
    """Route root-logger records to stdout through exactly one handler.

    Calling ``logging.basicConfig(stream=sys.stdout)`` and then adding a second
    ``StreamHandler(sys.stdout)`` prints every record twice, and re-running the
    cell stacks yet another handler each time. This helper is idempotent: it
    attaches a stdout handler only when the root logger does not have one yet.

    Args:
        level: Threshold applied to the root logger (defaults to ``logging.INFO``).
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is sys.stdout
        for handler in root_logger.handlers
    )
    if not has_stdout_handler:
        stdout_handler = logging.StreamHandler(stream=sys.stdout)
        # Same "LEVEL:name:message" layout that logging.basicConfig() would install.
        stdout_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
        root_logger.addHandler(stdout_handler)


configure_stdout_logging()

# Thread limits for numexpr; set here so they are already in the environment
# when numexpr is imported on the following line.
os.environ['NUMEXPR_MAX_THREADS'] = '4'
os.environ['NUMEXPR_NUM_THREADS'] = '2'
import numexpr as ne

In [None]:
import os
# Never store the API key in the notebook itself: a saved or committed notebook
# would leak it. Use the key already exported in the environment, and only
# prompt (without echoing) when it is missing.
import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

import openai
# Keep the openai module's key in sync with the environment variable.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
from pathlib import Path
from llama_index import download_loader, VectorStoreIndex, load_index_from_storage, SummaryIndex
from llama_index.storage.storage_context import StorageContext

# Look up the loader class registered under the name "PDFReader".
# NOTE(review): download_loader presumably fetches the loader code on first use,
# so this line may need network access — confirm for offline runs.
PDFReader = download_loader("PDFReader")

# Single reader instance; the indexing loop in this cell calls loader.load_data() on it.
loader = PDFReader()

class PodcastTitle:
    """Plain record describing one podcast transcript.

    The constructor stores its four arguments, unchanged, as attributes of the
    same name.

    Attributes:
        name: Human-readable podcast title.
        about: Short phrase describing the topic the podcast covers.
        file: Path of the transcript PDF.
        key: Short identifier for the podcast.
    """

    def __init__(self, name, about, file, key):
        self.name, self.about, self.file, self.key = name, about, file, key

# Catalogue of the transcripts this notebook indexes.
#   name  - title interpolated into the tool / node descriptions
#   about - topic phrase interpolated into the same descriptions
#   file  - PDF path, resolved relative to ./assets/AndrewHuberman/
#   key   - identifier used for cache directories and as the dictionary key
podcast_titles = [
    PodcastTitle(
        name="10 Tools for Managing Stress and Anxiety with Huberman",
        about="manage stress and anxiety",
        file="behaviour/10_Tools_for_Managing_Stress_&_Anxiety_Huberman_Lab_Podcast_10.pdf",
        key="tools_for_stress",
    ),
    PodcastTitle(
        name="The Science of Setting and Achieving Goals with Huberman",
        about="set and achieve goals",
        file="behaviour/55_The_Science_of_Setting_&_Achieving_Goals_Huberman_Lab_Podcast_55.pdf",
        key="setting_goals",
    ),
    PodcastTitle(
        name="Dr Chris Palmer Diet and Nutrition for Mental Health with Huberman",
        about="have healthy diet for mental health",
        file="food/99_Dr_Chris_Palmer_Diet_&_Nutrition_for_Mental_Health_Huberman_Lab_Podcast_99.pdf",
        key="diet_nutrition",
    ),
]

# Per-podcast indices, keyed by ``podcast.key``.
podcast_vector_index = {}
podcast_summary_index = {}

for podcast in podcast_titles:
    vector_cache_dir = f"./storage/cache/{podcast.key}_vector"
    summary_cache_dir = f"./storage/cache/{podcast.key}_summary"

    try:
        # Fast path: both indices were persisted by an earlier run.
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=vector_cache_dir)
        )
        summary_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=summary_cache_dir)
        )
    except Exception as load_error:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop the cell, and record why the
        # cache was not usable instead of silently rebuilding.
        logging.getLogger(__name__).info(
            "Cache for %r not usable (%s: %s); rebuilding indices from the PDF.",
            podcast.key,
            type(load_error).__name__,
            load_error,
        )

        documents = loader.load_data(file=Path(f"./assets/AndrewHuberman/{podcast.file}"))

        # Rebuild both indices from the same documents and persist each one so
        # the next run takes the fast path.
        vector_index = VectorStoreIndex.from_documents(documents)
        vector_index.storage_context.persist(persist_dir=vector_cache_dir)

        summary_index = SummaryIndex.from_documents(documents)
        summary_index.storage_context.persist(persist_dir=summary_cache_dir)

    podcast_vector_index[podcast.key] = vector_index
    podcast_summary_index[podcast.key] = summary_index




In [18]:
podcast_summary_index

{'tools_for_stress': <llama_index.indices.list.base.SummaryIndex at 0x1460650d0>,
 'setting_goals': <llama_index.indices.list.base.SummaryIndex at 0x1461275e0>,
 'diet_nutrition': <llama_index.indices.list.base.SummaryIndex at 0x1459080d0>}

In [20]:
from llama_index.indices.postprocessor import KeywordNodePostprocessor
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.tools import QueryEngineTool, ToolMetadata


# Keywords handed to the postprocessor's ``exclude_keywords`` option.
# NOTE(review): presumably retrieved chunks mentioning any of these terms are
# dropped before answer synthesis — confirm against the postprocessor docs.
excluded_keywords = [
    "supplements",
    "LMNT",
    "InsideTracker",
    "Helix",
    "ROKA",
    "Athletic Greens",
    "Thesis",
    "Eight Sleep",
]
node_processor = KeywordNodePostprocessor(exclude_keywords=excluded_keywords)

# Imported once, ahead of the loop, instead of on every iteration.
from llama_index.agent import OpenAIAgent
from llama_index.llms import OpenAI

# All three dictionaries are keyed by ``podcast.key``.
# NOTE: the ``podcats_`` spelling is kept as-is so any other cell that refers
# to these names keeps working.
agents = {}
podcats_vector_engines = {}
podcats_summary_engines = {}

for podcast in podcast_titles:
    # Vector engine: top-3 similarity retrieval, filtered by the keyword postprocessor.
    retriever = VectorIndexRetriever(
        index=podcast_vector_index[podcast.key],
        similarity_top_k=3,
    )
    podcats_vector_engines[podcast.key] = RetrieverQueryEngine(
        retriever=retriever,
        node_postprocessors=[node_processor],
    )

    # Summary engine: default query engine of the podcast's summary index.
    podcats_summary_engines[podcast.key] = podcast_summary_index[podcast.key].as_query_engine()

    vector_tool = QueryEngineTool(
        query_engine=podcats_vector_engines[podcast.key],
        metadata=ToolMetadata(
            name=f"{podcast.key}_vector_tool",
            description=f"Useful for retrieving specific context from a podcast {podcast.name}. "
            f"Use when you need information related to {podcast.about}.",
        ),
    )

    summary_tool = QueryEngineTool(
        query_engine=podcats_summary_engines[podcast.key],
        metadata=ToolMetadata(
            name=f"{podcast.key}_summary_tool",
            # The first sentence now ends with ". " — previously the two
            # sentences ran together as "...<name>'Use when you need...".
            description=f"Useful for summary of the podcast '{podcast.name}'. "
            f"Use when you need overview information about how to {podcast.about}. ",
        ),
    )

    query_engine_tools = [vector_tool, summary_tool]

    # One agent per podcast, able to choose between that podcast's two tools.
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=OpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
        verbose=True,
    )
    agents[podcast.key] = agent



In [21]:
agents

{'tools_for_stress': <llama_index.agent.openai_agent.OpenAIAgent at 0x146a932e0>,
 'setting_goals': <llama_index.agent.openai_agent.OpenAIAgent at 0x146a8ed90>,
 'diet_nutrition': <llama_index.agent.openai_agent.OpenAIAgent at 0x146a8e6a0>}

In [22]:
from llama_index.schema import IndexNode

# One IndexNode per podcast: the text describes the podcast and ``index_id``
# carries ``podcast.key``.
nodes = [
    IndexNode(
        text=(
            f"This content contains podcast transcript: '{podcast.name}'. "
            f"Use this index if you need to lookup specific information about {podcast.about}.\n"
        ),
        index_id=podcast.key,
    )
    for podcast in podcast_titles
]

In [None]:
# Top-level index built over the per-podcast IndexNode descriptions in `nodes`.
# Note: this rebinds the name `vector_index`, which the index-building cell also
# assigns when it rebuilds a podcast's vector index.
vector_index = VectorStoreIndex(nodes)
# similarity_top_k=1 — presumably only the single best-matching podcast node is
# returned per query; TODO confirm.
vector_retriever = vector_index.as_retriever(similarity_top_k=1)

In [24]:
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response_synthesizers import get_response_synthesizer

# Wire the top-level retriever to the per-podcast agents.
# "vector" is the id of the entry in retriever_dict to start from.
# `agents` is keyed by podcast.key, the same value stored as index_id on each
# IndexNode — presumably that is how a retrieved node is mapped to its agent;
# TODO confirm against the RecursiveRetriever documentation.
recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever},
    query_engine_dict=agents,
    verbose=True,
)

# Response synthesizer configured with response_mode="compact".
# (The variable name's spelling is left unchanged.)
response_syntesizer = get_response_synthesizer(
    response_mode="compact",
)

# Final query engine: recursive retrieval followed by the synthesizer above.
query_engine = RetrieverQueryEngine.from_args(
    recursive_retriever,
    response_synthesizer=response_syntesizer,
)


In [25]:
# Sets the openai module's `log` attribute to "debug".
# NOTE(review): this looks like a leftover debugging aid that presumably makes
# the client log request details — consider removing it or clearing the
# resulting output before sharing the notebook.
openai.log = "debug"

In [None]:
# Example query sent through the recursive query engine; the result is kept in
# `response` for inspection in the next cell.
response = query_engine.query("Give me a summary of the podcast 'Dr Chris Palmer: Diet and Nutrition for Mental health'")

In [29]:
response

Response(response="Dr. Chris Palmer, a psychiatrist and professor at Harvard Medical School, has conducted research on the relationship between diet and mental health. He has found that providing comprehensive support and education is crucial for successful dietary interventions. This support can come in various forms, such as health and wellness coaches, dieticians, family education, or even pre-prepared meals. By offering this level of support, Dr. Palmer has achieved high compliance rates in his pilot trial. He emphasizes the importance of making the diet as accessible and manageable as possible to increase adherence. Dr. Palmer's approach has been particularly effective for patients with conditions like schizophrenia and bipolar disorder, as they experience significant symptom relief when they adhere to the prescribed diet.", source_nodes=[NodeWithScore(node=TextNode(id_='10d1e3cc-cdda-4233-ac9d-cb86ec0a1aea', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_l