In [1]:
import logging
import sys
import os

# Send INFO-level (and above) log records to stdout so they show up in the
# notebook output. A single root handler is installed: adding a second
# StreamHandler on the same stream makes every record print twice.
# force=True replaces any handlers already on the root logger, so re-running
# this cell does not stack additional handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

# NumExpr thread limits; set here so they are already in the environment when
# `numexpr` is imported on the next line.
os.environ['NUMEXPR_MAX_THREADS'] = '4'
os.environ['NUMEXPR_NUM_THREADS'] = '2'
import numexpr as ne

In [2]:
import os
from getpass import getpass

# Credentials must not live in the notebook source (it gets shared/committed).
# Use the key already present in the environment; if there is none, prompt for
# it without echoing and keep it only in this process's environment.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Imported after the environment variable is set, matching the original order.
import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

In [3]:
from pathlib import Path
from llama_index.core import download_loader, VectorStoreIndex, load_index_from_storage, SummaryIndex, StorageContext

# Resolve the PDF reader class by name and create the single reader instance
# that is used below to parse every podcast transcript.
# NOTE(review): the cell output shows a warning pointing at this line;
# `download_loader` looks deprecated in current llama_index releases. Consider
# importing the reader class directly instead -- confirm against the installed
# version.
PDFReader = download_loader("PDFReader")

loader = PDFReader()

class PodcastTitle:
    """Plain record describing one podcast transcript.

    Attributes:
        name: Human-readable episode title (interpolated into tool and node
            descriptions).
        about: Short phrase describing what the episode helps with.
        file: Transcript PDF path, relative to ``./assets/AndrewHuberman/``.
        key: Short identifier used for dictionary keys, cache directory names
            and tool names.
    """

    def __init__(self, name, about, file, key):
        self.name, self.about, self.file, self.key = name, about, file, key

# Catalogue of the transcripts to index; each row is (name, about, file, key)
# in the positional order expected by PodcastTitle.
podcast_titles = [
    PodcastTitle(*row)
    for row in (
        (
            "10 Tools for Managing Stress and Anxiety with Huberman",
            "manage stress and anxiety",
            "behaviour/10_Tools_for_Managing_Stress_&_Anxiety_Huberman_Lab_Podcast_10.pdf",
            "tools_for_stress",
        ),
        (
            "The Science of Setting and Achieving Goals with Huberman",
            "set and achieve goals",
            "behaviour/55_The_Science_of_Setting_&_Achieving_Goals_Huberman_Lab_Podcast_55.pdf",
            "setting_goals",
        ),
        (
            "Dr Chris Palmer Diet and Nutrition for Mental Health with Huberman",
            "have healthy diet for mental health",
            "food/99_Dr_Chris_Palmer_Diet_&_Nutrition_for_Mental_Health_Huberman_Lab_Podcast_99.pdf",
            "diet_nutrition",
        ),
    )
]

def _load_cached_index(persist_dir):
    """Load the index persisted under ``persist_dir``.

    Returns the loaded index, or ``None`` when loading raises (for example
    because nothing has been persisted there yet). The failure is logged so
    that a broken cache is visible instead of being silently rebuilt on every
    run.
    """
    try:
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(storage_context)
    except Exception as exc:
        # `Exception`, not a bare `except:`, so KeyboardInterrupt / SystemExit
        # still stop the cell.
        logging.getLogger(__name__).info(
            "No usable index cache at %s (%s: %s); it will be rebuilt.",
            persist_dir, type(exc).__name__, exc,
        )
        return None


# podcast key -> index, one dict per index type.
podcast_vector_index = {}
podcast_summary_index = {}
for podcast in podcast_titles:
    vector_dir = f"./storage/cache/{podcast.key}_vector"
    summary_dir = f"./storage/cache/{podcast.key}_summary"

    # Each cache is tried independently: a missing/broken summary cache must
    # not discard (and re-embed) a vector index that loaded fine.
    vector_index = _load_cached_index(vector_dir)
    summary_index = _load_cached_index(summary_dir)

    if vector_index is None or summary_index is None:
        # Parse the PDF once, and only when at least one index has to be built.
        documents = loader.load_data(file=Path(f"./assets/AndrewHuberman/{podcast.file}"))

        if vector_index is None:
            vector_index = VectorStoreIndex.from_documents(documents)
            vector_index.storage_context.persist(persist_dir=vector_dir)

        if summary_index is None:
            summary_index = SummaryIndex.from_documents(documents)
            summary_index.storage_context.persist(persist_dir=summary_dir)

    podcast_vector_index[podcast.key] = vector_index
    podcast_summary_index[podcast.key] = summary_index




  PDFReader = download_loader("PDFReader")


INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [4]:
# Display the podcast key -> SummaryIndex mapping built in the previous cell.
podcast_summary_index

{'tools_for_stress': <llama_index.core.indices.list.base.SummaryIndex at 0x1e13cfc3b50>,
 'setting_goals': <llama_index.core.indices.list.base.SummaryIndex at 0x1e141c21110>,
 'diet_nutrition': <llama_index.core.indices.list.base.SummaryIndex at 0x1e142400f50>}

In [16]:
from llama_index.core.postprocessor import KeywordNodePostprocessor
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata


# Post-processor shared by every vector query engine below; configured with a
# list of keywords to exclude.
# NOTE(review): these look like sponsor/advertisement terms -- confirm.
node_processor = KeywordNodePostprocessor(
    exclude_keywords=[
        "supplements",
        "LMNT",
        "InsideTracker",
        "Helix",
        "ROKA",
        "Athletic Greens",
        "Thesis",
        "Eight Sleep",
    ],
)

# Imported once up front rather than on every loop iteration.
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI

# podcast key -> agent / query engine.
# (`podcats_*` is a typo for "podcasts"; the names are kept because they are
# notebook-level globals that other cells may reference.)
agents = {}
podcats_vector_engines = {}
podcats_summary_engines = {}

for podcast in podcast_titles:
    # Vector engine: top-3 similarity retrieval over this podcast's vector
    # index, passed through the shared keyword post-processor.
    retriever = VectorIndexRetriever(
        index=podcast_vector_index[podcast.key],
        similarity_top_k=3,
    )
    podcats_vector_engines[podcast.key] = RetrieverQueryEngine(
        retriever=retriever,
        node_postprocessors=[node_processor],
    )

    # Summary engine: default query engine of this podcast's SummaryIndex.
    podcats_summary_engines[podcast.key] = podcast_summary_index[podcast.key].as_query_engine()

    vector_tool = QueryEngineTool(
        query_engine=podcats_vector_engines[podcast.key],
        metadata=ToolMetadata(
            name=f"{podcast.key}_vector_tool",
            description=f"Useful for retrieving specific context from a podcast {podcast.name}. "
            f"Use when you need information related to {podcast.about}.",
        ),
    )

    summary_tool = QueryEngineTool(
        query_engine=podcats_summary_engines[podcast.key],
        metadata=ToolMetadata(
            name=f"{podcast.key}_summary_tool",
            # The first sentence needs its own ". " terminator; without it the
            # two f-strings concatenate into "...'<name>'Use when...".
            description=f"Useful for summary of the podcast '{podcast.name}'. "
            f"Use when you need overview information about how to {podcast.about}. ",
        ),
    )

    query_engine_tools = [vector_tool, summary_tool]

    # One agent per podcast, choosing between that podcast's two tools.
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=OpenAI(temperature=0, model="gpt-3.5-turbo"),
        verbose=True,
    )

    agents[podcast.key] = agent



In [17]:
# Display the podcast key -> OpenAIAgent mapping built in the previous cell.
agents

{'tools_for_stress': <llama_index.agent.openai.base.OpenAIAgent at 0x1e1461e1150>,
 'setting_goals': <llama_index.agent.openai.base.OpenAIAgent at 0x1e148583b50>,
 'diet_nutrition': <llama_index.agent.openai.base.OpenAIAgent at 0x1e1485bd4d0>}

In [18]:
from llama_index.core.schema import IndexNode

# One IndexNode per podcast: the text is a short routing description and
# index_id is the podcast key (the same key used in the `agents` dict).
nodes = [
    IndexNode(
        text=(
            f"This content contains podcast transcript: '{podcast.name}'. "
            f"Use this index if you need to lookup specific information about {podcast.about}.\n"
        ),
        index_id=podcast.key,
    )
    for podcast in podcast_titles
]

In [19]:
# Index over the per-podcast IndexNodes (not over transcript text).
# Note: this rebinds the name `vector_index`, which the index-building cell
# also assigns to.
vector_index = VectorStoreIndex(nodes)
# similarity_top_k=1: only the single best-matching podcast node is retrieved.
vector_retriever = vector_index.as_retriever(similarity_top_k=1)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [20]:
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer

# Router: starts at the "vector" retriever, which returns a podcast IndexNode;
# the retriever is also given the per-podcast agents keyed by podcast key
# (the same values used as index_id on the nodes).
recursive_retriever = RecursiveRetriever(
    root_id="vector",
    retriever_dict={"vector": vector_retriever},
    query_engine_dict=agents,
    verbose=True,
)

# "compact" response mode for the final answer synthesis.
response_syntesizer = get_response_synthesizer(response_mode="compact")

# Top-level engine that the query cells below call.
query_engine = RetrieverQueryEngine.from_args(
    retriever=recursive_retriever,
    response_synthesizer=response_syntesizer,
)


In [21]:
# Request debug-level logging from the openai package.
# NOTE(review): `openai.log` is a pre-1.0 SDK setting; the httpx log lines in
# this notebook's output suggest a 1.x SDK, where this assignment may have no
# effect (1.x is configured via the OPENAI_LOG environment variable / the
# `logging` module) -- confirm against the installed version.
openai.log = "debug"

In [22]:
# Run one query through the recursive query engine and keep the result for
# inspection in the next cell.
response = query_engine.query("Give me a summary of the podcast 'Dr Chris Palmer: Diet and Nutrition for Mental health'")

[1;3;34mRetrieving with query id None: Give me a summary of the podcast 'Dr Chris Palmer: Diet and Nutrition for Mental health'
[0mINFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[1;3;38;5;200mRetrieved node with id, entering: diet_nutrition
[0m[1;3;34mRetrieving with query id diet_nutrition: Give me a summary of the podcast 'Dr Chris Palmer: Diet and Nutrition for Mental health'
[0mAdded user message to memory: Give me a summary of the podcast 'Dr Chris Palmer: Diet and Nutrition for Mental health'
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
=== Calling Function ===
Calling function: diet_nutrition_summary_tool with args: {"input":"Dr Chris Palmer: Diet and Nutrition for Mental health"}
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/cha

In [23]:
# Display the full Response object (answer text plus source nodes).
response

Response(response="The podcast discusses the potential benefits of supplementing with ketone esters or salts to correct brain metabolism deficits in the short term, particularly for Alzheimer's disease. It also emphasizes the comprehensive approach of the ketogenic diet, which includes enhancements in glucose levels, insulin signaling, and mitochondrial biogenesis, highlighting its potential advantages for mental health.", source_nodes=[NodeWithScore(node=TextNode(id_='d4e68ea2-dfd1-4e89-a853-af9b824a4a52', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="Query: Give me a summary of the podcast 'Dr Chris Palmer: Diet and Nutrition for Mental health'\nResponse: The summary of the podcast 'Dr Chris Palmer: Diet and Nutrition for Mental health' highlights the potential benefits of supplementing with ketone esters or salts to correct brain metabolism deficits in the short term, particularly for Alzheimer's disease. The ket