### Recalling narratives with retrieval augmented generation

How do the hippocampus and neocortex work together to recall narratives (and other sequences), whilst minimising the amount of detail stored in the hippocampus?

* The neocortex (NC) creates a gist of each event
* The gist plus unpredictable details are stored in the hippocampus (HPC)
* A stimulus triggers recall
* The relevant event(s) are retrieved from HPC
* NC elaborates the details

In [None]:
!pip uninstall openai -y

In [None]:
!pip install openai --upgrade
!pip install llama-index --upgrade

In [None]:
import sys
sys.path.append('../data/')

import os
import openai
import pandas as pd
import random
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import Document
import logging
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.prompts import PromptTemplate

# Only log records at ERROR level or above are emitted, and they are written
# to stdout so that they appear inline with the notebook output.
logging.basicConfig(level=logging.ERROR, stream=sys.stdout)

# Replace the placeholder with a real OpenAI API key. The same value is both
# exported as an environment variable and passed to the client explicitly
# (presumably the environment variable is what llama-index reads -- confirm).
os.environ['OPENAI_API_KEY'] = 'your key'
client = openai.OpenAI(api_key='your key')

# Smoke test: send one trivial prompt to check that the key and client work.
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say this is a test"}],
)

# Last expression of the cell, so the notebook displays the reply text.
chat_completion.choices[0].message.content

In [None]:
def get_output(input_text, model="gpt-4"):
    """Send one user message to an OpenAI chat model and return the reply text.

    The request is made through the module-level ``client``.

    Args:
        input_text: Prompt text, sent as the content of a single ``user``
            message.
        model: Name of the chat model to query. Defaults to ``"gpt-4"``, the
            model that was previously hard-coded here.

    Returns:
        The message content of the first choice in the completion.
    """
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": input_text}],
        model=model,
        # NOTE(review): log-probabilities are requested but never read in this
        # function -- confirm whether they are still needed.
        logprobs=True,
    )
    return completion.choices[0].message.content

# Quick check that get_output works; as the last expression of the cell, the
# returned reply text is displayed by the notebook.
get_output("Hello world")

In [None]:
def get_stories(path='../data/stories_train.csv'):
    """Load five-sentence stories from a CSV file as single strings.

    The file must contain the columns ``sentence1`` ... ``sentence5``. For
    each row the five values are cast to ``str`` and joined with single
    spaces.

    Args:
        path: Location of the CSV file. Defaults to the training-stories file
            that was previously hard-coded here.

    Returns:
        A list with one combined story string per CSV row, in file order.
    """
    sentence_columns = [f'sentence{i}' for i in range(1, 6)]
    df = pd.read_csv(path)
    df['combined'] = df[sentence_columns].astype(str).agg(' '.join, axis=1)
    return df['combined'].tolist()

# Load every story once; later cells work on a slice of this list.
stories = get_stories()

In [None]:
# Work with the first 50 stories only.
stories_subset = stories[:50]

# One very short summary (the "gist") per story.
gists = [
    get_output(f"{story} \nVery short summary:")
    for story in stories_subset
]

# For each story, ask for the keywords that its gist leaves out.
details = [
    get_output(f"Story: {story} \nSummary: {gist}. \n Keywords featured in story not captured by summary:")
    for story, gist in zip(stories_subset, gists)
]

# The text stored per story: the gist followed by the extra details.
combined = [
    "Gist: " + gist + " Other details: " + detail
    for gist, detail in zip(gists, details)
]

In [None]:
# Wrap each combined gist-plus-details string in a llama-index Document ...
docs = [Document(text=memory_text) for memory_text in combined]

# ... and build the vector index that the retriever searches.
index = VectorStoreIndex.from_documents(docs)

In [None]:
# Prompt used to answer a query from the retrieved context: the context is
# shown between two rules and the model is asked to answer in detail.
qa_prompt_tmpl_str = (
    "Context about a story is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information, answer the query in detail.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

# Retriever over the vector index, configured with similarity_top_k=1.
vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=1)

# Response synthesizer created with the library defaults.
response_synthesizer = get_response_synthesizer()

# Query engine that combines the retriever and the synthesizer.
query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=vector_retriever,
)

# Swap the synthesizer's text QA template for the custom prompt above.
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

#### Test recall:

In [None]:
# Probe recall for every story that was summarised and indexed. Iterating over
# stories_subset, rather than a hard-coded range(50), keeps this loop in step
# with the indexed stories and avoids an IndexError when fewer than 50 stories
# were loaded.
for story in stories_subset:
    print("Original event:")
    print(story)

    # The text before the first full stop serves as the recall cue.
    first_sentence = story.split('.')[0]
    query = f"What happened when {first_sentence.lower()}?"
    print("Query:")
    print(query)

    response = query_engine.query(query)

    # Text of the first source node returned with the response.
    print("\nGist retrieved from hippocampus:")
    print(response.source_nodes[0].text)

    # Answer generated from that retrieved context.
    print("\nNeocortex-elaborated answer:")
    print(response.response)
    print("-------------------------------------")