## Goal Prediction
### 1. Load Vectorstore
- Manage the goal prediction module separately

In [13]:
import openai
import langchain
import logging
import json
import os
import pandas as pd
import pickle
from IPython.display import Image, display

# Path configuration: annotation folders and the locations of the saved FAISS indexes.
# All paths are relative, built from the shared annotation root below.
data_path = '../data/ego4d_annotation/'
GOALSTEP_ANNOTATION_PATH = f'{data_path}goalstep/'
SPATIAL_ANNOTATION_PATH = f'{data_path}spatial/'
GOALSTEP_VECSTORE_PATH = f'{GOALSTEP_ANNOTATION_PATH}goalstep_docarray_faiss'
SPATIAL_VECSTORE_PATH = f'{SPATIAL_ANNOTATION_PATH}spatial_docarray_faiss'

### 1. Make Vectorstore(X)

In [14]:
# Embedding Database
import database
from langchain_openai.embeddings import OpenAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
import sys
sys.path.append(os.path.abspath('/usr/local/lib/python3.10/dist-packages'))

from langchain.vectorstores import FAISS
from langchain_community.vectorstores import DocArrayInMemorySearch # do not use this!
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
import docarray


# Set to True to re-embed the annotations even when a saved index already exists
# (e.g. after the annotation files or the document builders changed).
REBUILD_VECSTORE = False


def load_or_build_faiss(index_path, documents, embedding_model, rebuild=False):
    """Return a FAISS vector store for `documents`, reusing a saved index when possible.

    Args:
        index_path: Folder the index is saved to / loaded from.
        documents: Documents to embed when the index has to be built.
        embedding_model: Embedding object passed to FAISS for building and loading.
        rebuild: When True, ignore any saved index and embed from scratch.

    Returns:
        The loaded or freshly built FAISS vector store.
    """
    # `save_local` writes `index.faiss` into the folder (default index name), so its
    # presence is used as the "already built" marker.
    saved_index_file = os.path.join(index_path, "index.faiss")
    if not rebuild and os.path.isfile(saved_index_file):
        # The saved docstore is pickled; only load indexes written by this notebook.
        return FAISS.load_local(index_path, embedding_model, allow_dangerous_deserialization=True)

    # Embedding is the slow step (about 3 minutes for the full dataset) and calls the OpenAI API.
    vector_store = FAISS.from_documents(documents, embedding_model)
    vector_store.save_local(index_path)
    return vector_store


# extract videos list
goalstep_videos_list = database.merge_json_video_list(GOALSTEP_ANNOTATION_PATH)
spatial_videos_list = database.merge_json_video_list(SPATIAL_ANNOTATION_PATH)
print(f"goalstep vids: {len(goalstep_videos_list)} and spatial vids: {len(spatial_videos_list)}")

# make document list(langchain.schema) from the video list
# TODO: when making document list, make sure each segment's parents are well listed!
goalstep_document_list = database.make_goalstep_document_list(goalstep_videos_list)
spatial_document = database.make_spatial_document_list(spatial_videos_list)
print(f"goalstep_document_list: {len(goalstep_document_list)}")
print(f"spatial_document_list: {len(spatial_document)}")

# make (or reload) the FAISS vectorstore for each dataset
embeddings = OpenAIEmbeddings()
goalstep_vector_store = load_or_build_faiss(
    GOALSTEP_VECSTORE_PATH, goalstep_document_list, embeddings, rebuild=REBUILD_VECSTORE
)
spatial_vector_store = load_or_build_faiss(
    SPATIAL_VECSTORE_PATH, spatial_document, embeddings, rebuild=REBUILD_VECSTORE
)

goalstep vids: 717 and spatial vids: 36
goalstep_document_list: 39979
spatial_document_list: 400


In [17]:
# Reload the FAISS indexes saved on disk.
# allow_dangerous_deserialization is required because the saved docstore is pickled;
# only load index folders that this project wrote itself.
goalstep_vector_store = FAISS.load_local(GOALSTEP_VECSTORE_PATH, embeddings, allow_dangerous_deserialization=True)
spatial_vector_store = FAISS.load_local(SPATIAL_VECSTORE_PATH, embeddings, allow_dangerous_deserialization=True)

# Sanity check: show only a few documents. Printing the whole store (tens of thousands
# of documents) floods the notebook output.
PREVIEW_DOC_COUNT = 10
# NOTE: `docstore._dict` is a private attribute of the in-memory docstore and may change between versions.
preview_docs = list(goalstep_vector_store.docstore._dict.values())[:PREVIEW_DOC_COUNT]
for doc in preview_docs:
    print(doc.page_content)

Video UID: c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Goal: Makes the bread
Level 2 Segment 1 for Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: Kneads the dough with the mixer.
Level 2 Segment 2 for Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: Pours the flour into the mixer
Level 2 Segment 3 for Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: Organize the table
Level 2 Segment 4 for Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: Places the pan in the rack.
Level 2 Segment 5 for Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: Prepares the bread
Level 3 Segment 1 for Level 2 Segment 5 in Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: weigh the dough
Level 3 Segment 2 for Level 2 Segment 5 in Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: weigh the dough
Level 3 Segment 3 for Level 2 Segment 5 in Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: weigh the dough
Level 3 Segment 4 for Level 2 Segment 5 in Video c2d06df7-5d3a-4116-9edb-f1c81a4f669b
Step: move scale to tabletop
Level 

### 2. Make Input from Source (wip)
- load vectorstore & make retrievers
- extract input sequence and spatial context from test video

In [None]:
print(os.getcwd())
import sys
import input_source

# load vectorstore & create retrievers
# The stores are persisted as FAISS indexes, so they are loaded with FAISS.load_local
# (the earlier DocArray-based loading referred to files this notebook never writes).
goalstep_vector_store = FAISS.load_local(GOALSTEP_VECSTORE_PATH, embeddings, allow_dangerous_deserialization=True)
spatial_vector_store = FAISS.load_local(SPATIAL_VECSTORE_PATH, embeddings, allow_dangerous_deserialization=True)
goalstep_retriever = goalstep_vector_store.as_retriever()
spatial_retriever = spatial_vector_store.as_retriever()

# Extract input information from annotations
# input() returns a string; it must be converted before it can index a list.
input_video_idx = int(input("what is the video idx for input?: "))

# TODO(review): the same index is used for both lists, which assumes they are aligned
# per video; the two lists have different lengths, so confirm this mapping.
max_shared_idx = min(len(goalstep_videos_list), len(spatial_videos_list)) - 1
if not 0 <= input_video_idx <= max_shared_idx:
    raise IndexError(
        f"video idx {input_video_idx} is out of range; expected a value between 0 and {max_shared_idx}"
    )

goalstep_video = goalstep_videos_list[input_video_idx]
spatial_video = spatial_videos_list[input_video_idx]
input_goalstep_segments = input_source.extract_lower_goalstep_segments(goalstep_video)
input_spatial_context = input_source.extract_spatial_context(spatial_video)


/root/project/script_predict_goal
metrics.pssSy
[Document(metadata={'type': 'level2', 'video_uid': '543e4c99-5d9f-407d-be75-c397d633fe56', 'start_time': 0.62112, 'end_time': 15.483179999999999, 'step_category': 'Make recipes: Take two slices of bread and put them on a plate', 'step_description': 'Place two slices on bread on the plate'}, page_content='Level 2 Segment 1 for Video 543e4c99-5d9f-407d-be75-c397d633fe56\nStep: Place two slices on bread on the plate'), Document(metadata={'type': 'level3', 'video_uid': '7921902b-293f-49e2-9401-d96791a90e15', 'parent_level1_start_time': 137.46841, 'start_time': 142.81772, 'end_time': 188.65649, 'step_category': 'Make recipes: Take two slices of bread and put them on a plate', 'step_description': 'get two pieces of  sliced bread '}, page_content='Level 3 Segment 1 for Level 2 Segment 3 in Video 7921902b-293f-49e2-9401-d96791a90e15\nStep: get two pieces of  sliced bread '), Document(metadata={'type': 'level2', 'video_uid': '94d5eff8-0fac-4719-ad

### 3. Query from Database (WIP)

In [None]:
import query


def search_parent_documents(vector_store, metadata_filter, label):
    """Run a metadata-filtered search and collect every matching document.

    Args:
        vector_store: Store exposing `similarity_search_with_score`.
        metadata_filter: Metadata key/value pairs passed as the search `filter`.
        label: Dataset name used in the printed messages (e.g. "goalstep").

    Returns:
        A `(scored_results, documents)` tuple: the raw `(document, score)` pairs and
        the documents alone. Both are empty lists when nothing matches or the search fails.
    """
    try:
        scored_results = vector_store.similarity_search_with_score(
            query="",
            filter=metadata_filter
        )
    except Exception as e:
        # Best-effort lookup: report the failure and continue with empty results.
        print(f"Error: {label} parent search: {e}")
        return [], []

    if not scored_results:
        print(f"No match: {label} parent")
        return [], []

    # Keep every matched document (not only the last one).
    return scored_results, [document for document, _score in scored_results]


# Retrieve from database
input_query = query.return_input_query(input_goalstep_segments, input_spatial_context)
retrieved_goalstep = goalstep_retriever.get_relevant_documents(input_query)
retrieved_spatial = spatial_retriever.get_relevant_documents(input_query)
print(retrieved_goalstep)
print(retrieved_spatial)

# Retrieve documents' parent documents with metadatasearch
# TODO: replace metafilter with real values extracted from input values
# NOTE(review): document metadata printed elsewhere in this notebook uses keys such as
# 'type' and 'video_uid'; confirm that 'level' / 'parent_id' exist, otherwise nothing matches.
goalstep_metafilter = {"level": 1, "parent_id": 10}
spatial_metafilter = {"level": 1, "parent_id": 10}

# Search with filter inside vector_Store
goalstep_search_results, retrieved_goalstep_parent_documents = search_parent_documents(
    goalstep_vector_store, goalstep_metafilter, "goalstep"
)

# TODO: spatial context only has initial spatial layout. Maybe we should not search this extensively.
spatial_search_results, retrieved_spatial_parent_documents = search_parent_documents(
    spatial_vector_store, spatial_metafilter, "spatial"
)





### 4. Prompt > LLM (WIP)

In [6]:
from dotenv import load_dotenv
from langchain_openai.chat_models import ChatOpenAI

import prompt_source as promptSource
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate

# ready the documents for making the Prompt
# Retrieval results available at this point:
#   retrieved_goalstep, retrieved_goalstep_parent_documents,
#   retrieved_spatial (good example with similar layout?),
#   retrieved_spatial_parent_documents (maybe this is not needed)
# Named `source_query` so the imported `query` module is not shadowed.
# TODO(review): the action-sequence placeholder was empty; `retrieved_goalstep` is an
# assumption - confirm which value belongs here. This string is not yet fed into the prompt.
source_query = (
    f"Here is the initial spatial context: {retrieved_spatial_parent_documents}.\n"
    f"Here are the action sequences: {retrieved_goalstep}"
)


# define llm
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
model = ChatOpenAI(openai_api_key=openai.api_key, model="gpt-3.5-turbo")

# define prompt
prompt = ChatPromptTemplate.from_template(promptSource.template_source)
# Template variables are supplied at invoke time; calling prompt.format() and
# discarding its result has no effect on the chain.
prompt_inputs = {
    "context": promptSource.context,
    "question": promptSource.question,
    "rules": promptSource.rules,
}

# Define chain
parser = StrOutputParser()
chain1 = prompt | model | parser

# Get Response
response = chain1.invoke(prompt_inputs)


### 5. Process Response and Compute Distance

In [None]:
# Add the project root to sys.path so packages outside this folder (e.g. `util`) can be imported.
project_root = os.path.abspath('/root/project')
sys.path.append(project_root)
from util import metrics

# postprocess answers to get goals



# postprocess answers to get lv1 steps?
# Import check: scripts living in other folders can be imported and called from here.
metrics.printtest()


In [None]:
# compute metrics



### 6. Configure Prompt2 for Target Space

In [None]:
import input_source
import input_target
import prompt_target
# Re-import so `query` refers to the module even if an earlier cell rebound the name.
import query

# target space input
source_goalstep_segments = input_goalstep_segments
source_spatial_context = input_spatial_context

# TODO: placeholder - replace with the actual target video annotation.
target_video = []
target_spatial_context = input_source.extract_spatial_context(target_video)

# make target query & retrieve
input_query = query.return_target_input_query(source_goalstep_segments, source_spatial_context, target_spatial_context)
retrieved_goalstep = goalstep_retriever.get_relevant_documents(input_query)
retrieved_spatial = spatial_retriever.get_relevant_documents(input_query)

# Retrieve documents' parent documents for goalstep annotation


# Concat the retrieved results


# define prompt
# TODO(review): this still uses the source prompt template; `prompt_target` is imported
# but not used yet - confirm which template the target-space prompt should use.
prompt2 = ChatPromptTemplate.from_template(promptSource.template_source)
# Template variables are supplied at invoke time; prompt2.format() alone has no effect on the chain.
prompt2_inputs = {
    "context": promptSource.context,
    "question": promptSource.question,
    "rules": promptSource.rules,
}

# Define chain
parser = StrOutputParser()
chain2 = prompt2 | model | parser

# Get Response
response2 = chain2.invoke(prompt2_inputs)

### 7. Configure Combined chain for simple one-go-prediction

In [None]:

# # chain can incorporate other chains
# chain_action = (
#     {"source_spatial_context": itemgetter(source_spatial_context), "goalstep": chain1, "target_spatial_context": itemgetter(target_spatial_context)} | prompt_action | model | parser
# )
from dotenv import load_dotenv
from langchain_openai.chat_models import ChatOpenAI

import prompt_source as promptSource
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate


# define llm
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
model = ChatOpenAI(openai_api_key=openai.api_key, model="gpt-3.5-turbo")

# define prompt
prompt = ChatPromptTemplate.from_template(promptSource.template_source)
# Template variables are supplied at invoke time; calling prompt.format() and
# discarding its result has no effect on the chain.
prompt_inputs = {
    "context": promptSource.context,
    "question": promptSource.question,
    "rules": promptSource.rules,
}

# Define chain
parser = StrOutputParser()
chain1 = prompt | model | parser

# Get Response
response = chain1.invoke(prompt_inputs)