In [1]:
# DIRECTORY SET
import os
import sys
import traceback
from pathlib import Path

# Resolve the project root only once per kernel. The root is derived from the
# current working directory, which this very cell changes, so re-running the
# cell without this guard would climb two more directories on every run.
if "base_dir" not in globals():
    base_dir=Path(os.getcwd()).parent.parent
os.chdir(base_dir)
print(os.getcwd())

# ENVIRONMENT VARIABLES
import dotenv
dotenv.load_dotenv()

# DJANGO SETUP
import django
# Make the project root importable; skip the append on re-runs so sys.path
# does not accumulate duplicate entries.
project_path=os.path.abspath('')
if project_path not in sys.path:
    sys.path.append(project_path)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "serverproject.settings")
django.setup()

# Import async modules
import asyncio
from asgiref.sync import sync_to_async

# Import display modules
from IPython.display import display, Markdown

# Import other modules
import faiss

# import reloading
from importlib import reload

d:\DestinyFolder\DestinyRecaps\DestinyRecapsApi\serverproject


In [2]:
%load_ext autoreload
%autoreload 2

from destinyapp.models import StreamRecapData

from destinyapp.customlibrary import services
from destinyapp.customlibrary import utils
from destinyapp.customlibrary import controller

In [None]:
# Video whose stored recap data is used by the experiments below.
video_id="OqVH_MTBQ6k"
# Later cells read `.transcript` and `.recap` from the returned object.
stream_recap_data=await utils.get_recap_data(video_id)

In [None]:
# Build a vector DB and its text chunks from the transcript.
# NOTE(review): neither result is passed to services.search in the cells
# below (those arguments are commented out there) — presumably search loads
# its own index; confirm whether this cell is still needed.
vector_db, text_chunks = await services.VectorDbAndTextChunksGenerator.generate_basic_vectordb_and_chunks(video_id, stream_recap_data.transcript)

In [None]:
# Sample user question fed to the search and to the chat prompt below.
user_prompt="Can you explain what the federalist papers are?"

In [None]:
# Retrieve the 10 best matches for the question; the result is indexed with
# the "all_indexes" key by the merge step further down.
search_results=await services.search(video_id, user_prompt, k_size=10)#, vector_db, text_chunks

In [None]:
# Display the raw search result for inspection.
search_results

In [None]:
# NOTE(review): `start_stops` is first assigned by the merge cell that comes
# AFTER this one, so on a fresh kernel (Restart & Run All) this cell raises
# NameError. Move it below the merge cell or delete it.
start_stops

In [None]:
# merge the overlapping text segments
SEGMENT_LENGTH=1000  # characters of transcript taken starting at each search hit
MERGE_GAP=500        # windows separated by at most this many characters are joined

def merge_segment_windows(hit_indexes, segment_length=SEGMENT_LENGTH, merge_gap=MERGE_GAP):
    """Turn search-hit offsets into merged [start, stop] transcript windows.

    Every hit contributes the window [hit, hit + segment_length). Windows are
    processed in transcript order; a window that overlaps the previous one, or
    starts at most `merge_gap` characters after it ends, is folded into it.

    Args:
        hit_indexes: iterable of integer offsets into the transcript.
        segment_length: size of the window taken at each hit.
        merge_gap: largest gap between two windows that still merges them.

    Returns:
        List of [start, stop] pairs sorted by start; empty when there are no
        hits.
    """
    merged=[]
    # Negative offsets are clamped to 0 so the slice below cannot wrap around
    # to the end of the transcript.
    for start in sorted(max(0, int(hit)) for hit in hit_indexes):
        stop=start+segment_length
        if merged and start-merged[-1][1] <= merge_gap:
            # Extend to cover this hit's full window; never shrink the window.
            merged[-1][1]=max(merged[-1][1], stop)
        else:
            merged.append([start, stop])
    return merged

start_stops=merge_segment_windows(search_results["all_indexes"])

# produce the segments as a string
rag_context_str="".join(
    f"Chunk {i}: "+stream_recap_data.transcript[start:stop]+"\n\n"
    for i, (start, stop) in enumerate(start_stops)
)


In [None]:

# Template for the system message; {stream_recap} and {rag_context} are filled
# in right below.
# NOTE(review): the wording contains typos ("engauge", "accuracy answer").
# They are left untouched here because the text is sent to the model verbatim.
system_prompt_template="""You are a stream bot. You engauge with the user with respect to a past livestream.

You will be given context from the stream the user is talking about by method of RAG. Do your best to accuracy answer the user's question or engage intelligently given the context of the stream. 

--------------------------------------------

Here is the recap for the stream you are to be knowledgeable about:
{stream_recap}

--------------------------------------------

Here is the RAG context raw from the transcript that is potentially relevant to what the user is saying:
{rag_context}

--------------------------------------------
Always try to be concise in what you are saying and talk about things in the direct context of the stream or stream recap.

"""
system_prompt=system_prompt_template.format(
    stream_recap=stream_recap_data.recap,
    rag_context=rag_context_str,
)

# Two-message conversation: the instructions first, then the user's question.
system_message={"role":"system", "content":system_prompt}
user_message={"role":"user", "content":user_prompt}
full_prompt=[system_message, user_message]

In [None]:
# Send the two-message prompt to the gpt_4o_mini model and show the reply.
response=await utils.async_response_handler(full_prompt, utils.ModelNameEnum.gpt_4o_mini)
print(response)

In [None]:
class StreamBot:
    """Prototype chat bot that answers questions about one past livestream.

    Each call retrieves transcript windows relevant to the user's message,
    rebuilds the system prompt around them, and sends the whole conversation
    to the model.
    """

    # Conversation so far as a list of {"role", "content"} messages. It lives
    # on the class because answer_user is a classmethod, so it is shared by
    # every caller; assigning `instance.chat_history` does not affect it.
    chat_history=[]

    SEARCH_K=10          # number of search hits requested per question
    SEGMENT_LENGTH=1000  # characters of transcript taken starting at each hit
    MERGE_GAP=500        # windows at most this far apart are merged

    @staticmethod
    def _merge_segments(hit_indexes, segment_length, merge_gap):
        """Turn search-hit offsets into merged [start, stop] transcript windows.

        Every hit contributes [hit, hit + segment_length). Windows are handled
        in transcript order; one that overlaps the previous window, or starts
        at most `merge_gap` characters after it ends, is folded into it.
        Returns an empty list when there are no hits.
        """
        merged=[]
        # Negative offsets are clamped to 0 so slicing cannot wrap around to
        # the end of the transcript.
        for start in sorted(max(0, int(hit)) for hit in hit_indexes):
            stop=start+segment_length
            if merged and start-merged[-1][1] <= merge_gap:
                # Cover this hit's full window; never shrink the merged one.
                merged[-1][1]=max(merged[-1][1], stop)
            else:
                merged.append([start, stop])
        return merged

    @classmethod
    async def answer_user(cls, video_id, user_prompt, test=None):
        """Answer `user_prompt` about the stream `video_id` and record the turn.

        Args:
            video_id: id passed to utils.get_recap_data and services.search.
            user_prompt: the user's message.
            test: unused; kept so existing callers keep working.

        Returns:
            Whatever utils.async_response_handler returns for the conversation.
            On success the class-level chat_history is replaced with
            [system, ...earlier turns, user, assistant].
        """
        stream_recap_data=await utils.get_recap_data(video_id)

        search_results=await services.search(video_id, user_prompt, k_size=cls.SEARCH_K)

        # merge the overlapping text segments
        start_stops=cls._merge_segments(
            search_results["all_indexes"], cls.SEGMENT_LENGTH, cls.MERGE_GAP
        )

        # produce the segments as a string
        rag_context_str="".join(
            f"Chunk {i}: "+stream_recap_data.transcript[start:stop]+"\n\n"
            for i, (start, stop) in enumerate(start_stops)
        )

        # Compile prompt
        system_prompt="""You are a stream bot. You engauge with the user with respect to a past livestream.

        You will be given context from the stream the user is talking about by method of RAG. Do your best to accuracy answer the user's question or engage intelligently given the context of the stream. 

        --------------------------------------------

        Here is the recap for the stream you are to be knowledgeable about:
        {stream_recap}

        --------------------------------------------

        Here is the RAG context raw from the transcript that is potentially relevant to what the user is saying:
        {rag_context}

        --------------------------------------------
        Always try to be concise in what you are saying and talk about things in the direct context of the stream or stream recap.

        """.format(stream_recap=stream_recap_data.recap, rag_context=rag_context_str)

        # The system message is rebuilt on every turn because the retrieved
        # context depends on the latest question; earlier user/assistant turns
        # are carried over unchanged.
        earlier_turns=[m for m in cls.chat_history if m.get("role")!="system"]
        messages=[
            {"role":"system", "content":system_prompt},
            *earlier_turns,
            {"role":"user", "content":user_prompt},
        ]

        response=await utils.async_response_handler(messages, utils.ModelNameEnum.gpt_4o_mini)

        # Model replies are "assistant" messages. The history is committed only
        # after the call succeeded, so a failed request leaves it unchanged.
        messages.append({"role":"assistant", "content":response})
        cls.chat_history=messages

        return response

In [4]:
# Smoke-test the StreamBot that lives in `services` (not the prototype class
# defined in this notebook). Its answer_user is called here with
# (chat_history, video_id).
chat_history=[{'role': 'user', 'content': 'test'}]
video_id="OqVH_MTBQ6k"
stream_bot=services.StreamBot()
response=await stream_bot.answer_user(chat_history, video_id)


History: [{'role': 'user', 'content': 'test'}]
Cost:  0.0009653999999999999


In [5]:
# Display the reply returned by the previous call.
response

"It looks like you're testing the system! If you have any questions or topics from the recent livestream you'd like to discuss, feel free to ask!"

In [None]:
# Show the role of the most recent message in the notebook-level chat_history.
chat_history[-1]["role"]

In [None]:
# Start over with a new services.StreamBot and a default question.
stream_bot=services.StreamBot()
video_id="OqVH_MTBQ6k"
user_prompt="Can you explain what the federalist papers are?"
# Sets chat_history on this instance to an empty list.
stream_bot.chat_history=[]

In [None]:
# Overwrites the default question from the previous cell with typed input.
# NOTE(review): input() blocks until someone types, so this cell stalls an
# unattended Restart & Run All.
user_prompt=input("User Prompt: ")

In [None]:
# NOTE(review): the arguments here are (video_id, user_prompt), while the
# executed cell that passes `chat_history` first calls the same
# services.StreamBot.answer_user with (chat_history, video_id). One of the two
# orders is presumably stale — confirm against services.StreamBot.
response=await stream_bot.answer_user(video_id, user_prompt)
print(response)

In [None]:
import requests

In [None]:
# Chatbot endpoint on the local development server.
# NOTE(review): the pin is hard-coded in the URL; prefer loading it from the
# environment so it is not committed with the notebook.
url='http://127.0.0.1:8000/api/chatbot_response?pin=194&video_id=OqVH_MTBQ6k'
# set the chat_history in the body

request_body={"chat_history":["test"]}

# requests has no default timeout, so without one this cell would hang forever
# if the server is down or never answers.
REQUEST_TIMEOUT_SECONDS=60
response=requests.post(url, json=request_body, timeout=REQUEST_TIMEOUT_SECONDS)



In [None]:
# Decode the HTTP response body as JSON and print it.
print(response.json())

In [None]:
# convert this b'{"chat_history": ["test"]}' to a dictionary


In [None]:
# Raw JSON request body as bytes, used to try out decoding below.
test=b'{"chat_history": ["test"]}'

In [None]:
import json

In [None]:
# Parse the JSON bytes into a dict. json.loads accepts bytes directly, so no
# explicit .decode("utf-8") step is needed first.
json_data=json.loads(test)

In [None]:
# First entry of the parsed "chat_history" list.
json_data["chat_history"][0]