<a href="https://colab.research.google.com/github/avighna-tripathi/JOEY-BOT/blob/main/JOEY_BOT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import re
import random

# Module-level accumulator: extract_character_lines() appends every dialogue
# line it finds to this list.
dialogues = []

# Shortest "(...)" span on a single line; compiled once and reused on every call.
_PARENTHETICAL = re.compile(r'\(.*?\)')


def strip_parentheses(s):
    """Return *s* with every parenthesised span removed.

    Matching is non-greedy and does not track nesting: each match runs from
    an opening parenthesis to the first closing parenthesis after it. Text
    around a removed span is left untouched, so surrounding spaces remain.
    """
    return _PARENTHETICAL.sub('', s)

def is_character_line(line, character_name):
    """Return True when *line* opens with ``<character_name>:`` plus whitespace.

    The comparison is case-insensitive and anchored at the start of *line*.
    At least one whitespace character must follow the colon, so a bare
    ``Name:`` with nothing after it does not count.

    Args:
        line: A single script line (callers pass it already stripped).
        character_name: The speaker name to look for, taken literally.

    Returns:
        bool: Whether *line* is a speaker cue for *character_name*.
    """
    # re.escape makes the name match as literal text; without it a name such
    # as "Mr. Heckles" would treat "." as a wildcard and a name containing
    # "(" would raise re.error.
    pattern = re.compile(rf'^{re.escape(character_name)}:\s', re.IGNORECASE)
    return bool(pattern.match(line))

def extract_character_lines(file_path, character_name):
    """Append the dialogue spoken by *character_name* in one script file to ``dialogues``.

    A line that starts with ``<character_name>:`` opens a speech; the text
    after the first colon is kept. Every following non-blank line is treated
    as a continuation of that speech until a blank line ends it.
    Parenthesised spans are removed and lines that end up empty are skipped.

    NOTE(review): a continuation is any non-blank line, so another speaker's
    cue that directly follows (with no blank line in between) is collected
    as well. Confirm the script files always separate speeches with a blank
    line.

    Args:
        file_path: Path of the script file to scan.
        character_name: Speaker whose lines are collected.

    Returns:
        None. Results are appended to the module-level ``dialogues`` list.
    """
    in_character_speech = False
    # errors='ignore' drops undecodable bytes, so reading cannot raise
    # UnicodeDecodeError and no handler is needed for it.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as script_file:
        for line in script_file:
            stripped_line = line.strip()
            if is_character_line(stripped_line, character_name):
                in_character_speech = True
                # Keep only what follows the "Name:" prefix.
                spoken = stripped_line.split(':', 1)[1]
            elif in_character_speech and stripped_line:
                spoken = stripped_line
            else:
                # Blank line, or text outside one of the character's speeches.
                in_character_speech = False
                continue

            dialog_line = strip_parentheses(spoken).strip()
            if dialog_line:
                dialogues.append(dialog_line)

def process_directory(directory_path, character_name):
    """Run ``extract_character_lines`` on every regular file directly inside a directory.

    Sub-directories are skipped; the scan is not recursive.

    Args:
        directory_path: Directory that holds the script files.
        character_name: Speaker whose lines are collected.

    Returns:
        None.
    """
    # os.listdir() yields entries in arbitrary order; sorting keeps the order
    # of the collected dialogue the same from run to run.
    for filename in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, filename)
        if os.path.isfile(file_path):  # Ignore directories
            extract_character_lines(file_path, character_name)


In [None]:
# Collect Joey's lines from every file directly under ./sample_data/friends.
process_directory("./sample_data/friends",'Joey')

In [None]:
# Sanity check: number of dialogue lines collected so far.
print(len(dialogues))

814


In [None]:
!pip install openai --upgrade



In [None]:
!pip install ragas langchain_openai



In [None]:
import openai
from google.colab import userdata
# Fetch the OpenAI key through google.colab's userdata helper instead of
# hard-coding it in the notebook.
api_key = userdata.get('OPENAI_API_KEY')

# Also set it as the module-level key on the openai package.
openai.api_key=api_key

In [None]:
# Persist the collected dialogue, one entry per line. The text was decoded
# from the scripts as UTF-8, so write it back as UTF-8 explicitly rather than
# depending on the platform's default encoding. Plain "w" is enough because
# the file is never read through this handle.
with open("./sample_data/joey_lines.txt", "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in dialogues)

In [None]:
!pip install langchain_experimental



In [None]:
from langchain.indexes import VectorstoreIndexCreator
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_experimental.text_splitter import SemanticChunker


# One embeddings client is shared by the chunker and the vector index instead
# of constructing two identical ones.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Split the dialogue into chunks using the "percentile" breakpoint rule.
text_splitter = SemanticChunker(embeddings, breakpoint_threshold_type="percentile")

# Read as UTF-8 explicitly instead of relying on the platform default encoding.
with open("./sample_data/joey_lines.txt", encoding="utf-8") as f:
    joey_lines = f.read()
docs = text_splitter.create_documents([joey_lines])

# Embed the chunks and store them in the index's vector store.
index = VectorstoreIndexCreator(embedding=embeddings).from_documents(docs)



In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.retrievers import RePhraseQueryRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import EmbeddingsFilter

# temperature=0 is passed so answers vary as little as possible between runs.
llm = ChatOpenAI(openai_api_key=api_key, temperature=0)

# Adjacent string literals are concatenated, so every fragment must end with
# a space; the first one previously ran straight into the next sentence
# ("F.R.I.E.N.D.S .Use the given context").
system_prompt = (
    "You are Joey from F.R.I.E.N.D.S. "
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)

# "{context}" and "{input}" are template variables filled in when the
# retrieval chain runs (retrieved documents and the user's question).
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Base retriever: request the 5 nearest chunks from the vector store.
retriever = index.vectorstore.as_retriever(search_kwargs={'k': 5})

In [None]:
# Wrap the base retriever so the LLM re-phrases the user's question before it
# is used to query the vector store.
retriever_from_llm=RePhraseQueryRetriever.from_llm(
    retriever=retriever,llm=llm
)

In [None]:
# Post-filter for retrieved chunks, driven by embedding similarity with a
# 0.76 threshold.
embeddings_filter = EmbeddingsFilter(
    similarity_threshold=0.76,
    embeddings=embeddings,
)

# Retrieval pipeline: re-phrasing retriever first, then the similarity filter.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever_from_llm,
    base_compressor=embeddings_filter,
)

# Answering pipeline: pass the retrieved documents and the question through
# the prompt to the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(compression_retriever, question_answer_chain)

In [None]:
def pretty_print_docs(docs):
    """Print the ``page_content`` of each document under a numbered heading.

    Headings count from 1, and consecutive documents are separated by a rule
    of 100 dashes on its own line.
    """
    divider = f"\n{'-' * 100}\n"
    sections = []
    for i, d in enumerate(docs):
        sections.append(f"Document {i+1}:\n\n" + d.page_content)
    print(divider.join(sections))

# Smoke-test the retrieval pipeline on a sample question and print the
# documents it returns.
compressed_docs = compression_retriever.invoke("Who is the sister of Ross ?")
pretty_print_docs(compressed_docs)

INFO:langchain.retrievers.re_phraser:Re-phrased question: Query for vectorstore: Sister of Ross


Document 1:

. gay. Woah, woah, woah, you have a date? But uh, uh, what about uh, Ross and uh. .
----------------------------------------------------------------------------------------------------
Document 2:

What? Come on, they're close. Oh, like you've never gotten a little rambunctious with Ross. Well, who's to say what's true?
----------------------------------------------------------------------------------------------------
Document 3:

Oh, yeah, I do. [quietly] I never know how long you're supposed to wait in this type of a situation before you can talk again, you know? [Ross stares blankly at him] Maybe a little longer. Ahhhhhh, I didn't get the job. I dunno. Some fat guy's sleeping with the store manager. He's not even jolly, it's all political. Ah, I'm gonna be one of his helpers. It's just such a slap in the face, y'know? Hey, that guy's going home with more than a note! Hi. Hi, sorry I'm late. Nice shoes, huh? You know more than one Fun Bobby? Ooh ooh ooh ooh, there's no 

In [None]:
import logging
logging.basicConfig()
# At INFO level the re-phraser logs the rewritten query for each question.
logging.getLogger("langchain.retrievers.re_phraser").setLevel(logging.INFO)

# Interactive question/answer loop; runs until the user declines to continue.
while True:
    question = input("Hey !! how you doin' ? Ask me anything related to season 1 :")
    if not question.strip():
        # Nothing was typed: ask again rather than run the chain on an empty query.
        continue

    result = chain.invoke({"input": question})
    # Debugging hint: result["context"] holds the retrieved source documents.
    print("\nRESULT:\n")
    print(result["answer"])

    s = input("Wanna ask me more ques [y/n] ?:")
    # Accept "y"/"yes" in any case and with stray whitespace; previously
    # anything but an exact "y" (e.g. "Y" or "y ") ended the session.
    if s.strip().lower() not in ("y", "yes"):
        break

Hey !! how you doin' ? Ask me anything related to season 1 :Name of Chandlers Mother ?


INFO:langchain.retrievers.re_phraser:Re-phrased question: Query for vectorstore: Chandler's mother's name



RESULT:

Nora Tyler Bing.
Wanna ask me more ques [y/n] ?:y
Hey !! how you doin' ? Ask me anything related to season 1 :Occupation of Ross 


INFO:langchain.retrievers.re_phraser:Re-phrased question: Query for vectorstore: Ross occupation



RESULT:

Ross is a paleontologist.
Wanna ask me more ques [y/n] ?:y
Hey !! how you doin' ? Ask me anything related to season 1 :Occupation of all your friends


INFO:langchain.retrievers.re_phraser:Re-phrased question: Query for vectorstore: Occupation friends



RESULT:

Monica is a chef, Ross is a paleontologist, Chandler works in advertising, Phoebe is a masseuse, Rachel works in fashion, and I work as an actor.
Wanna ask me more ques [y/n] ?:n
