In [87]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_ai21 import AI21SemanticTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ai21 import AI21LLM
from langchain_core.prompts import PromptTemplate
from langchain_ai21 import AI21ContextualAnswers
from langchain import hub
import os

llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
OPENAI_API_KEY = 'sk-proj-gnB8KP0GbrXROIZmAYRKT3BlbkFJveplnj1LUDtIpQcpH1ya'
AI_TWENTYONE_API_KEY = "XaZ0TkUk0Z8NiUMZGxcaD1HPmdeV5V1C"

## <b>TESTING DIFFERENT DOCUMENT SPLITTERS </b>

In [88]:
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
os.environ["AI21_API_KEY"] = AI_TWENTYONE_API_KEY

In [89]:
text = TextLoader("./TestFiles/elijahsresume.txt")

In [90]:
loaded_text = text.load()
loaded_text

[Document(page_content='ELIJAH C. DODSON\n37A Edgewood St, Roxbury, MA, 02119 | 857-205-5833 | dodsone@wit.edu                                                                                         Elijah Dodson - Full Stack Engineer Intern - Fidelity Investments | LinkedIn\nEducation & Coursework\nWentworth Institute of Technology, Boston, MA, Graduation: Aug 2024, BA in Computer Science, GPA: 3.58/4.00\nRelevant Coursework: Computer Science I and II, Data Structures, Algorithms, Data Science Fundamentals, OS \nMathematics: Calc 1 and 2, Discrete Math, Computer Organizations, Linear Algebra & Matrix Theory \nProb. & Stats. for Engineers, Engineering Physics I\nElectives: Global Cities, Microeconomics, Cell and Molecular Biology (Current)\nCathedral High School, Boston, MA, June 2020, GPA: 4.1/4.33\nSkills & Interests\nCoding Languages: Java, Python, JavaScript, HTML, CSS, Vue.js, React.js (Learning), C/C++ (Basics), F# (Learning)\nSoftware: Eclipse, PyCharm, Node.js, VSCode, Jupyter 

In [91]:
# Initializing Splitters

recursive_split = RecursiveCharacterTextSplitter(
    chunk_size=150, chunk_overlap=20, add_start_index=True
)

character_split = CharacterTextSplitter(separator="\n\n",
    chunk_size=50, chunk_overlap=20
)

semantic_split = AI21SemanticTextSplitter(
    chunk_size=150
)

# Splitting text with different splitters
rsLoad = recursive_split.split_documents(loaded_text)
csLoad = character_split.split_documents(loaded_text)
ssLoad = semantic_split.split_documents(loaded_text)

print("Split Length of Each Load  ")
print(f"Recursive Split Load: {len(rsLoad)}")
print(f"Character Split Load: {len(csLoad)}")
print(f"Semantic Split Load: {len(ssLoad)}")

Split Length of Each Load  
Recursive Split Load: 33
Character Split Load: 1
Semantic Split Load: 10


In [None]:
rsLoad

In [None]:
csLoad[0]

In [66]:
ssLoad

[Document(page_content='ELIJAH C. DODSON\n37A Edgewood St, Roxbury, MA, 02119 | 857-205-5833 | dodsone@wit.edu                                                                                         Elijah Dodson - Full Stack Engineer Intern - Fidelity Investments | LinkedIn\nEducation & Coursework\nWentworth Institute of Technology, Boston, MA, Graduation: Aug 2024, BA in Computer Science, GPA: 3.58/4.00\nRelevant Coursework: Computer Science I and II, Data Structures, Algorithms, Data Science Fundamentals, OS \nMathematics: Calc 1 and 2, Discrete Math, Computer Organizations, Linear Algebra & Matrix Theory \nProb. & Stats.\n\nfor Engineers, Engineering Physics I\nElectives: Global Cities, Microeconomics, Cell and Molecular Biology (Current)', metadata={'source': './TestFiles/elijahsresume.txt', 'source_type': 'normal_text'}),
 Document(page_content='Cathedral High School, Boston, MA, June 2020, GPA: 4.1/4.33\nSkills & Interests\nCoding Languages: Java, Python, JavaScript, HTML, CSS, 

In [92]:
rs_vectorstore = Chroma.from_documents(documents=rsLoad, embedding=OpenAIEmbeddings())
cs_vectorstore = Chroma.from_documents(documents=csLoad, embedding=OpenAIEmbeddings())
ss_vectorstore = Chroma.from_documents(documents=ssLoad, embedding=OpenAIEmbeddings())

In [93]:
rs_retriever = rs_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
cs_retriever = cs_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
ss_retriever = ss_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [96]:
tsm = AI21ContextualAnswers()
chain = tsm | StrOutputParser()

response = chain.invoke(
    {"context": loaded_text[0].page_content, "question": "What internships did Elijah complete?"},
)

In [97]:
response

'Elijah completed three internships:\n1. Fidelity Investments: Software Developer Co-op (Spring 2023) | Full Stack Engineer Intern (Summer 2022) | Student Fellowship Intern (Summer 2019)\n2. Bahtabots Team: Improved an existing user interface by creating a new component with Vue.js and allowed other developers to create chat bots quicker. Debugged in areas where code was unresponsive while practicing clean code strategies to create a more organized space. Designed diagrams to help with the visualization and development of new projects.\n3. Cognitive Computing Intern Group: Collaborated with other Tech Summer Interns in a SCRUM environment to implement a machine learning model that compared chat bot utterances and gives more optimal responses and improve user experience for both customers and Fidelity associates. Developed JavaScript code onto Transcript Lambda code for the purpose of adding properties to Elasticsearch lambda through Amazon Web Services'

In [None]:
print("--Enter a question to prompt the assistant--")
while(True):
    question = input("-> ")
    response = chain.invoke(
        {"context": loaded_text[0].page_content, "question": question.split('->')[0]},
    )
    print(response)

In [94]:
#prompt = hub.pull("rlm/rag-prompt")

#def format_docs(docs):
#    return "\n\n".join(doc.page_content for doc in docs)

In [70]:
# rs_rag_chain = (
#     {"context": rs_retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# cs_rag_chain = (
#     {"context": cs_retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# ss_rag_chain = (
#     {"context": ss_retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )


In [84]:
# for chunk in rs_rag_chain.stream("What internships did Elijah complete?"):
#     print(chunk, end="", flush=True)

Elijah completed internships as a Full Stack Engineer at Fidelity Investments during the summer of 2022. He also worked as a Student Fellowship Intern at Fidelity Investments during the summer of 2019.

In [85]:
# for chunk in cs_rag_chain.stream("What internships did Elijah complete??"):
#     print(chunk, end="", flush=True)

Elijah completed internships as a Full Stack Engineer Intern at Fidelity Investments in the summers of 2022 and 2019.

In [86]:
# for chunk in ss_rag_chain.stream("What internships did Elijah complete??"):
#     print(chunk, end="", flush=True)

Elijah completed a Full Stack Engineer Internship at Fidelity Investments during the summer of 2022.