In [1]:
from os import listdir
from os.path import isfile, join

# Directory that holds the HTML pages to load.
mypath = r'C:\xampp\htdocs'

# Keep regular files only (sub-directories are skipped) whose extension is
# .html. Matching on the extension instead of the substring 'html' avoids
# picking up unrelated names such as 'html_notes.txt'.
onlyfiles = [
    f for f in listdir(mypath)
    if isfile(join(mypath, f)) and f.lower().endswith('.html')
]

In [None]:
# Show the HTML file names that were collected, as a quick sanity check.
print(onlyfiles)

In [2]:
import os
# Work from the HTML directory so that the bare file names handed to the
# loader resolve against it. Reuses `mypath` (defined in the first cell)
# instead of repeating the literal path, so the two cannot drift apart.
os.chdir(mypath)

In [3]:
#there are plenty of HTMLLoaders to choose from
from langchain_community.document_loaders import UnstructuredHTMLLoader

In [4]:

# `data` maps a running index (0, 1, 2, ...) to the list of documents
# returned by the loader for the corresponding entry of `onlyfiles`.
data = {}

for i, file in enumerate(onlyfiles):
    # Load the file of the current iteration. The file name used to be
    # hard-coded here, so the same page was loaded once per listed file.
    loader = UnstructuredHTMLLoader(file)
    data[i] = loader.load()

In [18]:
#importing the particular text splitter, in this case we would use TokenTextSplitter
from langchain.text_splitter import TokenTextSplitter


# Token-based splitter used for every loaded file.
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=25)

# Post-split documents, keyed by the 'source' metadata of the first
# document loaded from each file.
texts = {}
for idx in range(len(data)):
    docs = data[idx]
    source = docs[0].metadata['source']
    texts[source] = text_splitter.split_documents(docs)


In [None]:
# The vector store's `as_retriever` method takes two arguments: the search
# algorithm to use (`search_type`) and the parameters for that algorithm
# (`search_kwargs`); `k` is the number of documents to return.
# NOTE(review): `vectordb` is not defined in any cell visible here — confirm
# it is created before this cell runs on a fresh kernel.

# Defines the retriever
retriever = vectordb.as_retriever(search_type='mmr', search_kwargs={'k': 1})

# Gets the documents for the query. `invoke` is the Runnable entry point
# that replaces the deprecated `get_relevant_documents`.
retriever.invoke('What is a mood?')

In [23]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore

# Tells how to split the children
child_splitter = TokenTextSplitter(chunk_size=250, chunk_overlap=10)
# Tells how to split the parent
parent_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=50)

# The API key is read from the OPENAI_API_KEY environment variable rather
# than being written into the notebook; a missing variable fails here with
# a KeyError instead of later with an authentication error.
# NOTE(review): `Chroma` and `OpenAIEmbeddings` are not imported in any cell
# visible here — confirm they are imported before this cell runs.
vectorstore = Chroma(
    collection_name="full_documents",
    embedding_function=OpenAIEmbeddings(api_key=os.environ["OPENAI_API_KEY"]),
)
# Creates a document store in memory
store = InMemoryStore()
# This retriever takes the vector store, the document store and both splitters
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

# Hand every loaded file's documents to the retriever, which splits and
# stores them.
for docs in data.values():
    retriever.add_documents(docs)

# Search over documents. `invoke` is the Runnable entry point that replaces
# the deprecated `get_relevant_documents`.
retriever.invoke('Tell me about coaching?')

[Document(metadata={'source': 'Profile of Mood States (POMS).html'}, page_content='topics A-Z Page Index Article Library Coaching General Young Athletes Literature Reviews Coaching Conditioning Endurance Ergogenic Aids Evaluation Injury Prevention Nutrition Physiology Planning Psychology Skill Development Speed Strength Female Athletes Master Athletes Young Athletes Sports Specific Basketball Bodybuilding Boxing Cycling Golf Rowing Rugby Soccer Squash Swimming Tennis Triathlon USA Football Training Conditioning Endurance Ergogenic Aids Evaluation Injury Prevention Nutrition Physiology Planning Psychology Skill Development Speed Strength Anatomy & Physiology Articles Biomechanics Body Systems Cardiovascular Digestive Endocrine Muscular Neurological Respiratory Skeletal Body Type Energy Systems Maximum Heart Rate Movement Analysis Coaching Articles Assessment Communication Ethics Group Dynamics Methods Philosophy Process Principles Responsibilities Styles Skills & Roles Young Athletes Fi

In [28]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from operator import itemgetter

# This is the prompt I used

# It takes in the retrieved documents as {context} and the user-provided {topic}
template = """Mimic the writing style in the context:
{context} and produce a blog on the topic

Topic: {topic}


"""

prompt = ChatPromptTemplate.from_template(template)

# The API key is read from the OPENAI_API_KEY environment variable rather
# than being written into the notebook (`os` is imported in an earlier cell).
model = ChatOpenAI(api_key=os.environ["OPENAI_API_KEY"])


def format_docs(docs):
    """Join the text of the retrieved documents into one prompt-ready string.

    Args:
        docs: iterable of documents exposing a `page_content` attribute.

    Returns:
        The `page_content` of every document, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Using LangChain LCEL to supply the prompt and generate output
chain = (
    {
        # Without the formatting step the prompt would receive the repr of a
        # list of Document objects (metadata and escaped text included)
        # instead of the plain text whose style should be mimicked.
        "context": itemgetter("topic") | retriever | format_docs,
        "topic": itemgetter("topic"),
    }
    | prompt
    | model
    | StrOutputParser()
)
# Running the chain
chain.invoke({"topic": "Mood "})

'Title: Exploring the Intricacies of Mood\n\nMood is a fascinating aspect of human experience that influences how we perceive and interact with the world around us. It is a complex interplay of emotions, thoughts, and physical sensations that can fluctuate throughout the day. Understanding our mood can provide valuable insights into our mental and emotional well-being.\n\nOne of the key components of mood is its transient nature. Our mood can shift in response to various stimuli, such as external events, our thoughts and beliefs, and even our physical state. For example, a stressful day at work may leave us feeling anxious and irritable, while a relaxing evening with loved ones can uplift our spirits and improve our mood.\n\nMoreover, mood can also impact our behavior and decision-making. When we are in a positive mood, we are more likely to be sociable, creative, and open to new experiences. On the other hand, a negative mood can lead to feelings of apathy, pessimism, and a lack of mo