In [10]:
from langchain_google_genai import ChatGoogleGenerativeAI
import dotenv
from langchain_community.document_loaders import WebBaseLoader, RecursiveUrlLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
dotenv.load_dotenv()

True

In [91]:
import os
from langchain_groq import ChatGroq


chat = ChatGroq(temperature=0,groq_api_key= os.getenv('GROQ'),model_name = 'llama3-70b-8192')
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)


## Load Data

In [12]:
data = {
    'wikis':['National_Institute_of_Open_Schooling','Central_Board_of_Secondary_Education','International_General_Certificate_of_Secondary_Education','International_Baccalaureate','Category:State_secondary_education_boards_of_India'],
    'websites':['https://byjus.com/entrance-examinations/'],
    'recursive':['https://vikaspedia.in/education/career-guidance','https://nios.ac.in','https://cbseacademic.nic.in','https://www.cambridgeinternational.org']
}

In [13]:
from bs4 import BeautifulSoup as Soup
from langchain_community.vectorstores.utils import filter_complex_metadata
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chroma_db = Chroma(collection_name="counsellor",embedding_function=GoogleGenerativeAIEmbeddings(model='models/embedding-001'), persist_directory="./db3")
for k,v in enumerate(data):
    print(k,v)
    if v == 'wikis':
        loader = WebBaseLoader(["https://en.wikipedia.org/wiki/{page}" for page in data[v] ])
        d = loader.load()
        text_splits = text_splitter.split_documents(d)
    if v =='websites':
        loader = WebBaseLoader(data[v])
        k = loader.load()
        text_splits = text_splitter.split_documents(k)
    if v == 'recursive':
        for web in data[v]:
            loader = RecursiveUrlLoader(url=web, max_depth=1, )
            docs = loader.load()
            text_splits = text_splitter.split_documents(docs)
    
    chroma_db.add_documents(filter_complex_metadata(text_splits),)

0 wikis
1 websites
2 recursive


In [8]:
import cohere 

os.environ['COHERE_API_KEY'] = os.getenv('COHERE')
co =  cohere.Client(os.environ['COHERE_API_KEY'])

In [70]:
retriever = chroma_db.as_retriever(search_kwargs={"k": 25})

In [71]:
query ='I got 50% in 12th CBSE Board Exams and I got 68%tile in JEE Mains What do I do now?'

In [72]:
docs = retriever.invoke(query,k=25)

In [73]:
docs.__len__()

25

In [74]:
import re
from langchain_core.documents import Document
import unicodedata

def remove_control_characters(text):
    return "".join(ch for ch in text if unicodedata.category(ch)[0] != "C")
def clean_text(text):
    # Remove leading/trailing whitespace and empty strings
    cleaned_text = re.sub(r'^\s*$\n', '', text, flags=re.MULTILINE)
    # Remove strings containing only whitespace
    cleaned_text = ''.join([line for line in cleaned_text.splitlines() if line.strip()])
    return cleaned_text

cleaned_documents = [remove_control_characters(clean_text(doc.page_content)) for doc in docs]

In [75]:
cleaned_documents

['KCET Counselling  KCET Sample Papers  KCET FAQs  KCET Notifications  KCET Solutions  WBJEEWBJEE Exam Dates  WBJEE Application Form  WBJEE Syllabus  WBJEE Question Paper  WBJEE Admit Card  WBJEE Cutoff  WBJEE Eligibility  WBJEE Exam Pattern  WBJEE Counselling  WBJEE Notifications  GUJCETGUJCET Exam Dates  GUJCET Application Form  GUJCET Syllabus  GUJCET Question Papers  GUJCET Admit Card  GUJCET Cut Off  GUJCET Eligibility  GUJCET Exam Pattern  SRMJEEESRMJEEE Application Form  SRMJEEE Admit',
 'UPSEEUPSEE Exam Dates  UPSEE Application Form  UPSEE Syllabus  UPSEE Admit Card  UPSEE Cutoff  UPSEE Eligibility  UPSEE Exam Pattern  UPSEE Counselling  UPSEE Notification  UPSEE FAQs  UPSEE College Predictor  UPSEE Rank Predictor  HPCETHPCET Question Papers  HPCET Application Form  HPCET Dates  HPCET Counseling  HPCET Answer Key  HPCET Admit Card  HPCET Cut Off  HPCET Rank List  HPCET Result  HPCET Syllabus  HP CET Exam Pattern  AP EAMCETAP EAMCET Application Form  AP EAMCET Syllabus  AP',
 'S

In [76]:
rerank_docs  = co.rerank(query=query,documents=cleaned_documents,top_n=25,model="rerank-english-v2.0")

In [77]:
docs

[Document(page_content='KCET Counselling  KCET Sample Papers  KCET FAQs  KCET Notifications  KCET Solutions  WBJEEWBJEE Exam Dates  WBJEE Application Form  WBJEE Syllabus  WBJEE Question Paper  WBJEE Admit Card  WBJEE Cutoff  WBJEE Eligibility  WBJEE Exam Pattern  WBJEE Counselling  WBJEE Notifications  GUJCETGUJCET Exam Dates  GUJCET Application Form  GUJCET Syllabus  GUJCET Question Papers  GUJCET Admit Card  GUJCET Cut Off  GUJCET Eligibility  GUJCET Exam Pattern  SRMJEEESRMJEEE Application Form  SRMJEEE Admit', metadata={'description': 'Entrance exams are the gateway to securing a seat in premiere institutes such as the IITs, AIIMs, IISERs and IIMs in India. BYJU’s provide the details about all entrance exams.', 'language': 'en', 'source': 'https://byjus.com/entrance-examinations/', 'title': 'Entrance Examinations in India, Types, Dates, Links of different Entrance Examinations'}),
 Document(page_content='UPSEEUPSEE Exam Dates  UPSEE Application Form  UPSEE Syllabus  UPSEE Admit Ca

In [92]:
SYSTEM_TEMPLATE = """
Answer you're questions solely based on the context below and use the persona stated. If the question canot be answered by the context, just answer 'I don't know'.
You should give proper answers and provide almost all the information ,for example do not tell the student to search on the internet.

<context>
{context}
</context>
"""

question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            SYSTEM_TEMPLATE,
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

document_chain = create_stuff_documents_chain(chat, question_answering_prompt)

In [82]:
for i, d in enumerate(rerank_docs):
    print(f'Document:{cleaned_documents[i]}')


Document:KCET Counselling  KCET Sample Papers  KCET FAQs  KCET Notifications  KCET Solutions  WBJEEWBJEE Exam Dates  WBJEE Application Form  WBJEE Syllabus  WBJEE Question Paper  WBJEE Admit Card  WBJEE Cutoff  WBJEE Eligibility  WBJEE Exam Pattern  WBJEE Counselling  WBJEE Notifications  GUJCETGUJCET Exam Dates  GUJCET Application Form  GUJCET Syllabus  GUJCET Question Papers  GUJCET Admit Card  GUJCET Cut Off  GUJCET Eligibility  GUJCET Exam Pattern  SRMJEEESRMJEEE Application Form  SRMJEEE Admit
Document:UPSEEUPSEE Exam Dates  UPSEE Application Form  UPSEE Syllabus  UPSEE Admit Card  UPSEE Cutoff  UPSEE Eligibility  UPSEE Exam Pattern  UPSEE Counselling  UPSEE Notification  UPSEE FAQs  UPSEE College Predictor  UPSEE Rank Predictor  HPCETHPCET Question Papers  HPCET Application Form  HPCET Dates  HPCET Counseling  HPCET Answer Key  HPCET Admit Card  HPCET Cut Off  HPCET Rank List  HPCET Result  HPCET Syllabus  HP CET Exam Pattern  AP EAMCETAP EAMCET Application Form  AP EAMCET Syllab

In [93]:
document_chain.invoke(
    {
        "context": [Document(page_content=cleaned_documents[i]) for i,d in enumerate(rerank_docs)],
        "messages": [
            HumanMessage(content="I got 50% in 12th CBSE Board Exams and I got 68%tile in JEE Mains What do I do now?")
        ],
    }
)

"Congratulations on completing your 12th CBSE Board Exams and taking the JEE Mains exam!\n\nConsidering your scores, you may be eligible for some engineering colleges and universities. However, the specific colleges and courses you can get into will depend on the cutoffs and eligibility criteria of each institution.\n\nBased on the context provided, I can suggest that you explore the following options:\n\n1. **UPSEE**: With your JEE Mains score, you might be eligible for some colleges in Uttar Pradesh. You can check the UPSEE (Uttar Pradesh State Entrance Examination) website for more information on the application process, eligibility, and participating colleges.\n2. **HPCET**: You can also consider applying to colleges in Himachal Pradesh through the HPCET (Himachal Pradesh Common Entrance Test). Check the HPCET website for more information on the application process, eligibility, and participating colleges.\n3. **State-level exams**: Depending on your domicile or residence, you migh