Install the required dependencies:

In [1]:
!pip install -q cassio datasets langchain openai tiktoken

Import the packages we'll need:

In [None]:
# LangChain components to use
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain_community.embeddings import OllamaEmbeddings
import os

# Support for dataset retrieval with Hugging Face
from datasets import load_dataset

# With CassIO, the engine powering the Astra DB integration in LangChain,
# you will also initialize the DB connection:
import cassio

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!pip install PyPDF2



In [None]:
# Fail fast with a readable message when a required credential is missing.
# Assigning os.getenv(name) back into os.environ is a no-op when the variable
# is set and raises an opaque TypeError (value is None) when it is not.
_REQUIRED_ENV_VARS = ("ASTRA_DB_ID", "ASTRA_DB_APPLICATION_TOKEN", "GROQ_API_KEY")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

In [3]:
from PyPDF2 import PdfReader

### Setup

In [None]:
# Astra DB connection settings, taken from the process environment.
# Either name is None when its variable is not set.
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID")
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN")

In [4]:
# Path of the PDF file to index. The raw string keeps the Windows backslashes
# literal instead of relying on unrecognized escape sequences such as "\l" or
# "\p", which newer Python versions warn about.
PDF_PATH = r'D:\langchain_projects\mini_poject_2\placement_policy\Placement Policy-2021-22-1-5.pdf'
pdfreader = PdfReader(PDF_PATH)

In [5]:
from typing_extensions import Concatenate
# Collect the extracted text of every page, skipping pages that yield nothing,
# then concatenate the pieces (no separator) into one string.
page_texts = []
for page in pdfreader.pages:
    content = page.extract_text()
    if content:
        page_texts.append(content)
raw_text = ''.join(page_texts)

In [9]:
# Number of characters extracted from the PDF (quick sanity check).
len(raw_text)

11282

Initialize the connection to your database:

In [14]:
# Initialize the CassIO database connection with the Astra DB token and
# database ID read from the environment earlier in the notebook.
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)

Create the LangChain embedding and LLM objects for later usage:

In [None]:
from langchain_groq import ChatGroq
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Read the Groq key from its own variable. The previous code read
# ASTRA_DB_APPLICATION_TOKEN here, so the Astra token was sent to Groq.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Chat model passed to the index wrapper when answering questions.
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="Llama3-8b-8192")

# Embedding model handed to the vector store.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLm-L6-v2")

Create your LangChain vector store ... backed by Astra DB!

In [16]:
# Vector store that uses `embeddings` and the table named below.
# session and keyspace are passed as None (presumably resolved from the
# earlier cassio.init() call; confirm against the CassIO documentation).
VECTOR_TABLE_NAME = "minipoject_1"
astra_vector_store = Cassandra(
    embedding=embeddings,
    table_name=VECTOR_TABLE_NAME,
    session=None,
    keyspace=None,
)

In [17]:
from langchain.text_splitter import CharacterTextSplitter

# Split the raw text on newlines into overlapping chunks so that no single
# chunk grows too large in tokens. Sizes are measured with len().
splitter_options = dict(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
text_splitter = CharacterTextSplitter(**splitter_options)
texts = text_splitter.split_text(raw_text)

In [18]:
# Preview the first chunks (at most 50) to sanity-check the split.
texts[:50]

['PLACEMENT  POLICY  \n \n \nEligibility Criteria for Placement  \n \n1. The student  must  be a registered student  with the T&P  Office for  Campus  Placement.  \n \n2. Eligibility criteria for placements are prescribed by the company visiting for \nplacements. The Institute or  the T&P Office has no role in this regard. Eligibility  \ncriteria will be communicated in advance to the  students.  Students  shall check  \ntheir eligibility  before  applying  for placement. TPO has full rights to withdraw the \nnames of the non -eligible candidates.  \n \n3. Students are advised to  read the Job Announcement Form by the company \ncarefully. An eligible student giving consent or applying against the offer is \nsupposed to have verified thoroughly the Job profile or company background, etc.',
 'carefully. An eligible student giving consent or applying against the offer is \nsupposed to have verified thoroughly the Job profile or company background, etc.  \n \n4. If the CGPA criteria defi n

### Load the dataset into the vector store



In [19]:

# Add every chunk to the vector store.
# NOTE(review): re-running this cell presumably inserts the chunks again; confirm.
astra_vector_store.add_texts(texts)

# The items are PDF text chunks, not headlines, so the message says so.
print("Inserted %i text chunks." % len(texts))

# Index wrapper around the store; the QA loop calls its query() method.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

Inserted 19 headlines.


### Run the QA cycle

Simply run the cells and ask a question, or type `quit` to stop. (You can also stop execution with the "▪" button on the top toolbar.)

Here are some suggested questions:
* What are the eligibility criteria for students to participate in placements?

* What is the policy for students who get multiple job offers?


In [21]:
# Interactive question/answer loop: read a question, print the model's answer,
# then list the four best-matching chunks with their scores.
# Typing "quit" (any letter case) ends the loop; blank input asks again.
first_question = True
while True:
    prompt = (
        "\nEnter your question (or type 'quit' to exit): "
        if first_question
        else "\nWhat's your next question (or type 'quit' to exit): "
    )
    query_text = input(prompt).strip()

    if query_text.lower() == "quit":
        break
    if not query_text:
        continue

    # Only a non-empty question switches the prompt to the follow-up wording.
    first_question = False

    print("\nQUESTION: \"%s\"" % query_text)
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print("ANSWER: \"%s\"\n" % answer)

    print("FIRST DOCUMENTS BY RELEVANCE:")
    ranked_hits = astra_vector_store.similarity_search_with_score(query_text, k=4)
    for doc, score in ranked_hits:
        # Show the score and the first 84 characters of each matching chunk.
        print("    [%0.4f] \"%s ...\"" % (score, doc.page_content[:84]))


QUESTION: "What is the policy for students who get multiple job offers?"
ANSWER: "According to the policy, students are required to accept the offers within the stipulated period. Undue requests for reconsideration shall not be entertained. If a student accepts an offer, they are not allowed to register for any other company coming later. Additionally, if a student accepts an internship offer, they are not eligible for other companies giving higher stipend/PPO offers or any such benefits.

However, a student already placed can participate again for availing an offer with an improved package and facilities, but only under certain conditions:

1. The package offered by the next company must be at least 1.5 times of the existing package. This will be allowed only twice.
2. The student must meet the company eligibility criteria.
3. All such changeovers will be allowed if and only if the percentage of placed students is not less than 50% of the total number of eligible students of the Inst