In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader

In [2]:
# API key for the OpenAI clients below; None when the variable is not set
# in the environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF in ``pdf_docs``.

    Args:
        pdf_docs: Iterable of items that ``PdfReader`` can open (the notebook
            passes file paths).

    Returns:
        str: A single leading space (kept for backward compatibility)
        followed by the page texts in document/page order, with no separator
        inserted between pages.
    """
    pieces = [" "]
    for pdf in pdf_docs:
        reader = PdfReader(pdf)
        # Defensive: a page with no extractable text may yield a falsy value;
        # substituting "" keeps the join from failing on a non-string.
        pieces.extend(page.extract_text() or "" for page in reader.pages)
    # Join once instead of growing the string with += inside nested loops.
    return "".join(pieces)


In [4]:
from langchain.text_splitter import CharacterTextSplitter

def get_text_chunks(text):
    """Split ``text`` into chunks with ``CharacterTextSplitter``.

    The splitter is configured with a newline separator, ``chunk_size=1000``,
    ``chunk_overlap=200`` and ``len`` as the length function. Returns the
    result of ``split_text`` unchanged.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)

In [5]:
from langchain_openai import OpenAIEmbeddings
def get_vectorstore(text_chunks):
    """Build a FAISS vector store from ``text_chunks`` using OpenAI embeddings.

    The embeddings client is created with the notebook-level ``OPENAI_API_KEY``.
    """
    return FAISS.from_texts(
        texts=text_chunks,
        embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY),
    )


In [6]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain

In [7]:
from langchain.memory import ConversationBufferMemory
def get_conversation_chain(vectorstore):
    """Create a ``ConversationalRetrievalChain`` backed by ``vectorstore``.

    The chain uses a ``ChatOpenAI`` model, the store's default retriever, and
    a ``ConversationBufferMemory`` that keeps the history under the
    ``chat_history`` key as message objects.
    """
    chat_model = ChatOpenAI(api_key=OPENAI_API_KEY)
    chat_memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        memory=chat_memory,
    )

In [8]:
# Pull the text out of the source PDF, then cut it into chunks.
raw_text = get_pdf_text(pdf_docs=["history.pdf"])
text_chunks = get_text_chunks(text=raw_text)

In [9]:
# Embed the chunks and index them for retrieval.
vectorstore = get_vectorstore(text_chunks=text_chunks)

In [10]:
# Conversational chain (with chat memory) over the indexed chunks.
conversation = get_conversation_chain(vectorstore=vectorstore)

  memory = ConversationBufferMemory(memory_key = 'chat_history',return_messages = True)


In [11]:
# Use `invoke` rather than calling the chain object directly: the direct-call
# form is deprecated in LangChain, and `invoke` is its documented replacement.
conversation.invoke({'question': "who is fredric sorrieu?"})

  conversation({'question' : "who is fredric sorrieu?"})


{'question': 'who is fredric sorrieu?',
 'chat_history': [HumanMessage(content='who is fredric sorrieu?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Frédéric Sorrieu was a French artist who created a series of four prints visualizing his dream of a world made up of 'democratic and social Republics'.", additional_kwargs={}, response_metadata={})],
 'answer': "Frédéric Sorrieu was a French artist who created a series of four prints visualizing his dream of a world made up of 'democratic and social Republics'."}

In [12]:
# Follow-up question; it relies on the chain's memory to resolve "his".
# `invoke` replaces the deprecated direct call on the chain object.
conversation.invoke({'question': "what is his significance?"})

{'question': 'what is his significance?',
 'chat_history': [HumanMessage(content='who is fredric sorrieu?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Frédéric Sorrieu was a French artist who created a series of four prints visualizing his dream of a world made up of 'democratic and social Republics'.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='what is his significance?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Frédéric Sorrieu was a French artist who created a series of four prints visualizing his dream of a world made up of 'democratic and social Republics'. His work depicted a utopian vision where the peoples of Europe and America were shown marching in a long train, paying homage to the statue of Liberty. Sorrieu's artwork symbolized fraternity among nations and highlighted the aspirations for democratic and social republics. His work is significant for reflecting the ideals of nationalism and the emergence

In [13]:
#Approach 2
from langchain_openai import ChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# temperature=0 to keep sampling randomness to a minimum for question generation.
llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)

# The output-format instruction spells out the exact keys ("Question 1",
# "Answer 1", ...) that `call_llm` reads back after json.loads, so the prompt
# and the parser agree. The instruction deliberately contains no literal curly
# braces: ChatPromptTemplate treats {name} as a template variable.
prompt = ChatPromptTemplate.from_template("""
Retrieve all the data from the pdfs and answer strictly according to the data in the pdfs.
You are an expert at creating viva questions for students based on the content of the pdfs.
Create 5 viva questions based on important concepts of the input pdfs and user's difficulty level which is given as input
Generate both question and answer for each question which will be used to compare with the user's answers.
Give the answer as a single valid JSON object and nothing else, with the keys "Question 1", "Answer 1", "Question 2", "Answer 2" and so on up to "Question 5" and "Answer 5", each mapped to a string.


<context>
{context}
</context>

Question: Give me viva questions on  difficulty level-{input} based on the content of the pdfs.""")

# Chain that stuffs the retrieved documents into {context} and calls the model.
document_chain = create_stuff_documents_chain(llm, prompt)

# Retrieval chain: looks up documents for the "input" value, then runs document_chain.
retriever = vectorstore.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

#example input-to be replaced with a summary generated by the llm of the user profile

difficulty_level = "easy"




In [14]:
import json
def call_llm(difficulty_level, chain=None):
    """Generate five viva question/answer pairs for ``difficulty_level``.

    Args:
        difficulty_level: Value sent as the chain's ``input`` (e.g. ``"easy"``).
        chain: Optional object exposing ``invoke(dict) -> dict`` whose result
            has an ``"answer"`` string. Defaults to the notebook-level
            ``retrieval_chain``.

    Returns:
        dict: ``{1: {"question": ..., "answer": ...}, ..., 5: {...}}``.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
        KeyError: If a ``"Question N"`` / ``"Answer N"`` key (N = 1..5) is missing.
    """
    if chain is None:
        chain = retrieval_chain

    # Send the actual value. The previous code passed the literal text
    # "{difficulty_level}" (a plain string, not an f-string), so the
    # requested difficulty never reached the prompt.
    response = chain.invoke({"input": difficulty_level})

    raw_answer = response["answer"].strip()
    # Chat models frequently wrap JSON in a Markdown code fence; remove the
    # backticks and the optional "json" language tag before parsing.
    if raw_answer.startswith("```"):
        raw_answer = raw_answer.strip("`").strip()
        if raw_answer.lower().startswith("json"):
            raw_answer = raw_answer[len("json"):]
    data_dict = json.loads(raw_answer)

    # Restructure the flat "Question N" / "Answer N" mapping into one entry per question.
    return {
        i: {
            "question": data_dict[f"Question {i}"],
            "answer": data_dict[f"Answer {i}"],
        }
        for i in range(1, 6)
    }

In [19]:
# Generate the question set for the configured difficulty and display it.
qa_dict = call_llm(difficulty_level=difficulty_level)
qa_dict

{1: {'question': "Explain the impact of Napoleon's administrative measures on the German-speaking regions in the first half of the nineteenth century.",
  'answer': "Napoleon's administrative measures created a confederation of 39 states out of countless small principalities in the German-speaking regions. Each state had its own currency, weights, and measures, leading to customs barriers and duties for merchants traveling through the region."},
 2: {'question': 'Discuss the initial reactions of local populations to French rule in areas conquered by Napoleon.',
  'answer': 'Initially, in places like Holland, Switzerland, Brussels, Mainz, Milan, and Warsaw, the French armies were welcomed as symbols of liberty. However, the enthusiasm turned to hostility as increased taxation, censorship, and forced conscription overshadowed the administrative changes.'},
 3: {'question': 'What were the economic hardships faced in Europe during the 1830s and the first half of the nineteenth century?',
 

In [16]:
# Look up the stored answer for question 4; `.get` yields None if the entry has no "answer" key.
qa_dict[4].get("answer") #to obtain answer

'Language and popular traditions played a crucial role in the creation of national identity during the nineteenth century, as seen in the use of Polish as a symbol of resistance against Russian dominance and the importance of national languages in asserting cultural identity.'

In [17]:
import nltk
from nltk.tokenize import word_tokenize
from sklearn.metrics import jaccard_score
from sentence_transformers import SentenceTransformer, util

# Fetch the NLTK 'punkt' tokenizer data used by word_tokenize.
nltk.download('punkt')

# Reference answer produced by the model (RAG answer).
model_answer = (
    "Language and popular traditions played a crucial role in the creation "
    "of national identity by symbolizing resistance against dominant powers. "
    "The use of Polish, for example, became a symbol of struggle against "
    "Russian dominance."
)

# Answer supplied by the user. A fuller alternative is kept for comparison:
# user_answer = "Language and traditions were key in forming national identity, especially in resisting powers like Russia, where Polish became a symbol of resistance."
user_answer = "Language and traditions were key in forming and resisting identity example using polish"
# Step 1: Calculate Literal Score (Jaccard Similarity)
def calculate_literal_score(model_ans, user_ans):
    """Return the Jaccard similarity of the two answers' token sets, as a percentage.

    Both answers are lower-cased and tokenized with ``word_tokenize``; the
    score is ``|intersection| / |union| * 100``.

    Args:
        model_ans: Reference answer text.
        user_ans: User answer text.

    Returns:
        float: Score between 0 and 100. Returns 0.0 when neither answer
        produces any token (previously this raised ZeroDivisionError).
    """
    model_tokens = set(word_tokenize(model_ans.lower()))
    user_tokens = set(word_tokenize(user_ans.lower()))

    union = model_tokens | user_tokens
    if not union:
        # Nothing to compare: avoid dividing by zero on two empty answers.
        return 0.0

    shared = model_tokens & user_tokens
    return len(shared) / len(union) * 100  # In percentage

# Step 2: Calculate Semantic Score (Cosine Similarity of Embeddings)
from functools import lru_cache


@lru_cache(maxsize=None)
def _load_sentence_model(model_name):
    """Load the SentenceTransformer ``model_name`` once and reuse it on later calls."""
    return SentenceTransformer(model_name)


def calculate_semantic_score(model_ans, user_ans, model_name='all-MiniLM-L6-v2'):
    """Return the cosine similarity of the two answers' sentence embeddings, as a percentage.

    Args:
        model_ans: Reference answer text.
        user_ans: User answer text.
        model_name: SentenceTransformer model to embed with. Defaults to the
            model this notebook has always used.

    Returns:
        float: Cosine similarity multiplied by 100.
    """
    # The model is cached: constructing it on every call repeated the load
    # for each comparison.
    encoder = _load_sentence_model(model_name)

    model_embedding = encoder.encode(model_ans, convert_to_tensor=True)
    user_embedding = encoder.encode(user_ans, convert_to_tensor=True)

    # .item() unwraps the single similarity value from the returned tensor.
    return util.pytorch_cos_sim(model_embedding, user_embedding).item() * 100  # In percentage

# Generate the report
def generate_comparison_report(model_ans, user_ans):
    """Score ``user_ans`` against ``model_ans`` and format both scores.

    Returns a dict with ``"literal_score"`` and ``"semantic_score"``, each a
    string with two decimals and a trailing percent sign.
    """
    scores = {
        "literal_score": calculate_literal_score(model_ans, user_ans),
        "semantic_score": calculate_semantic_score(model_ans, user_ans),
    }
    return {label: f"{value:.2f}%" for label, value in scores.items()}

# Compare the sample user answer with the model answer and print both scores.
report = generate_comparison_report(model_ans=model_answer, user_ans=user_answer)
print(report)


  from tqdm.autonotebook import tqdm, trange
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\yugtg\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


{'literal_score': '19.44%', 'semantic_score': '67.29%'}


In [1]:
# Sample question/answer data in the flat "Question N" / "Answer N" key layout,
# i.e. the same keys call_llm reads from the parsed model reply before it
# restructures them into a nested dict keyed by integers.
# NOTE(review): assigning to qa_dict here replaces any earlier value bound to
# that name; integer-key lookups such as qa_dict[4] do not apply to this dict.
qa_dict= {
    "Question 1": "What were some of the advantages that businessmen and small-scale producers in Europe realized would facilitate the movement and exchange of goods and capital?",
    "Answer 1": "Uniform laws, standardized weights and measures, and a common national currency.",
    "Question 2": "How did the local populations in areas conquered by the French initially react to French rule?",
    "Answer 2": "In many places such as Holland and Switzerland, as well as in certain cities like Brussels, Mainz, Milan, and Warsaw, the French armies were welcomed as harbingers of liberty.",
    "Question 3": "What were some of the grievances of the weavers in a Silesian village as described by journalist Wilhelm Wolff?",
    "Answer 3": "The contractors drastically reduced payments for finished textiles, leading to extreme misery among the workers.",
    "Question 4": "What issue of injustice was highlighted in the anonymous letter sent to the editor in 1850?",
    "Answer 4": "The denial of political rights to women despite their contributions and responsibilities in society.",
    "Question 5": "What new word is defined as awareness of women's rights and interests based on the belief of social, economic, and political equality of the genders?",
    "Answer 5": "Feminist."
}

# Flat layout: questions are fetched by their "Question N" string key.
qa_dict.get("Question 1")

'What were some of the advantages that businessmen and small-scale producers in Europe realized would facilitate the movement and exchange of goods and capital?'