In [8]:
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Ensure you have the required NLTK resources
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Aspire_Lays/nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

In [9]:
# Define the context
context = """
CHRIST (Deemed to be University) was born out of the educational vision of St Kuriakose Elias Chavara, 
an educationalist and social reformer of the nineteenth century in South India. He founded the first 
Catholic indigenous congregation, Carmelites of Mary Immaculate (CMI), in 1831 which administers 
CHRIST (Deemed to be University). CHRIST (Deemed to be University) was established as ‘Christ College’ 
in 1969. It undertook path-breaking initiatives in Indian higher education with the introduction of 
innovative and modern curricula, insistence on academic discipline, imparting of Holistic Education 
and adoption of global higher education practices with the support of creative and dedicated staff.
"""


In [10]:
# Split the context into sentences
sentences = nltk.sent_tokenize(context)

In [12]:
sentences

['\nCHRIST (Deemed to be University) was born out of the educational vision of St Kuriakose Elias Chavara, \nan educationalist and social reformer of the nineteenth century in South India.',
 'He founded the first \nCatholic indigenous congregation, Carmelites of Mary Immaculate (CMI), in 1831 which administers \nCHRIST (Deemed to be University).',
 'CHRIST (Deemed to be University) was established as ‘Christ College’ \nin 1969.',
 'It undertook path-breaking initiatives in Indian higher education with the introduction of \ninnovative and modern curricula, insistence on academic discipline, imparting of Holistic Education \nand adoption of global higher education practices with the support of creative and dedicated staff.']

In [11]:
# Define the questions
questions = [
    "Who was the educational visionary behind CHRIST (Deemed to be University)?",
    "What congregation did St Kuriakose Elias Chavara establish in 1831?",
    "In which year was CHRIST (Deemed to be University) originally established as 'Christ College'?",
    "What were some of the path-breaking initiatives taken by the university in Indian higher education?",
    "How did the university ensure the adoption of global higher education practices?"
]

In [13]:

# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer()

# Fit and transform the sentences
sentence_vectors = vectorizer.fit_transform(sentences)


In [14]:


# Answer extraction using cosine similarity
answers = {}
for question in questions:
    question_vector = vectorizer.transform([question])  # Vectorize the question
    similarities = cosine_similarity(question_vector, sentence_vectors).flatten()
    best_sentence_index = np.argmax(similarities)  # Find the most relevant sentence
    answers[question] = sentences[best_sentence_index]  # Store the best matching sentence

# Print the answers
for question, answer in answers.items():
    print(f"Q: {question}\nA: {answer}\n")


Q: Who was the educational visionary behind CHRIST (Deemed to be University)?
A: CHRIST (Deemed to be University) was established as ‘Christ College’ 
in 1969.

Q: What congregation did St Kuriakose Elias Chavara establish in 1831?
A: 
CHRIST (Deemed to be University) was born out of the educational vision of St Kuriakose Elias Chavara, 
an educationalist and social reformer of the nineteenth century in South India.

Q: In which year was CHRIST (Deemed to be University) originally established as 'Christ College'?
A: CHRIST (Deemed to be University) was established as ‘Christ College’ 
in 1969.

Q: What were some of the path-breaking initiatives taken by the university in Indian higher education?
A: It undertook path-breaking initiatives in Indian higher education with the introduction of 
innovative and modern curricula, insistence on academic discipline, imparting of Holistic Education 
and adoption of global higher education practices with the support of creative and dedicated staff.

In [20]:
import torch
from transformers import BertForQuestionAnswering, BertTokenizer

# Load pre-trained model and tokenizer from Hugging Face
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"

In [21]:

model = BertForQuestionAnswering.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:

# Context and question
context = """
CHRIST (Deemed to be University) was born out of the educational vision of St Kuriakose Elias Chavara, an educationalist and social reformer of the nineteenth century in South India. 
He founded the first Catholic indigenous congregation, Carmelites of Mary Immaculate (CMI), in 1831 which administers CHRIST (Deemed to be University). 
“CHRIST (Deemed to be University) was established as ‘Christ College’ in 1969. 
It undertook path-breaking initiatives in Indian higher education with the introduction of innovative and modern curricula, insistence on academic discipline, imparting of Holistic Education, and adoption of global higher education practices with the support of creative and dedicated staff.
"""
question = "Who was the educational visionary behind CHRIST (Deemed to be University)?"


In [23]:
# Tokenize the context and question
inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")

In [24]:
# Get the start and end token positions for the answer
with torch.no_grad():
    output = model(**inputs)

start_position = output.start_logits.argmax()
end_position = output.end_logits.argmax()


In [25]:
# Convert token indices back to words
answer_tokens = inputs["input_ids"][0][start_position:end_position + 1]
answer = tokenizer.decode(answer_tokens)


In [26]:

# Print the answer
print("Answer:", answer)

Answer: st kuriakose elias chavara
