# Context correction

In [4]:
from nltk import sent_tokenize
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load model and tokenizer
cc_tokenizer = T5Tokenizer.from_pretrained("DeathReaper0965/t5-context-corrector")
cc_model = T5ForConditionalGeneration.from_pretrained("DeathReaper0965/t5-context-corrector")

# Utility function to correct context
def correct_context(input_text, temperature=0.5):
    # tokenize
    batch = cc_tokenizer(input_text, truncation=True, padding='max_length', max_length=256, return_tensors="pt")

    # forward pass
    results = cc_model.generate(**batch, max_length=256, num_beams=3, no_repeat_ngram_size=2, repetition_penalty=2.5, temperature=temperature,do_sample=True)
    
    return results

# Utility function to split the paragraph into multiple sentences
def split_and_correct_context(sent):
    sents = sent_tokenize(sent)
    
    final_sents = cc_tokenizer.batch_decode(correct_context(sents), 
                                            clean_up_tokenization_spaces=True, 
                                            skip_special_tokens=True)
    
    final_sents = " ".join([final_sents[i].strip() for i in range(len(final_sents))])
    
    return final_sents

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [90]:
from gramformer import Gramformer
gf = Gramformer(models=1, use_gpu=False)
def correct_grammar(text):
    return list(gf.correct(text, max_candidates=1))[0]



[Gramformer] Grammar error correct/highlight model loaded..


In [87]:
from happytransformer import HappyTextToText, TTSettings
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")

args = TTSettings(num_beams=5, min_length=1)
def correct_grammar_2(text):
    return happy_tt.generate_text(f"grammar: {text}", args=args).text

12/15/2024 10:50:17 - INFO - happytransformer.happy_transformer -   Using device: cpu


# Synonyms

In [6]:
import spacy
nlp = spacy.load("en_core_web_sm")

def generate_synonyms(word):
    synonyms = []
    for token in nlp.vocab:
        if token.has_vector and token.is_alpha and token.text.lower() != word.lower():
            similarity = token.similarity(nlp(word)[0])
            if similarity > 0.7:  # Threshold for synonym similarity
                synonyms.append(token.text)
    return synonyms

def calculate_synonym_similarity(true_answer, student_answer):
    true_tokens = [token.text.lower() for token in nlp(true_answer) if token.is_alpha]
    student_tokens = [token.text.lower() for token in nlp(student_answer) if token.is_alpha]

    match_count = 0

    for student_word in student_tokens:
        if student_word in true_tokens:
            match_count += 1
        else:
            synonyms = generate_synonyms(student_word)
            match_count += sum(1 for synonym in synonyms if synonym in true_tokens)

    avg_length = (len(true_tokens) + len(student_tokens)) / 2
    synonym_similarity = match_count / avg_length if avg_length > 0 else 0
    return synonym_similarity

# Bigram

In [7]:
def generate_bigrams(word_list):
    return [(word_list[i], word_list[i + 1]) for i in range(len(word_list) - 1)]

def bigram_similarity(text1, text2):
    list1 = text1.split()
    list2 = text2.split()
    bigrams1 = generate_bigrams(list1)
    bigrams2 = generate_bigrams(list2)
    
    set_bigrams1 = set(bigrams1)
    set_bigrams2 = set(bigrams2)
    
    common_bigrams = set_bigrams1.intersection(set_bigrams2)
    common_count = len(common_bigrams)
    
    avg_bigram_length = (len(set_bigrams1) + len(set_bigrams2)) / 2.0
    
    similarity_score = common_count / avg_bigram_length if avg_bigram_length > 0 else 0    
    return similarity_score


# Cosine

In [132]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

def cos_similarity(sentence1, sentence2):
    embedding1 = model.encode(sentence1, show_progress_bar=False)
    embedding2 = model.encode(sentence2, show_progress_bar=False)
    similarity = cosine_similarity([embedding1], [embedding2])[0][0]
    return similarity

# Average

In [165]:
def avg_similarity(text1, text2):
    return 0.1 * bigram_similarity(text1, text2) + 0.2 * calculate_synonym_similarity(text1, text2) + 0.7 * cos_similarity(text1, text2)

# Preprocessing

In [113]:
def remove_stopwords(text):
    return " ".join([word.lower() for word in text.split() if word.lower() not in nlp.Defaults.stop_words])

In [111]:
def lemmatize_text(text):
    return " ".join([word.lemma_ for word in nlp(text)])

In [133]:
def preprocess_text(text):
    text = lemmatize_text(text)
    text = remove_stopwords(text)
    return text

# Spelling correction (only for answers with few words)

In [79]:
from transformers import pipeline
fix_spelling = pipeline("text2text-generation",model="oliverguhr/spelling-correction-english-base")

def correct_spelling(text):
    return fix_spelling(f"{text}",max_length=2048)[0]['generated_text']

# Comparing 2 sentences

In [89]:
s1 = "mitochondria"
s2 = "mitochondrai"
s1, s2 = correct_spelling(s1), correct_spelling(s2)
print(s1, s2)
s1, s2 = correct_grammar(s1), correct_grammar(s2)
print(s1, s2)
avg_similarity(s1, s2)#.round()

mitochondria. mitochondria.
mitochondria. mitochondria.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

0.9000000834465027

# Paragraph summarizer

In [None]:
def summarize(text, num_sentences=3):
    doc = nlp(text)

    # Calculate sentence scores based on word frequency and sentence length
    sentence_scores = {}
    for sent in doc.sents:
        score = 0
        for token in sent:
            score += token.lemma_.count("_")  # Count underscores in lemmas as a simple measure of importance
        sentence_scores[sent] = score / len(sent)

    # Sort sentences by score and return the top N
    sorted_sentences = sorted(sentence_scores.items(), key=lambda x: x[1], reverse=True)
    return [str(sent) for sent, score in sorted_sentences[:num_sentences]]

# Experimenting

In [120]:
key_ans = nlp(r'''Social Inequality (The Estates System): French society was divided into three estates. The First Estate (clergy) and the Second Estate (nobility) enjoyed privileges, such as exemptions from taxes and special legal rights, while the Third Estate (commoners) paid most of the taxes and had little political power. The Third Estate, which made up about 98% of the population, included peasants, urban workers, and the bourgeoisie (middle class). The resentment of the inequalities between the estates contributed significantly to the revolutionary spirit.
Financial Crisis: France was deeply in debt due to its involvement in expensive wars, particularly the American Revolution (1775–1783), and lavish spending by King Louis XVI and his court. The tax burden largely fell on the Third Estate, while the clergy and nobility were exempt. Efforts to reform the tax system were blocked by the privileged classes. Bad weather in the late 1780s caused crop failures, resulting in food shortages, high bread prices, and widespread hunger. This led to increased suffering for the common people.
Weak Leadership (Louis XVI and Marie Antoinette): King Louis XVI was seen as an ineffective ruler who failed to address France’s financial problems. His indecisiveness and inability to implement necessary reforms weakened his authority. Queen Marie Antoinette, who was unpopular for her extravagant lifestyle and foreign origins, was often blamed for the financial crisis, further fueling discontent.
Enlightenment Ideas: The Enlightenment was an intellectual movement that emphasized reason, individual rights, and the idea of equality before the law. Philosophers like Jean-Jacques Rousseau, Voltaire, and Baron de Montesquieu criticized absolute monarchy and aristocratic privileges. These ideas inspired many in the Third Estate, especially the bourgeoisie, who sought greater political influence and reform of the outdated system.
Ineffective Government and Estates-General: By 1789, France’s financial situation had become so dire that Louis XVI called the Estates-General (a representative assembly) for the first time since 1614, to address the fiscal crisis. The Third Estate, feeling underrepresented and excluded from decision-making, eventually proclaimed itself the National Assembly and vowed to draft a new constitution, a move that challenged the authority of the king and sparked a political crisis.
Economic Hardship and Unrest: With the economic difficulties and food shortages, bread prices soared, and many people could not afford basic food. This created widespread anger and unrest, particularly in urban areas. Many workers and artisans faced unemployment and poor living conditions, which further aggravated social tensions.
''')
'''
Failure of Reform: Some attempts were made by the monarchy to implement reforms, such as calling the Estates-General or suggesting new tax policies, but these were either too little or too late. The reforms did not resolve the underlying economic and social issues, and the monarchy’s refusal to implement deeper changes contributed to the collapse of royal authority.
The Influence of the American Revolution: The success of the American Revolution (1776) provided a model for challenging authority and achieving political change. The idea that a people could overthrow an oppressive government and establish a republic inspired many in France, particularly among the bourgeoisie and revolutionary thinkers.
''';
stud_ans = nlp(r'''Social Inequality: French society was divided into three estates. The First Estate consisted of the clergy, the Second Estate was made up of the nobility, and the Third Estate included peasants, city workers, and the bourgeoisie (middle class). The Third Estate made up about 98% of the population, but they were heavily taxed and had little political power, while the First and Second Estates enjoyed privileges. This created widespread frustration and resentment among the lower classes.
Financial Crisis: By the late 1700s, France was in severe debt, primarily due to its involvement in expensive wars, like the American Revolution, and the lavish spending of King Louis XVI and his court. The government had to borrow large sums of money, leading to an economic crisis. The tax system was inefficient, and the burden fell on the common people, worsening their financial hardship.
Enlightenment Ideas: The Enlightenment, a philosophical movement emphasizing reason, individual rights, and equality, influenced many French thinkers. Ideas about liberty, democracy, and the rights of man began to spread, encouraging people to question traditional authority and the monarchy. Thinkers like Rousseau, Voltaire, and Montesquieu inspired the desire for reform and change.
Poor Harvests and Hunger: France suffered a series of bad harvests in the late 1780s, leading to food shortages, high bread prices, and widespread hunger. The harsh winters and poor agricultural conditions made life difficult for peasants and urban workers, which led to anger and unrest. The high price of bread, a staple food, was particularly damaging to the lower classes.
Weak Leadership: King Louis XVI was seen as a weak and indecisive ruler. His inability to solve the financial crisis or address the grievances of the people made the monarchy appear ineffective. Queen Marie Antoinette was also unpopular, partly due to her lavish lifestyle, which contrasted sharply with the suffering of the common people.
Estates-General and the National Assembly: In 1789, in an attempt to solve the financial crisis, Louis XVI called the Estates-General (a meeting of representatives from all three estates). The Third Estate, frustrated with their lack of power, broke away and declared themselves the National Assembly, signaling the start of a political revolution. They vowed to create a new constitution for France, leading to the formation of a revolutionary government.''')

In [193]:
key_ans = nlp(r'''French society was divided into three estates. The First Estate (clergy) and the Second Estate (nobility) enjoyed privileges, such as exemptions from taxes and special legal rights, while the Third Estate (commoners) paid most of the taxes and had little political power. The Third Estate, which made up about 98% of the population, included peasants, urban workers, and the bourgeoisie (middle class). The resentment of the inequalities between the estates contributed significantly to the revolutionary spirit.
France was deeply in debt due to its involvement in expensive wars, particularly the American Revolution (1775–1783), and lavish spending by King Louis XVI and his court. The tax burden largely fell on the Third Estate, while the clergy and nobility were exempt. Efforts to reform the tax system were blocked by the privileged classes. Bad weather in the late 1780s caused crop failures, resulting in food shortages, high bread prices, and widespread hunger. This led to increased suffering for the common people.
King Louis XVI was seen as an ineffective ruler who failed to address France’s financial problems. His indecisiveness and inability to implement necessary reforms weakened his authority. Queen Marie Antoinette, who was unpopular for her extravagant lifestyle and foreign origins, was often blamed for the financial crisis, further fueling discontent.
The Enlightenment was an intellectual movement that emphasized reason, individual rights, and the idea of equality before the law. Philosophers like Jean-Jacques Rousseau, Voltaire, and Baron de Montesquieu criticized absolute monarchy and aristocratic privileges. These ideas inspired many in the Third Estate, especially the bourgeoisie, who sought greater political influence and reform of the outdated system.
By 1789, France’s financial situation had become so dire that Louis XVI called the Estates-General (a representative assembly) for the first time since 1614, to address the fiscal crisis. The Third Estate, feeling underrepresented and excluded from decision-making, eventually proclaimed itself the National Assembly and vowed to draft a new constitution, a move that challenged the authority of the king and sparked a political crisis.
With the economic difficulties and food shortages, bread prices soared, and many people could not afford basic food. This created widespread anger and unrest, particularly in urban areas. Many workers and artisans faced unemployment and poor living conditions, which further aggravated social tensions.
''')
'''
Failure of Reform: Some attempts were made by the monarchy to implement reforms, such as calling the Estates-General or suggesting new tax policies, but these were either too little or too late. The reforms did not resolve the underlying economic and social issues, and the monarchy’s refusal to implement deeper changes contributed to the collapse of royal authority.
The Influence of the American Revolution: The success of the American Revolution (1776) provided a model for challenging authority and achieving political change. The idea that a people could overthrow an oppressive government and establish a republic inspired many in France, particularly among the bourgeoisie and revolutionary thinkers.
''';
stud_ans = nlp(r'''French society was divided into three estates. The First Estate consisted of the clergy, the Second Estate was made up of the nobility, and the Third Estate included peasants, city workers, and the bourgeoisie (middle class). The Third Estate made up about 98% of the population, but they were heavily taxed and had little political power, while the First and Second Estates enjoyed privileges. This created widespread frustration and resentment among the lower classes.
By the late 1700s, France was in severe debt, primarily due to its involvement in expensive wars, like the American Revolution, and the lavish spending of King Louis XVI and his court. The government had to borrow large sums of money, leading to an economic crisis. The tax system was inefficient, and the burden fell on the common people, worsening their financial hardship.
The Enlightenment, a philosophical movement emphasizing reason, individual rights, and equality, influenced many French thinkers. Ideas about liberty, democracy, and the rights of man began to spread, encouraging people to question traditional authority and the monarchy. Thinkers like Rousseau, Voltaire, and Montesquieu inspired the desire for reform and change.
France suffered a series of bad harvests in the late 1780s, leading to food shortages, high bread prices, and widespread hunger. The harsh winters and poor agricultural conditions made life difficult for peasants and urban workers, which led to anger and unrest. The high price of bread, a staple food, was particularly damaging to the lower classes.
King Louis XVI was seen as a weak and indecisive ruler. His inability to solve the financial crisis or address the grievances of the people made the monarchy appear ineffective. Queen Marie Antoinette was also unpopular, partly due to her lavish lifestyle, which contrasted sharply with the suffering of the common people.
In 1789, in an attempt to solve the financial crisis, Louis XVI called the Estates-General (a meeting of representatives from all three estates). The Third Estate, frustrated with their lack of power, broke away and declared themselves the National Assembly, signaling the start of a political revolution. They vowed to create a new constitution for France, leading to the formation of a revolutionary government.''')

In [None]:
# normal summarization

from collections import Counter
import math

def text_summarization(text, max_threshold=0.05, min_threshold=0.01, summary_length=3):
    # 1. Tokenize the text into words and split into sentences
    sentences = text.split(".\n")
    words = text.split()
    
    # 2. Remove duplicates from the word list
    unique_words = list(set(words))
    
    # 3. Count the frequency of each word
    word_counts = Counter(words)
    total_words = len(words)
    
    # 4. Calculate word percentage (frequency / total word count)
    word_percentages = {word: freq / total_words for word, freq in word_counts.items()}
    
    # 5. Select average frequent words based on min and max thresholds
    keywords = [word for word, percentage in word_percentages.items()
                if min_threshold <= percentage <= max_threshold]
    
    # 6. Count window size and weight each sentence
    sentence_weights = []
    for sentence in sentences:
        sentence_words = sentence.split()
        significant_words = [word for word in sentence_words if word in keywords]
        
        if not significant_words:
            continue
        
        # Window size: Maximum distance between keywords
        positions = [sentence_words.index(word) for word in significant_words]
        window_size = max(positions) - min(positions) + 1 if len(positions) > 1 else 1
        
        # Weight: (Number of keywords)^2 / window size
        weight = (len(significant_words) ** 2) / window_size
        sentence_weights.append((sentence, weight))
    
    # 7. Sort sentences by weight in descending order
    sorted_sentences = sorted(sentence_weights, key=lambda x: x[1], reverse=True)
    
    # 8. Select the top n sentences for the summary
    summary_sentences = [sentence for sentence, weight in sorted_sentences[:summary_length]]
    
    return ". ".join(summary_sentences)

# Example Usage
text = """
Text summarization is the process of distilling the most important information from a source text.
It is commonly used to summarize articles, documents, and other large texts.
The goal is to retain key information while significantly reducing the text length.
Automatic summarization techniques include extractive and abstractive methods.
Extractive summarization selects sentences directly from the source text.
Abstractive summarization generates new sentences that capture the essence of the original text.
"""
summary = text_summarization(text)
print("Summary:")
print(summary)


Summary:
It is commonly used to summarize articles, documents, and other large texts. The goal is to retain key information while significantly reducing the text length. 
Text summarization is the process of distilling the most important information from a source text


In [None]:
# spacy summarization

import spacy
from collections import Counter
import math

def text_summarization_spacy(text, summary_length=3, max_threshold=0.05, min_threshold=0.01):
    # Load the English NLP model from spaCy
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)
    
    # 1. Tokenize the text into sentences and words
    sentences = [sent.text for sent in doc.sents]
    words = [token.text.lower() for token in doc if not token.is_punct and not token.is_space]
    
    # 2. Remove duplicates from the word list
    unique_words = set(words)
    
    # 3. Count the frequency of each word
    word_counts = Counter(words)
    total_words = len(words)
    
    # 4. Calculate word percentage (frequency / total word count)
    word_percentages = {word: freq / total_words for word, freq in word_counts.items()}
    
    # 5. Select average frequent words based on thresholds
    keywords = [word for word, percentage in word_percentages.items()
                if min_threshold <= percentage <= max_threshold]
    
    # 6. Count window size and weight each sentence
    sentence_weights = []
    for sentence in sentences:
        sentence_doc = nlp(sentence)
        sentence_words = [token.text.lower() for token in sentence_doc if not token.is_punct and not token.is_space]
        significant_words = [word for word in sentence_words if word in keywords]
        
        if not significant_words:
            continue
        
        # Window size: Maximum distance between keywords
        positions = [sentence_words.index(word) for word in significant_words]
        window_size = max(positions) - min(positions) + 1 if len(positions) > 1 else 1
        
        # Weight: (Number of keywords)^2 / window size
        weight = (len(significant_words) ** 2) / window_size
        sentence_weights.append((sentence, weight))
    
    # 7. Sort sentences by weight in descending order
    sorted_sentences = sorted(sentence_weights, key=lambda x: x[1], reverse=True)
    
    # 8. Select the top n sentences for the summary
    summary_sentences = [sentence for sentence, weight in sorted_sentences[:summary_length]]
    
    return " ".join(summary_sentences)

# Example Usage
text = """
Text summarization is the process of distilling the most important information from a source text.
It is commonly used to summarize articles, documents, and other large texts.
The goal is to retain key information while significantly reducing the text length.
Automatic summarization techniques include extractive and abstractive methods.
Extractive summarization selects sentences directly from the source text.
Abstractive summarization generates new sentences that capture the essence of the original text.
"""
summary = text_summarization_spacy(text)
print("Summary:")
print(summary)


Summary:
It is commonly used to summarize articles, documents, and other large texts.
 
Text summarization is the process of distilling the most important information from a source text.
 The goal is to retain key information while significantly reducing the text length.



In [103]:
text_summarization_spacy(key_ans.text, 6)#, summarize(stud_ans.text)

'Queen Marie Antoinette, who was unpopular for her extravagant lifestyle and foreign origins, was often blamed for the financial crisis, further fueling discontent.\n Bad weather in the late 1780s caused crop failures, resulting in food shortages, high bread prices, and widespread hunger. France was deeply in debt due to its involvement in expensive wars, particularly the American Revolution (1775–1783), and lavish spending by King Louis XVI and his court. The tax burden largely fell on the Third Estate, while the clergy and nobility were exempt. The Third Estate, feeling underrepresented and excluded from decision-making, eventually proclaimed itself the National Assembly and vowed to draft a new constitution, a move that challenged the authority of the king and sparked a political crisis.\n The Third Estate, which made up about 98% of the population, included peasants, urban workers, and the bourgeoisie (middle class).'

In [None]:
text_summarization_spacy(stud_ans.text, 6)

'The First Estate consisted of the clergy, the Second Estate was made up of the nobility, and the Third Estate included peasants, city workers, and the bourgeoisie (middle class). In 1789, in an attempt to solve the financial crisis, Louis XVI called the Estates-General (a meeting of representatives from all three estates). Ideas about liberty, democracy, and the rights of man began to spread, encouraging people to question traditional authority and the monarchy. They vowed to create a new constitution for France, leading to the formation of a revolutionary government. The government had to borrow large sums of money, leading to an economic crisis. The high price of bread, a staple food, was particularly damaging to the lower classes.\n'

In [114]:
text_summarization_spacy(remove_stopwords(lemmatize_text(stud_ans.text)), 6)

'1789 , attempt solve financial crisis , louis xvi estates - general ( meeting representative estate ) . king louis xvi weak indecisive ruler . vow create new constitution france , lead formation revolutionary government . inability solve financial crisis address grievance people monarchy appear ineffective . government borrow large sum money , lead economic crisis . tax system inefficient , burden fall common people , worsen financial hardship .'

In [115]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [136]:
t1 = '''Beneath the canopy of an ancient forest, sunlight filtered through the dense lattice of leaves, casting dappled patterns on the moss-covered ground. The air was thick with the earthy aroma of damp soil and the occasional sweet scent of blooming wildflowers. Birds chirped melodiously, their songs harmonizing with the gentle rustle of leaves stirred by a faint breeze. Hidden among the underbrush, a small fox observed the scene with curious eyes, its fur blending seamlessly with the auburn hues of fallen leaves. The forest seemed alive, a timeless sanctuary where every whisper of the wind told stories of resilience and renewal.
'''
t2 = '''In the bustling heart of the city, life moved at a relentless pace. Cars honked impatiently, weaving through crowded streets lined with towering skyscrapers that glinted in the morning sun. Vendors shouted to attract passersby, their stalls overflowing with fresh produce, vibrant textiles, and an assortment of trinkets. Amid the chaos, a street performer played a soulful tune on his violin, momentarily drawing a small crowd. The melody, rich with emotion, offered a brief reprieve from the cacophony, reminding everyone of the beauty hidden in the everyday hustle. Above, the sky was a patchwork of clouds and smog, a testament to the city's ceaseless activity.'''

embeddings1 = model.encode(key_ans.text)
embeddings2 = model.encode(stud_ans.text)
print(cosine_similarity([embeddings1], [embeddings2])[0][0])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

0.88476187


In [None]:
def cluster_sentences(sentences, n):
    m = len(sentences)
    if m <= n:
        # If fewer sentences than clusters, each sentence is a cluster
        return [[sentence] for sentence in sentences]

    # Calculate similarities between consecutive sentences
    similarities = [avg_similarity(sentences[i], sentences[i + 1]) for i in range(m - 1)]

    # Find indices of the (m - n) smallest similarities
    partition_indices = sorted(range(len(similarities)), key=lambda i: similarities[i])[:n - 1]

    # Sort partition indices to split sentences sequentially
    partition_indices.sort()

    # Partition sentences based on the identified indices
    clusters = []
    prev_index = 0
    for idx in partition_indices:
        clusters.append(sentences[prev_index:idx + 1])
        prev_index = idx + 1
    clusters.append(sentences[prev_index:])  # Add the final cluster

    return clusters

In [190]:
sentences = ["a", "b", "c", "d", "e"]
n = 7
clusters = cluster_sentences(sentences, n)
print(clusters)

[['a'], ['b'], ['c'], ['d'], ['e']]


In [None]:
def compare_answers(student_answer, answer_key, max_marks):
    
    # student_answer = nlp(preprocess_text(student_answer))
    # answer_key = nlp(preprocess_text(answer_key))
    
    # Get the sentences from the student answer and the answer key
    student_sentences = [str(i) for i in student_answer.sents]
    answer_key_sentences = [str(i) for i in answer_key.sents]
    
    student_clusters = cluster_sentences(student_sentences, max_marks)
    key_clusters = cluster_sentences(answer_key_sentences, max_marks)
    for i in student_clusters:
        print(i)
    for i in key_clusters:
        print(i)
    
    total_marks = 0

    # Compare student clusters with key clusters
    for student_cluster in student_clusters:
        similarity_dict = {}
        
        for student_sentence in student_cluster:
            # Compare each student sentence with each sentence in the answer key
            for key_cluster in key_clusters:
                for key_sentence in key_cluster:
                    similarity = avg_similarity(student_sentence, key_sentence)
                    similarity_dict[key_sentence] = similarity
        
        # Find the key sentence with the maximum similarity
        max_pair = max(similarity_dict, key=similarity_dict.get)
        max_key_sentence = max_pair
        
        # Find the cluster associated with the key sentence
        key_cluster = next(cluster for cluster in key_clusters if max_key_sentence in cluster)
        
        # Calculate the average similarity between clusters
        cluster_similarities = []
        for student_sentence in student_cluster:
            for key_sentence in key_cluster:
                cluster_similarities.append(avg_similarity(student_sentence, key_sentence))
        
        average_similarity = sum(cluster_similarities) / len(cluster_similarities)
        
        # Add 1 mark if average similarity > 0.5
        print(average_similarity)
        if average_similarity > 0.3:
            total_marks += 1
        
        # Remove the used key cluster
        key_clusters.remove(key_cluster)
    
    return total_marks


In [203]:
a1 = nlp(r'''
Cardiac muscles contract automatically and rhythmically, controlled by the autonomic nervous system and specialized pacemaker cells. Intercalated Discs are unique junctions between cardiac muscle cells that contain gap junctions and desmosomes. They allow for synchronized contraction by enabling rapid electrical signal transmission and maintaining structural integrity during powerful contractions. Cardiac muscles have a striated structure similar to skeletal muscles, with alternating light and dark bands, due to the arrangement of actin and myosin filaments.
''')
a2 = nlp(r'''Cardiac muscles work automatically without conscious control, helping the heart pump blood continuously. They have a striated (striped) appearance due to the arrangement of actin and myosin filaments, similar to skeletal muscles. Cardiac muscles are interconnected by intercalated discs, which allow rapid transmission of electrical signals, ensuring synchronized contraction of the heart.
''')

In [204]:
compare_answers(a1, a2, 3)

['\nCardiac muscles contract automatically and rhythmically, controlled by the autonomic nervous system and specialized pacemaker cells.', 'Intercalated Discs are unique junctions between cardiac muscle cells that contain gap junctions and desmosomes.']
['They allow for synchronized contraction by enabling rapid electrical signal transmission and maintaining structural integrity during powerful contractions.']
['Cardiac muscles have a striated structure similar to skeletal muscles, with alternating light and dark bands, due to the arrangement of actin and myosin filaments.\n']
['Cardiac muscles work automatically without conscious control, helping the heart pump blood continuously.']
['They have a striated (striped) appearance due to the arrangement of actin and myosin filaments, similar to skeletal muscles.']
['Cardiac muscles are interconnected by intercalated discs, which allow rapid transmission of electrical signals, ensuring synchronized contraction of the heart.\n']
0.5305729537

2