<a href="https://colab.research.google.com/github/aidanamergembayeva/AutomatedAssessmentSystemWithAI/blob/main/FirstSampleModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from transformers import pipeline

# Load the pre-trained DistilBERT SST-2 text-classification pipeline that
# assess_student_answers() uses to score each prompt.
model = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Function to assess answers
def assess_student_answers(student_answers, ideal_answers):
    """Score each student answer against its ideal answer with the global ``model``.

    Each (student, ideal) pair is merged into a single prompt and passed to the
    module-level ``model`` pipeline. The score is the classifier's confidence
    when the predicted label is ``'POSITIVE'`` and ``1 - confidence`` otherwise,
    expressed as a percentage rounded to two decimals.

    NOTE(review): the checkpoint name of ``model`` indicates an SST-2 sentiment
    classifier, so this presumably reflects the sentiment of the prompt rather
    than the correctness of the answer -- confirm this is intended.

    Args:
        student_answers: Sequence of answers written by the student.
        ideal_answers: Sequence of reference answers, in the same order and of
            the same length as ``student_answers``.

    Returns:
        A list with one percentage score (0-100) per question.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(student_answers) != len(ideal_answers):
        raise ValueError("The number of student answers and ideal answers must be the same.")

    scores = []
    for i, (student_answer, ideal_answer) in enumerate(zip(student_answers, ideal_answers), start=1):
        if student_answer is None or not str(student_answer).strip():
            # A blank answer earns no credit; never ask the classifier to
            # award points for an empty string.
            score = 0.0
        else:
            # Use a real newline between the two parts (the previous "\\n"
            # inserted a literal backslash followed by "n" into the prompt).
            combined_input = f"Student answer: {student_answer} \nIdeal answer: {ideal_answer}"
            result = model(combined_input)[0]
            score = result['score'] if result['label'] == 'POSITIVE' else 1 - result['score']

        scores.append(round(score * 100, 2))  # Scale to a percentage
        print(f"Question {i}: Score = {scores[-1]}%\n")

    return scores

# Demo data: three student answers and, in the same order, the reference
# answers they are graded against.
student_answers = [
    "The process of photosynthesis involves plants using sunlight to make food.",
    "The main cause of World War I was the complex system of alliances.",
    "Gravity is the force that attracts objects toward each other.",
]

ideal_answers = [
    "Photosynthesis is the process by which plants convert sunlight into energy, producing oxygen as a byproduct.",
    "The underlying causes of World War I included militarism, alliances, imperialism, and nationalism.",
    "Gravity is a fundamental force that causes objects to be attracted towards the center of the Earth.",
]

# Grade every answer and show the resulting list of percentages.
scores = assess_student_answers(
    student_answers=student_answers,
    ideal_answers=ideal_answers,
)
print(f"Final scores for each answer: {scores}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]



Question 1: Score = 5.53%\n
Question 2: Score = 6.37%\n
Question 3: Score = 99.58%\n
Final scores for each answer: [5.53, 6.37, 99.58]


In [None]:
import numpy as np
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Load the two pre-trained models used by assess_student_answers():
# a DistilBERT SST-2 text classifier and a MiniLM sentence encoder.
sentiment_model = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
semantic_model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

# Function to assess answers with semantic analysis and sentiment scoring
def assess_student_answers(student_answers, ideal_answers):
    """Score student answers by blending semantic similarity with a sentiment score.

    For every (student, ideal) pair the function computes:

    * the cosine similarity between the ``semantic_model`` embeddings of the
      two answers, and
    * the ``'POSITIVE'`` confidence that ``sentiment_model`` assigns to a
      prompt containing both answers (``1 - confidence`` for any other label).

    The final percentage is ``60% * similarity + 40% * sentiment``, rounded to
    two decimals. The per-question breakdown is printed as it is computed.

    Args:
        student_answers: Sequence of answers written by the student.
        ideal_answers: Sequence of reference answers, in the same order and of
            the same length as ``student_answers``.

    Returns:
        A list of plain Python floats, one final percentage per question.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(student_answers) != len(ideal_answers):
        raise ValueError("The number of student answers and ideal answers must be the same.")

    scores = []
    for i, (student_answer, ideal_answer) in enumerate(zip(student_answers, ideal_answers), start=1):
        if student_answer is None or not str(student_answer).strip():
            # A blank answer earns no credit; do not let either model award
            # points for an empty string.
            similarity_score = 0.0
            sentiment_score = 0.0
        else:
            # Sentiment analysis. A real newline separates the two parts (the
            # previous "\\n" put a literal backslash + "n" into the prompt).
            combined_input = f"Student answer: {student_answer} \nIdeal answer: {ideal_answer}"
            sentiment_result = sentiment_model(combined_input)[0]
            if sentiment_result['label'] == 'POSITIVE':
                sentiment_score = float(sentiment_result['score'])
            else:
                sentiment_score = 1 - float(sentiment_result['score'])

            # Semantic similarity
            student_embedding = semantic_model.encode([student_answer])
            ideal_embedding = semantic_model.encode([ideal_answer])
            raw_similarity = cosine_similarity(student_embedding, ideal_embedding)[0][0]
            # Convert the NumPy scalar to a plain float so the returned list
            # prints as numbers, and clamp at 0 because cosine similarity can
            # be negative while a percentage score should not be.
            similarity_score = max(0.0, float(raw_similarity))

        # Weighted final score (60% semantic, 40% sentiment)
        final_score = round((similarity_score * 0.6 + sentiment_score * 0.4) * 100, 2)
        scores.append(final_score)

        print(f"Question {i}: Semantic Similarity = {round(similarity_score * 100, 2)}%")
        print(f"Question {i}: Sentiment Score = {round(sentiment_score * 100, 2)}%")
        print(f"Question {i}: Final Score = {final_score}%\n")

    return scores

# Demo data: three student answers and, in the same order, the reference
# answers they are graded against.
student_answers = [
    "The process of photosynthesis involves plants using sunlight to make food.",
    "The main cause of World War I was the complex system of alliances.",
    "Gravity is the force that attracts objects toward each other.",
]

ideal_answers = [
    "Photosynthesis is the process by which plants convert sunlight into energy, producing oxygen as a byproduct.",
    "The underlying causes of World War I included militarism, alliances, imperialism, and nationalism.",
    "Gravity is a fundamental force that causes objects to be attracted towards the center of the Earth.",
]

# Grade every answer and show the resulting list of percentages.
scores = assess_student_answers(
    student_answers=student_answers,
    ideal_answers=ideal_answers,
)
print(f"Final scores for each answer: {scores}")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Question 1: Semantic Similarity = 85.52%
Question 1: Sentiment Score = 5.53%
Question 1: Final Score = 53.52%\n
Question 2: Semantic Similarity = 83.11%
Question 2: Sentiment Score = 6.37%
Question 2: Final Score = 52.42%\n
Question 3: Semantic Similarity = 81.8%
Question 3: Sentiment Score = 99.58%
Question 3: Final Score = 88.91%\n
Final scores for each answer: [53.52, 52.42, 88.91]


# Sentence-Level Assessment (Semantic Similarity + Sentiment, with Logging)

In [None]:

import nltk
nltk.download('punkt')

import numpy as np
import logging
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from nltk import sent_tokenize


# Initialize logging.
# basicConfig() does nothing when the root logger already has handlers, which
# is common in notebook environments; in that case the INFO messages below
# would be dropped. force=True (Python 3.8+) replaces any existing root
# handlers so the level and format requested here actually take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Load pre-trained models for NLP: a DistilBERT SST-2 text classifier and a
# MiniLM sentence encoder, both used by assess_student_answers().
logging.info("Loading pre-trained models...")
sentiment_model = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to pre-process and tokenize answers into sentences for better assessment
def preprocess_and_tokenize(text):
    """Split ``text`` into sentences with NLTK's ``sent_tokenize`` and return them."""
    return sent_tokenize(text)

# Function to assess answers with detailed breakdown
def assess_student_answers(student_answers, ideal_answers):
    """Score student answers sentence by sentence and log a detailed breakdown.

    Each answer pair is split into sentences with ``preprocess_and_tokenize``.
    Sentences are paired positionally (first with first, second with second,
    ...); pairing stops at the shorter answer, so surplus sentences in the
    longer answer are not scored. For every sentence pair the function
    computes:

    * the cosine similarity of the ``semantic_model`` embeddings, and
    * the ``'POSITIVE'`` confidence that ``sentiment_model`` assigns to a
      prompt containing both sentences (``1 - confidence`` for any other
      label).

    Both values are averaged over the sentence pairs and combined as
    ``70% * similarity + 30% * sentiment``, rounded to two decimals.

    A question whose student or ideal answer is blank (or yields no
    sentences) scores ``0.0`` and is reported with a warning instead of
    raising ``ZeroDivisionError``.

    Args:
        student_answers: Sequence of answers written by the student.
        ideal_answers: Sequence of reference answers, in the same order and of
            the same length as ``student_answers``.

    Returns:
        A list of plain Python floats, one final percentage per question.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(student_answers) != len(ideal_answers):
        raise ValueError("The number of student answers and ideal answers must be the same.")

    scores = []
    for i, (student_answer, ideal_answer) in enumerate(zip(student_answers, ideal_answers), start=1):
        logging.info("Assessing answer %d...", i)

        # Skip tokenization entirely for blank input: there is nothing to split.
        is_blank = any(
            answer is None or not str(answer).strip()
            for answer in (student_answer, ideal_answer)
        )

        # Tokenize into sentences for more granular analysis
        sentence_pairs = []
        if not is_blank:
            sentence_pairs = list(zip(
                preprocess_and_tokenize(student_answer),
                preprocess_and_tokenize(ideal_answer),
            ))

        if not sentence_pairs:
            # Without this guard the averages below would divide by zero.
            logging.warning(
                "Question %d: empty student or ideal answer; Final Score = 0.0%%", i
            )
            scores.append(0.0)
            continue

        # Initialize cumulative scores
        cumulative_semantic_score = 0.0
        cumulative_sentiment_score = 0.0

        for student_sentence, ideal_sentence in sentence_pairs:
            # Sentiment analysis. A real newline separates the two parts (the
            # previous "\\n" put a literal backslash + "n" into the prompt).
            combined_input = f"Student sentence: {student_sentence} \nIdeal sentence: {ideal_sentence}"
            sentiment_result = sentiment_model(combined_input)[0]
            if sentiment_result['label'] == 'POSITIVE':
                sentiment_score = float(sentiment_result['score'])
            else:
                sentiment_score = 1 - float(sentiment_result['score'])

            # Semantic similarity
            student_embedding = semantic_model.encode([student_sentence])
            ideal_embedding = semantic_model.encode([ideal_sentence])
            raw_similarity = cosine_similarity(student_embedding, ideal_embedding)[0][0]
            # Plain float keeps NumPy scalar types out of the returned list;
            # clamp at 0 because cosine similarity can be negative while a
            # percentage score should not be.
            similarity_score = max(0.0, float(raw_similarity))

            # Update cumulative scores
            cumulative_semantic_score += similarity_score
            cumulative_sentiment_score += sentiment_score

        # Calculate averages for multi-sentence answers
        num_sentences = len(sentence_pairs)
        avg_semantic_score = cumulative_semantic_score / num_sentences
        avg_sentiment_score = cumulative_sentiment_score / num_sentences

        # Weighted final score (70% semantic, 30% sentiment)
        final_score = round((avg_semantic_score * 0.7 + avg_sentiment_score * 0.3) * 100, 2)
        scores.append(final_score)

        # Log detailed breakdown (lazy %-formatting; "%%" is a literal percent sign)
        logging.info("Question %d: Average Semantic Similarity = %s%%", i, round(avg_semantic_score * 100, 2))
        logging.info("Question %d: Average Sentiment Score = %s%%", i, round(avg_sentiment_score * 100, 2))
        logging.info("Question %d: Final Score = %s%%\n", i, final_score)

    return scores

# Demo data: three student answers and, in the same order, the reference
# answers they are graded against.
student_answers = [
    "The process of photosynthesis involves plants using sunlight to make food.",
    "The main cause of World War I was the complex system of alliances.",
    "Gravity is the force that attracts objects toward each other.",
]

ideal_answers = [
    "Photosynthesis is the process by which plants convert sunlight into energy, producing oxygen as a byproduct.",
    "The underlying causes of World War I included militarism, alliances, imperialism, and nationalism.",
    "Gravity is a fundamental force that causes objects to be attracted towards the center of the Earth.",
]

# Grade every answer and show the resulting list of percentages.
scores = assess_student_answers(
    student_answers=student_answers,
    ideal_answers=ideal_answers,
)
print(f"Final scores for each answer: {scores}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Final scores for each answer: [60.24, 58.55, 86.94]
