In [None]:
import nltk
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import torch
from transformers import BertTokenizer, BertModel

# Download necessary NLTK resources
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('stopwords')

# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def calculate_score(question_text, answer_text):
    try:
        # Tokenize and preprocess the question and answer
        question_tokens = nltk.word_tokenize(question_text.lower())
        answer_tokens = nltk.word_tokenize(answer_text.lower())

        # Remove stopwords
        stop_words = set(nltk.corpus.stopwords.words('english'))
        question_tokens = [word for word in question_tokens if word not in stop_words]
        answer_tokens = [word for word in answer_tokens if word not in stop_words]

        # Convert tokens to embeddings
        question_embeddings = get_sentence_embeddings(' '.join(question_tokens))
        answer_embeddings = get_sentence_embeddings(' '.join(answer_tokens))

        # Calculate cosine similarity
        cosine_sim = cosine_similarity(question_embeddings, answer_embeddings)[0][0]

        # Return the similarity score as a dictionary
        return {"score": cosine_sim}

    except Exception as e:
        print(f"Error processing request: {e}")
        return {"error": "An error occurred while processing the request"}

def get_sentence_embeddings(sentence):
    # Tokenize input text and convert tokens to tensor
    inputs = tokenizer(sentence, return_tensors='pt', padding=True, truncation=True)

    # Forward pass through the model to get hidden states
    with torch.no_grad():
        outputs = model(**inputs)

    # Extract the embeddings from the model's output
    embeddings = outputs.last_hidden_state.mean(dim=1).numpy()

    return embeddings


question_text = "What is Tokenization?"
answer_text = "Tokenization is the process of breaking down a piece of text into smaller units, which are typically words or subwords. These smaller units are called tokens. Tokenization is a fundamental step in natural language processing (NLP) tasks because it allows computers to process and analyze text at a granular level."

result = calculate_score(question_text, answer_text)
print(result)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

{'score': 0.5646133}


In [23]:
import openai
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Set your OpenAI API key
openai.api_key = ''

def calculate_score(question_text, answer_text):
    try:
        # Concatenate the question and answer text
        combined_text = f"Question: {question_text}\nAnswer: {answer_text}"

        prompt = f"Given the Question: {question_text}\n\nAnd the answer text: {answer_text}\n\nEvaluate the student's score. Provide a score in percentage."

        # Generate embeddings using OpenAI's GPT model
        response = openai.ChatCompletion.create(
            model='gpt-4-turbo',
            messages=[
                {
                    'role': 'system',
                    'content': 'You are an expert professor scoring on the basis of questions and answers.'
                },
                {
                    'role': 'user',
                    'content': prompt
                }
            ],
            max_tokens=150,
            temperature=0.7
        )

        # Extract the completion text from the response
        completion_text = response['choices'][0]['message']['content']

        # Extract the score from the completion text
        score_str = completion_text.split("Your score: ")[-1].strip()
        score = score_str

        # Return the score
        return {"score": score}

    except Exception as e:
        print(f"Error processing request: {e}")
        return {"error": "An error occurred while processing the request"}

question_text="What is the capital of India?"
answer_text = "Mumbai"

# question_text = "What is Tokenization?"
# answer_text = "Tokenization is the process of breaking down a piece of text into smaller units, which are typically words or subwords. These smaller units are called tokens. Tokenization is a fundamental step in natural language processing (NLP) tasks because it allows computers to process and analyze text at a granular level."

result = calculate_score(question_text, answer_text)
print(result)


{'score': 'The correct capital of India is New Delhi, not Mumbai. Therefore, the given answer is incorrect.\n\nScore: 0%'}


In [None]:
#pip install openai==0.28


Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0


In [20]:
# import gemini

# def calculate_score(question_text, answer_text):
#     try:
#         # Concatenate the question and answer text
#         combined_text = f"Question: {question_text}\nAnswer: {answer_text}"

#         prompt = f"Given the Question: {question_text}\n\nAnd the answer text: {answer_text}\n\nEvaluate the student's score. Provide a score in percentage."

#         response = gemini.generate(prompt, model_name="gemini-pro")

#         # Extract the score from the response (assuming the score format)
#         score_str = response.split("Your score: ")[-1].strip()
#         score = score_str

#         # Return the score
#         return {"score": score}

#     except Exception as e:
#         print(f"Error processing request: {e}")
#         return {"error": "An error occurred while processing the request"}


# question_text = "What is Tokenization?"
# answer_text = "Tokenization is the process of breaking down a piece of text into smaller units, which are typically words or subwords. These smaller units are called tokens. Tokenization is a fundamental step in natural language processing (NLP) tasks because it allows computers to process and analyze text at a granular level."

# result = calculate_score(question_text, answer_text)
# print(result)

ModuleNotFoundError: No module named 'gemini'

In [None]:
pip install gemini

Collecting gemini
  Using cached gemini-0.30.2-py3-none-any.whl
Collecting inheritance>=0.1.3 (from gemini)
  Using cached inheritance-0.1.5-py3-none-any.whl
Collecting geneimpacts>=0.1.3 (from gemini)
  Using cached geneimpacts-0.3.7-py3-none-any.whl
Collecting pysam>=0.6 (from gemini)
  Downloading pysam-0.22.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting cyvcf2>=0.7.2 (from gemini)
  Downloading cyvcf2-0.30.28-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting pybedtools>=0.6.2 (from gemini)
  Using cached pybedtools-0.10.0-cp310-cp310-linux_x86_64.whl
Collecting bottle>=0.11.6 (from gemini)
  Downloading bottle-0.12.25-py3-none-any.whl.metadata (1.8 kB)
Collecting ipyparallel>=4.0 (from gemini)
  Downloading ipyparallel-8.8.0-py3-none-any.whl.metadata (6.4 kB)
Collecting ipython-cluster-helper>=0.5.1 (from gemini)
  Using cached ipython_cluster_helper-0.6.4-py2.py3-none-any.whl
Collecting bx-python>=0.7.1 (from gemini)
  Do

In [None]:
sudo apt install libcairo2-dev pkg-config python3-dev

SyntaxError: invalid syntax (<ipython-input-19-dd092a4aa101>, line 1)