In [1]:
import json
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.linear_model import LinearRegression
from sentence_transformers import SentenceTransformer
import numpy as np
from tqdm import tqdm






In [2]:
import json

# Load the training set from a JSON Lines file: every non-blank line is
# parsed as one JSON record and collected into `dataset`.
dataset = []

with open("training_data.jsonl", "r", encoding="utf-8") as jsonl_file:
    stripped_rows = (raw_row.strip() for raw_row in jsonl_file)
    # Blank (whitespace-only) lines are skipped rather than parsed.
    dataset.extend(json.loads(row) for row in stripped_rows if row)

# Last expression of the cell: displays how many records were loaded.
len(dataset)


2

In [3]:
# Tokenizer and encoder are loaded from the same checkpoint so that the
# token ids produced by one match the vocabulary expected by the other.
HF_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"

tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_ID)
model = AutoModel.from_pretrained(HF_MODEL_ID)

In [4]:
def get_embedding(text):
    """Return a mean-pooled embedding for `text` as a NumPy array.

    The text is tokenized with the module-level `tokenizer` (truncated and
    padded), passed through the module-level `model` without gradient
    tracking, and the token vectors of `last_hidden_state` are averaged over
    the token axis (dim 1).

    The average is weighted by the tokenizer's attention mask, so padding
    positions are excluded. For a single string there is no padding and the
    result equals a plain mean; for a list of strings of different lengths
    this keeps pad tokens from diluting the shorter texts' embeddings.

    Args:
        text: A string (or a list of strings) to embed.

    Returns:
        numpy.ndarray: the pooled embedding with size-1 dimensions squeezed
        out (1-D for a single string).
    """
    tokens = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        hidden = model(**tokens).last_hidden_state
        # Broadcast the mask over the hidden dimension: 1 for real tokens,
        # 0 for padding.
        mask = tokens["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        summed = (hidden * mask).sum(dim=1)
        # Clamp guards against division by zero if a row had no real tokens.
        counts = mask.sum(dim=1).clamp(min=1e-9)
        pooled = summed / counts
    return pooled.squeeze().numpy()

# Embed each record's "answer" and pair it with that record's "score" in a
# single pass (tqdm shows progress), then split the pairs into the feature
# matrix X and the target vector y.
embedded_pairs = [
    (get_embedding(record["answer"]), record["score"])
    for record in tqdm(dataset)
]

X = np.array([embedding for embedding, _score in embedded_pairs])
y = np.array([score for _embedding, score in embedded_pairs])


100%|██████████| 2/2 [00:01<00:00,  1.44it/s]


In [5]:
# Fit a linear regression from answer embeddings (X) to scores (y).
# `fit` returns the estimator itself, so construction and fitting are chained.
# NOTE(review): the recorded run loaded only 2 records; with that few rows
# the fit is presumably underdetermined. Confirm more data is intended.
regressor = LinearRegression().fit(X, y)
print("Model trained successfully.")

Model trained successfully.


In [6]:
# Persist the fitted coefficients and intercept in one .npz archive under
# the keys "weights" and "bias".
weights_path = "scoring_model_weights.npz"
np.savez(weights_path, weights=regressor.coef_, bias=regressor.intercept_)
print("Model saved!")


Model saved!


In [7]:
# Generate sentence embeddings and score an answer by cosine similarity.
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def score_answer(user_answer, ideal_answer):
    """Score `user_answer` against `ideal_answer` on a 0-100 scale.

    Both texts are embedded with `get_embedding` and compared by cosine
    similarity; the similarity in [-1, 1] is mapped linearly onto [0, 100].

    The original body called `model.encode`, but at this point in the
    notebook `model` is the `AutoModel` loaded with the tokenizer, which has
    no `encode` method. `get_embedding` is the embedding path that works
    with that model.

    Note: a later cell defines `score_answer` again with a different return
    value; once that cell has run, this definition is shadowed.

    Args:
        user_answer: The answer text to evaluate.
        ideal_answer: The reference answer text.

    Returns:
        int: the rounded score between 0 and 100.
    """
    # cosine_similarity expects 2-D inputs, so each 1-D embedding becomes a
    # single-row matrix.
    user_emb = get_embedding(user_answer).reshape(1, -1)
    ideal_emb = get_embedding(ideal_answer).reshape(1, -1)

    sim = float(cosine_similarity(user_emb, ideal_emb)[0][0])

    # Convert similarity (-1..1) to a score (0..100).
    score = (sim + 1) * 50
    return round(score)


In [8]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the sentence-embedding model used by `score_answer`.
# NOTE(review): this rebinds the global `model`, which earlier held the
# AutoModel that `get_embedding` calls as `model(**tokens)`. Because
# `get_embedding` looks the name up at call time, calls made after this cell
# will hit the SentenceTransformer instead. Confirm that is intended, or
# use a distinct name.
SENTENCE_MODEL_ID = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(SENTENCE_MODEL_ID)

def score_answer(user_answer, ideal_answer, encoder=None,
                 min_words=20, low_similarity=0.6, high_similarity=0.8):
    """Score an answer against a reference answer and produce feedback.

    Both texts are embedded in one batched `encode` call and compared by
    cosine similarity. The similarity is scaled to 0-100 and clamped, so
    negative similarities score 0.

    Args:
        user_answer: The answer text to evaluate.
        ideal_answer: The reference answer text.
        encoder: Object with an `encode(list_of_str)` method returning one
            vector per text. Defaults to the module-level `model`, looked up
            at call time; passing it explicitly avoids depending on which
            cell last rebound that name.
        min_words: Answers with fewer whitespace-separated words than this
            get the "too short" feedback.
        low_similarity: Similarities below this get the "missing key
            concepts" feedback.
        high_similarity: Similarities at or above this get the "good
            response" feedback.

    Returns:
        dict with keys:
            "score": int in [0, 100],
            "feedback": list of feedback strings (may be empty),
            "ideal_answer": the reference answer, passed through unchanged.
    """
    if encoder is None:
        encoder = model

    # One batched call instead of two separate encodes.
    user_vec, ideal_vec = (
        np.asarray(vec, dtype=float).ravel()
        for vec in encoder.encode([user_answer, ideal_answer])
    )

    # Cosine similarity; a zero-length vector yields 0.0 instead of NaN.
    norm_product = np.linalg.norm(user_vec) * np.linalg.norm(ideal_vec)
    if norm_product > 0:
        similarity = float(np.dot(user_vec, ideal_vec) / norm_product)
    else:
        similarity = 0.0

    # Scale to 0-100, clamp, and return a plain Python int so the result
    # serializes cleanly (e.g. to JSON).
    score = int(round(max(0.0, min(100.0, similarity * 100))))

    feedback = []
    if len(user_answer.split()) < min_words:
        feedback.append("Your answer is too short. Add more depth.")
    if similarity < low_similarity:
        feedback.append("Your answer is missing key concepts.")
    if similarity >= high_similarity:
        feedback.append("Good response! Well aligned with expected concepts.")

    return {
        "score": score,
        "feedback": feedback,
        "ideal_answer": ideal_answer
    }


In [9]:
# Smoke test: score one short sample answer against a reference answer.
# `test_question` is kept for context only; it is not passed to the scorer.
test_question = "What is polymorphism?"
ideal = "Polymorphism allows objects to take many forms by overriding methods."
user = "It means a function behaves differently depending on object."

# Last expression of the cell: displays the scoring result.
score_answer(user_answer=user, ideal_answer=ideal)


{'score': 30,
 'feedback': ['Your answer is too short. Add more depth.',
  'Your answer is missing key concepts.'],
 'ideal_answer': 'Polymorphism allows objects to take many forms by overriding methods.'}