In [None]:
!pip install gensim scikit-learn rouge-score nltk

Collecting gensim
  Downloading gensim-4.4.0-cp312-cp312-win_amd64.whl.metadata (8.6 kB)
Collecting nltk
  Downloading nltk-3.9.2-py3-none-any.whl.metadata (3.2 kB)
Collecting smart_open>=1.8.1 (from gensim)
  Downloading smart_open-7.5.0-py3-none-any.whl.metadata (24 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2025.11.3-cp312-cp312-win_amd64.whl.metadata (41 kB)
Collecting tqdm (from nltk)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting wrapt (from smart_open>=1.8.1->gensim)
  Downloading wrapt-2.0.1-cp312-cp312-win_amd64.whl.metadata (9.2 kB)
Downloading gensim-4.4.0-cp312-cp312-win_amd64.whl (24.4 MB)
   ---------------------------------------- 0.0/24.4 MB ? eta -:--:--
   -------- ------------------------------- 5.2/24.4 MB 31.9 MB/s eta 0:00:01
   ----------------------- ---------------- 14.2/24.4 MB 37.0 MB/s eta 0:00:01
   ---------------------------------------  24.4/24.4 MB 41.7 MB/s eta 0:00:01
   -------------------------------------

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
chromadb 0.6.3 requires tenacity>=8.2.3, which is not installed.
chromadb 0.6.3 requires typer>=0.9.0, which is not installed.
datasets 4.3.0 requires dill<0.4.1,>=0.3.0, which is not installed.
datasets 4.3.0 requires fsspec[http]<=2025.9.0,>=2023.1.0, which is not installed.
huggingface-hub 0.36.0 requires fsspec>=2023.5.0, which is not installed.
openai-whisper 20250625 requires more-itertools, which is not installed.
openai-whisper 20250625 requires numba, which is not installed.
opentelemetry-api 1.30.0 requires importlib-metadata<=8.5.0,>=6.0, which is not installed.
ragas 0.3.8 requires appdirs, which is not installed.
ragas 0.3.8 requires networkx, which is not installed.
ragas 0.3.8 requires typer, which is not installed.
tensorflow-intel 2.16.1 requires h5py>=3.10.0, which is not installed.
deprecated 1.

In [None]:
import gensim.downloader as api
from gensim.models import KeyedVectors
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize
import nltk
# word_tokenize needs the Punkt tokenizer data. Recent NLTK releases (3.9+)
# read it from the 'punkt_tab' resource while older ones read 'punkt', so
# fetch both to work with whichever NLTK version is installed.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
import warnings
# NOTE: this silences every warning raised in the kernel, not only Gensim's.
warnings.filterwarnings('ignore')

def load_text(file_path):
    """Read a UTF-8 text file and return its contents without surrounding whitespace.

    Args:
        file_path: Path of the file to read.

    Returns:
        The stripped file contents, or an empty string (after printing an
        error message) when the file does not exist.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            raw_contents = source.read()
    except FileNotFoundError:
        print(f"Error: File {file_path} not found. Create it with sample text.")
        return ""
    return raw_contents.strip()

def get_word2vec_embedding(text, model, unknown_vector=None):
    """Compute the averaged word-vector embedding of a text.

    The text is lower-cased and tokenized with ``word_tokenize``. Each token is
    mapped to ``model[token]`` when ``token in model`` and to ``unknown_vector``
    otherwise, and the per-token vectors are averaged.

    Args:
        text: Text to embed.
        model: Word-vector lookup supporting ``token in model`` and
            ``model[token]`` (e.g. a gensim ``KeyedVectors``).
        unknown_vector: Vector used for out-of-vocabulary tokens, and returned
            as-is when the text yields no tokens. Defaults to a fresh zero
            vector sized from ``model.vector_size`` (300 when the model does
            not expose that attribute).

    Returns:
        A 1-D array holding the mean of the token vectors, or
        ``unknown_vector`` when the text has no tokens.
    """
    if unknown_vector is None:
        # Build the fallback per call: a default array in the signature would be
        # one shared mutable object, and a fixed size of 300 would not match
        # models of another dimensionality (np.mean would then fail on ragged input).
        unknown_vector = np.zeros(getattr(model, 'vector_size', 300))

    tokens = word_tokenize(text.lower())
    if not tokens:
        return unknown_vector

    # OOV tokens are kept (as unknown_vector) so they still count in the average.
    token_vectors = [
        model[token] if token in model else unknown_vector
        for token in tokens
    ]
    return np.mean(token_vectors, axis=0)

def semantic_similarity_gensim(human_text, generated_text, model_name='word2vec-google-news-300', model=None):
    """Compute semantic similarity of two texts from averaged word embeddings.

    Args:
        human_text: Reference text.
        generated_text: Text to compare against the reference.
        model_name: Name passed to ``gensim.downloader.load`` when no ``model``
            is supplied.
        model: Optional already-loaded word-vector model. Pass it to avoid
            reloading the (large) pre-trained vectors on every call.

    Returns:
        The cosine similarity of the two averaged embeddings as a Python
        float, or 0.0 when the model cannot be loaded.
    """
    if model is None:
        # Load pre-trained Word2Vec (downloads ~1.5GB first time)
        try:
            model = api.load(model_name)
        except Exception as exc:
            # Best-effort fallback: report the underlying cause rather than
            # hiding it, then return 0.0 as before instead of raising.
            print(f"Error loading {model_name}: {exc}. Ensure Gensim is installed and try again.")
            return 0.0

    human_emb = get_word2vec_embedding(human_text, model)
    gen_emb = get_word2vec_embedding(generated_text, model)

    # cosine_similarity works on 2-D inputs, hence the (1, -1) reshape.
    sim_matrix = cosine_similarity(human_emb.reshape(1, -1), gen_emb.reshape(1, -1))
    # Convert the numpy scalar so both return paths yield a plain float.
    return float(sim_matrix[0][0])

In [None]:
# Paths of the two texts to compare.
human_file = 'human.txt'
generated_file = 'generated.txt'

# Similarity cut-offs used for the printed interpretation below.
HIGH_SIMILARITY = 0.8
MODERATE_SIMILARITY = 0.5

human_text = load_text(human_file)
generated_text = load_text(generated_file)

# Raise instead of calling exit(1): in IPython/Jupyter, exit() only schedules
# the kernel to exit and the rest of the cell would still run on empty texts
# (including the large model download).
if not human_text or not generated_text:
    raise ValueError("Error: Please create 'human.txt' and 'generated.txt' with your texts.")

print("Loaded Texts (first 100 chars):")
print(f"Human: {human_text[:100]}...")
print(f"Generated: {generated_text[:100]}...")
print("\n" + "="*50)

# Gensim Word2Vec similarity (semantic via word vectors)
sem_sim = semantic_similarity_gensim(human_text, generated_text)
print(f"Semantic Similarity (Word2Vec Cosine): {sem_sim:.4f}")

if sem_sim > HIGH_SIMILARITY:
    print("\nInsight: High semantic overlap—texts share contextual word meanings.")
elif sem_sim > MODERATE_SIMILARITY:
    print("\nInsight: Moderate alignment—related vocabulary, but topics diverge slightly.")
else:
    print("\nInsight: Low similarity—distinct semantic fields; potential content mismatch.")

Loaded Texts (first 100 chars):
Human: So let's start with the feedback. All right, let's go through it. So I would probably estimate your ...
Generated: Part 1：

1. **Overall Band Score**: 7.0 – The candidate speaks at length with generally good fluency...

Semantic Similarity (Word2Vec Cosine): 0.8127

Lexical Alignment:
BLEU Score: 0.0000
ROUGE-L F1: 0.1058

Overall Alignment (threshold=0.7): Aligned (score=0.8127)

Insight: High semantic overlap—texts share contextual word meanings.
