# 
## 1. Comparing the Performance of Text Embedding Models

In [None]:
# -*- coding: utf-8 -*-

"""
Word Similarity Models Evaluation Framework (Fixed Version)
Evaluate the performance of multiple word similarity models on various datasets
"""

import os
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from sklearn.utils import Bunch
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

# Import your data loading functions
from load_datasets import *

class ModelEvaluator:
    """Word Similarity Model Evaluator"""
    
    def __init__(self):
        self.models = {}
        self.results = {}
        
    def load_gensim_model(self, model_name):
        """Load Gensim models (Word2Vec, GloVe, etc.)"""
        try:
            print(f"Loading {model_name}...")
            import gensim.downloader as gensim_api
            model = gensim_api.load(MODELS[model_name])
            return model
        except Exception as e:
            print(f"Error loading {model_name}: {e}")
            return None
    
    def load_sentence_transformer_model(self, model_name):
        """Load Sentence Transformer models"""
        try:
            print(f"Loading {model_name}...")
            from sentence_transformers import SentenceTransformer
            model = SentenceTransformer(MODELS[model_name])
            return model
        except Exception as e:
            print(f"Error loading {model_name}: {e}")
            return None
    
    def load_bert_model_simple(self, model_name):
        """Load BERT models using a simplified method"""
        try:
            print(f"Loading {model_name} with simple method...")
            # Attempt to wrap BERT models using sentence-transformers
            from sentence_transformers import SentenceTransformer
            
            # For BERT models, we can use sentence-transformers as a wrapper
            if "bert" in model_name.lower():
                model = SentenceTransformer(MODELS[model_name])
                return model
            return None
        except Exception as e:
            print(f"Error loading {model_name} with simple method: {e}")
            return None
    
    def compute_gensim_similarity(self, model, word1, word2):
        """Compute word similarity using Gensim models"""
        try:
            return model.similarity(word1, word2)
        except (KeyError, ValueError):
            return np.nan
    
    def compute_sentence_transformer_similarity(self, model, word1, word2):
        """Compute word similarity using Sentence Transformer models"""
        try:
            embeddings = model.encode([word1, word2])
            similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
            return float(similarity)
        except Exception as e:
            return np.nan
    
    def load_all_models(self):
        """Load all models"""
        # Model configurations
        model_configs = {
            # Gensim models
            "Glove": {"type": "gensim", "model_key": "glove-wiki-gigaword-300"},
            "Word2Vec": {"type": "gensim", "model_key": "word2vec-google-news-300"},
            
            # Sentence Transformer models
            "mpnet-base-v2": {"type": "sentence_transformer", "model_key": "sentence-transformers/all-mpnet-base-v2"},
            "distilroberta-v1": {"type": "sentence_transformer", "model_key": "sentence-transformers/all-distilroberta-v1"},
            "defsent-roberta": {"type": "sentence_transformer", "model_key": "sentence-transformers/all-roberta-large-v1"},
            "sentence-t5-large": {"type": "sentence_transformer", "model_key": "sentence-transformers/sentence-t5-large"},
            
            # Simplified BERT models (wrapped via sentence-transformers)
            "bert-base": {"type": "bert_simple", "model_key": "bert-base-uncased"},
            "bert-large": {"type": "bert_simple", "model_key": "bert-large-uncased"},
        }
        
        for model_name, config in model_configs.items():
            try:
                if config["type"] == "gensim":
                    model = self.load_gensim_model_by_key(config["model_key"])
                    if model is not None:
                        self.models[model_name] = {"model": model, "type": "gensim"}
                
                elif config["type"] == "sentence_transformer":
                    model = self.load_sentence_transformer_model_by_key(config["model_key"])
                    if model is not None:
                        self.models[model_name] = {"model": model, "type": "sentence_transformer"}
                
                elif config["type"] == "bert_simple":
                    model = self.load_bert_model_simple_by_key(config["model_key"])
                    if model is not None:
                        self.models[model_name] = {"model": model, "type": "sentence_transformer"}
                        
            except Exception as e:
                print(f"Failed to load {model_name}: {e}")
                continue
    
    def load_gensim_model_by_key(self, model_key):
        """Load Gensim models by model key (enhanced version)"""
        try:
            import gensim.downloader as gensim_api
            
            # Check if the model has been downloaded
            if model_key not in gensim_api.info()['models']:
                print(f"Model {model_key} not found in gensim repository. Downloading...")
            
            # Load the model and print vocabulary information
            model = gensim_api.load(model_key)
            print(f"Loaded gensim model: {model_key}")
            print(f"  Vocabulary size: {len(model.key_to_index)}")
            print(f"  Sample words: {list(model.key_to_index.keys())[:5]}")
            return model
        except Exception as e:
            print(f"Error loading gensim model {model_key}: {e}")
            return None
    
    def load_sentence_transformer_model_by_key(self, model_key):
        """Load Sentence Transformer models by model key"""
        try:
            from sentence_transformers import SentenceTransformer
            return SentenceTransformer(model_key)
        except Exception as e:
            print(f"Error loading sentence transformer {model_key}: {e}")
            return None
    
    def load_bert_model_simple_by_key(self, model_key):
        """Load BERT models by model key (simplified version)"""
        try:
            from sentence_transformers import SentenceTransformer
            return SentenceTransformer(model_key)
        except Exception as e:
            print(f"Error loading BERT model {model_key}: {e}")
            return None
    
    def evaluate_on_dataset(self, dataset_name, dataset_loader_func):
        """Evaluate all models on a specific dataset"""
        print(f"\nEvaluating on {dataset_name}...")
        
        # Load dataset
        try:
            data = dataset_loader_func()
            word_pairs = data.X
            human_scores = data.y
        except Exception as e:
            print(f"Error loading dataset {dataset_name}: {e}")
            return
        
        dataset_results = {}
        
        for model_name, model_info in self.models.items():
            print(f"  Evaluating {model_name}...")
            
            model_scores = []
            valid_indices = []
            
            for i, (word1, word2) in enumerate(word_pairs):
                word1, word2 = str(word1).strip(), str(word2).strip()
                
                if model_info["type"] == "gensim":
                    similarity = self.compute_gensim_similarity(
                        model_info["model"], word1, word2
                    )
                elif model_info["type"] == "sentence_transformer":
                    similarity = self.compute_sentence_transformer_similarity(
                        model_info["model"], word1, word2
                    )
                else:
                    similarity = np.nan
                
                if not np.isnan(similarity):
                    model_scores.append(similarity)
                    valid_indices.append(i)
            
            if len(model_scores) > 0:
                # Filter valid human scores
                valid_human_scores = [human_scores[i] for i in valid_indices]
                
                # Calculate correlations
                try:
                    pearson_corr, pearson_p = pearsonr(model_scores, valid_human_scores)
                    spearman_corr, spearman_p = spearmanr(model_scores, valid_human_scores)
                except:
                    pearson_corr = spearman_corr = np.nan
                
                dataset_results[model_name] = {
                    "pearson": pearson_corr,
                    "spearman": spearman_corr,
                    "coverage": len(model_scores) / len(word_pairs),
                    "n_pairs": len(model_scores)
                }
                
                print(f"    Pearson: {pearson_corr:.3f}, Spearman: {spearman_corr:.3f}, Coverage: {len(model_scores)}/{len(word_pairs)}")
            else:
                print(f"    No valid predictions for {model_name}")
                dataset_results[model_name] = {
                    "pearson": np.nan,
                    "spearman": np.nan,
                    "coverage": 0.0,
                    "n_pairs": 0
                }
        
        self.results[dataset_name] = dataset_results
    
    def run_full_evaluation(self):
        """Run the full evaluation"""
        print("Starting model evaluation...")
        
        # Load all models
        self.load_all_models()
        print(f"Successfully loaded {len(self.models)} models: {list(self.models.keys())}")
        
        if len(self.models) == 0:
            print("No models were loaded successfully. Please check your environment.")
            return
        
        # Define datasets and their corresponding loader functions
        datasets = {
            # "SimLex999": fetch_SimLex999,
            "WordSim353": lambda: fetch_WS353("all"),
            "WordSim353-sim": lambda: fetch_WS353("similarity"),
            "WordSim353-rel": lambda: fetch_WS353("relatedness"),
            "MEN-dev": lambda: fetch_MEN("dev"),
            "MEN-test": lambda: fetch_MEN("test"),
            "MEN": lambda: fetch_MEN("all"),
            "RG65": fetch_RG65,
            "SCWS": fetch_SCWS,  
            "SimVerb3500": lambda: fetch_SimVerb3500("all"), 
            "SimVerb3500-dev": lambda: fetch_SimVerb3500("dev"),
            "SimVerb3500-test": lambda: fetch_SimVerb3500("test"), 
        }
        
        # Attempt to add more datasets (if available)
        """
        try:
            datasets["SimVerb3500"] = lambda: fetch_SimVerb3500("all")
        except:
            print("SimVerb3500 dataset not available")
            
        try:
            datasets["SCWS"] = fetch_SCWS
        except:
            print("SCWS dataset not available")
        """
        # Evaluate on each dataset
        for dataset_name, loader_func in datasets.items():
            try:
                self.evaluate_on_dataset(dataset_name, loader_func)
            except Exception as e:
                print(f"Error evaluating {dataset_name}: {e}")
                continue
    
    def print_results_summary(self):
        """Print results summary"""
        if not self.results:
            print("No results to display!")
            return
        
        print("\n" + "="*80)
        print("EVALUATION RESULTS SUMMARY")
        print("="*80)
        
        # Create results DataFrame
        all_results = []
        
        for dataset_name, dataset_results in self.results.items():
            for model_name, metrics in dataset_results.items():
                all_results.append({
                    "Dataset": dataset_name,
                    "Model": model_name,
                    "Pearson": metrics.get("pearson", np.nan),
                    "Spearman": metrics.get("spearman", np.nan),
                    "Coverage": metrics.get("coverage", 0),
                    "N_Pairs": metrics.get("n_pairs", 0)
                })
        
        results_df = pd.DataFrame(all_results)
        
        # Display results grouped by dataset
        for dataset in results_df['Dataset'].unique():
            dataset_df = results_df[results_df['Dataset'] == dataset].copy()
            dataset_df = dataset_df.sort_values('Spearman', ascending=False, na_position='last')
            
            print(f"\n{dataset}:")
            print("-" * 70)
            print("Model".ljust(20), "Pearson".rjust(8), "Spearman".rjust(8), 
                  "Coverage".rjust(10), "N_Pairs".rjust(8))
            print("-" * 70)
            
            for _, row in dataset_df.iterrows():
                pearson_str = f"{row['Pearson']:.3f}" if not pd.isna(row['Pearson']) else "N/A"
                spearman_str = f"{row['Spearman']:.3f}" if not pd.isna(row['Spearman']) else "N/A"
                coverage_str = f"{row['Coverage']:.2%}"
                
                print(f"{row['Model']:<20} {pearson_str:>8} {spearman_str:>8} {coverage_str:>10} {row['N_Pairs']:>8}")
    
    def save_detailed_results(self, filepath="model_evaluation_results.csv"):
        """Save detailed results to a CSV file"""
        if not self.results:
            print("No results to save!")
            return
        
        all_results = []
        for dataset_name, dataset_results in self.results.items():
            for model_name, metrics in dataset_results.items():
                all_results.append({
                    "Dataset": dataset_name,
                    "Model": model_name,
                    "Pearson_Correlation": metrics.get("pearson", np.nan),
                    "Spearman_Correlation": metrics.get("spearman", np.nan),
                    "Coverage": metrics.get("coverage", 0),
                    "Valid_Pairs": metrics.get("n_pairs", 0)
                })
        
        results_df = pd.DataFrame(all_results)
        results_df.to_csv(filepath, index=False)
        print(f"\nDetailed results saved to: {filepath}")
        
        # Create pivot table showing model rankings
        pivot_spearman = results_df.pivot(index='Model', columns='Dataset', values='Spearman_Correlation')
        
        # Calculate average performance (ignoring NaN values)
        pivot_spearman['Average'] = pivot_spearman.mean(axis=1, skipna=True)
        pivot_spearman = pivot_spearman.sort_values('Average', ascending=False, na_position='last')
        
        print("\nSpearman Correlation Matrix (sorted by average performance):")
        print(pivot_spearman.round(3))
        
        return results_df

def main():
    """Main function"""
    # Check dependencies
    missing_deps = []
    
    try:
        import gensim
    except ImportError:
        missing_deps.append("gensim")
    
    try:
        import sentence_transformers
    except ImportError:
        missing_deps.append("sentence-transformers")
    
    if missing_deps:
        print(f"Missing dependencies: {missing_deps}")
        print("Please install with: pip install " + " ".join(missing_deps))
        return None, None
    
    evaluator = ModelEvaluator()
    
    # Run full evaluation
    evaluator.run_full_evaluation()
    
    # Display results
    evaluator.print_results_summary()
    
    # Save results
    results_df = evaluator.save_detailed_results()
    
    return evaluator, results_df

# Global model mapping (simplified version)
MODELS = {
    "glove-wiki-gigaword-300": "glove-wiki-gigaword-300",
    "word2vec-google-news-300": "word2vec-google-news-300", 
    "sentence-transformers/all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/all-distilroberta-v1": "sentence-transformers/all-distilroberta-v1",
   "sentence-transformers/all-roberta-large-v1": "sentence-transformers/all-roberta-large-v1",
    "sentence-transformers/sentence-t5-large": "sentence-transformers/sentence-t5-large",
    "bert-base-uncased": "bert-base-uncased",
    "bert-large-uncased": "bert-large-uncased",
}

if __name__ == "__main__":
    evaluator, results = main()

## 2.  Multi-Relation Hyperbolic Embeddings

In [2]:
# -*- coding: utf-8 -*-
"""
Hyperbolic Embeddings Evaluation Framework (Optimized Version)
"""

import numpy as np
import pandas as pd
import torch
import re
import nltk
from nltk.corpus import wordnet as wn
from scipy.stats import pearsonr, spearmanr
from sklearn.utils import Bunch
import warnings
import os
import sys
import json
import logging
from collections import defaultdict
import traceback
from tqdm import tqdm
import glob

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

warnings.filterwarnings('ignore')

# Ensure WordNet data is downloaded
try:
    nltk.download('wordnet', quiet=True)
    nltk.download('omw-1.4', quiet=True)
    nltk.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
except Exception as e:
    logger.warning(f"Error downloading NLTK resources: {e}")

# Import data loading functions
try:
    from load_datasets import fetch_SimVerb3500, fetch_MEN, fetch_SimLex999, fetch_SCWS, fetch_WS353, fetch_RG65
except ImportError:
    logger.warning("load_datasets module not found. Using fallback implementations.")
    # Provide simple fallback implementations
    def fetch_SimVerb3500():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_MEN():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_SimLex999():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_SCWS():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_WS353():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_RG65():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))

class HyperbolicEvaluator:
    """Hyperbolic Embeddings Evaluator (Optimized Version)"""
    
    def __init__(self, project_dir, device=None):
        self.models = {}
        self.results = {}
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.wordnet_pos_map = {
            'n': 'noun', 'v': 'verb', 'a': 'adjective', 'r': 'adverb', 
            's': 'adjective satellite'
        }
        self.project_dir = project_dir  # Store project root directory
        logger.info(f"Using device: {self.device}")
        logger.info(f"Project directory: {self.project_dir}")
        self.lemmatizer = nltk.stem.WordNetLemmatizer()
        self.synset_cache = {}
        self.embeddings_cache = {}
    
    def word_to_synset_ids(self, word, pos=None):
        """Map word to possible WordNet synset IDs (optimized version)"""
        cache_key = f"{word}:{pos}"
        if cache_key in self.synset_cache:
            return self.synset_cache[cache_key]
        
        # Convert POS tags to WordNet format
        wn_pos = None
        if pos:
            if pos.startswith('n'):
                wn_pos = wn.NOUN
            elif pos.startswith('v'):
                wn_pos = wn.VERB
            elif pos.startswith('a') or pos.startswith('s'):
                wn_pos = wn.ADJ
            elif pos.startswith('r'):
                wn_pos = wn.ADV
        
        # Get synsets
        synsets = wn.synsets(word, pos=wn_pos)
        
        # Extract 8-digit IDs
        synset_ids = []
        for synset in synsets:
            # Generate 8-digit ID from offset
            offset = str(synset.offset()).zfill(8)
            synset_ids.append(offset)
        
        # Cache results
        self.synset_cache[cache_key] = synset_ids
        return synset_ids
    
    def load_multirel_model(self, model_path, data_dir):
        """Load model trained from multirelational-poincare repository (hyperbolic or Euclidean)"""
        try:
            # Infer model type and dimension from filename
            filename = os.path.basename(model_path)
            if "poincare" in filename:
                model_type = "MuRP"
            elif "euclidean" in filename:
                model_type = "MuRE"
            else:
                model_type = "Unknown"
                
            # Extract dimension information
            match = re.search(r'_(\d+)\.pth$', filename)
            dimension = int(match.group(1)) if match else 0
            
            logger.info(f"Loading {model_type} model (dim={dimension}) from: {model_path}")
            
            # Add project source code directory to system path
            source_dir = os.path.join(self.project_dir, "多关系双曲嵌入/multirelational-poincare")
            if source_dir not in sys.path:
                sys.path.insert(0, source_dir)  # Add to beginning of path
            
            logger.info(f"Added to sys.path: {source_dir}")
            logger.info(f"Current sys.path: {sys.path}")
            
            try:
                from load_data import Data
                if model_type == "MuRP":
                    from model import MuRP
                else:
                    from model import MuRE
                logger.info("Successfully imported model modules")
            except ImportError as e:
                logger.error(f"Error importing model modules: {e}")
                logger.error(f"Current sys.path: {sys.path}")
                return None
            
            # Load data to get entity mapping
            try:
                d = Data(data_dir)
                entity_list = d.entities
                entity_id_map = {entity: idx for idx, entity in enumerate(entity_list)}
                logger.info(f"Loaded {len(entity_list)} entities from dataset")
            except Exception as e:
                logger.error(f"Error loading data: {e}")
                return None
            
            # Load model checkpoint
            try:
                if torch.cuda.is_available():
                    checkpoint = torch.load(model_path)
                else:
                    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
                
                # Create model instance
                if model_type == "MuRP":
                    # MuRP requires three parameters
                    model = MuRP(d, dimension, self.device)
                else:
                    # MuRE only requires two parameters
                    model = MuRE(d, dimension)
                
                # Load model state
                model.load_state_dict(checkpoint)
                model.eval()
                model.to(self.device)
                
                # Extract entity embeddings
                with torch.no_grad():
                    # Get embeddings based on model type
                    if model_type == "MuRP":
                        entity_embeddings = model.Eh.weight.data.cpu().numpy()
                    else:
                        entity_embeddings = model.E.weight.data.cpu().numpy()
                
                logger.info(f"Extracted entity embeddings of shape: {entity_embeddings.shape}")
            except Exception as e:
                logger.error(f"Error loading model state: {e}")
                traceback.print_exc()
                return None
            
            # Create embedding cache
            self.embeddings_cache = {
                entity: entity_embeddings[idx] 
                for entity, idx in entity_id_map.items()
            }
            
            return {
                "entity_embeddings": entity_embeddings,
                "entity_id_map": entity_id_map,
                "entity_list": entity_list,
                "type": model_type,
                "dimension": dimension,
                "embeddings_cache": self.embeddings_cache
            }
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            traceback.print_exc()
            return None
    
    def compute_poincare_distance(self, u, v, epsilon=1e-5):
        """Compute Poincaré hyperbolic distance (optimized version)"""
        try:
            # Convert to numpy arrays
            u = np.asarray(u, dtype=np.float32)
            v = np.asarray(v, dtype=np.float32)
            
            # Calculate Euclidean norms
            norm_u_sq = np.sum(u**2)
            norm_v_sq = np.sum(v**2)
            norm_diff_sq = np.sum((u - v)**2)
            
            # Avoid numerical instability
            denominator = max((1 - norm_u_sq) * (1 - norm_v_sq), epsilon)
            
            # Calculate hyperbolic distance
            inner_expr = 1 + 2 * norm_diff_sq / denominator
            
            # Ensure inner expression is greater than 1
            if inner_expr <= 1:
                inner_expr = 1 + epsilon
                
            return np.arccosh(inner_expr)
        except Exception as e:
            logger.warning(f"Error computing Poincaré distance: {e}")
            return np.linalg.norm(u - v)
    
    def compute_euclidean_similarity(self, u, v):
        """Compute cosine similarity for Euclidean embeddings"""
        u = np.asarray(u, dtype=np.float32)
        v = np.asarray(v, dtype=np.float32)
        norm_u = np.linalg.norm(u)
        norm_v = np.linalg.norm(v)
        
        if norm_u == 0 or norm_v == 0:
            return 0.0
        
        cosine_sim = np.dot(u, v) / (norm_u * norm_v)
        return (cosine_sim + 1) / 2  # Normalize to [0,1] range
    
    def compute_similarity_batch(self, model_info, words1, words2, pos_list=None):
        """Batch compute similarities (supports both hyperbolic and Euclidean models)"""
        entity_id_map = model_info["entity_id_map"]
        entity_list = model_info["entity_list"]
        embeddings = model_info["entity_embeddings"]
        embeddings_cache = model_info.get("embeddings_cache", {})
        model_type = model_info["type"]
        
        # Batch get synset IDs
        all_synsets1 = []
        all_synsets2 = []
        for i, (word1, word2) in enumerate(zip(words1, words2)):
            pos = pos_list[i] if pos_list and i < len(pos_list) else None
            all_synsets1.append(self.word_to_synset_ids(word1, pos))
            all_synsets2.append(self.word_to_synset_ids(word2, pos))
        
        # Batch lookup embeddings
        similarities = np.full(len(words1), np.nan)
        valid_indices = []
        
        for i, (synsets1, synsets2) in enumerate(zip(all_synsets1, all_synsets2)):
            if not synsets1 or not synsets2:
                continue
                
            # Find valid synset IDs
            valid_synsets1 = [sid for sid in synsets1 if sid in entity_list]
            valid_synsets2 = [sid for sid in synsets2 if sid in entity_list]
            
            if not valid_synsets1 or not valid_synsets2:
                continue
                
            # Try to get embeddings from cache
            emb1 = None
            for sid in valid_synsets1:
                if sid in embeddings_cache:
                    emb1 = embeddings_cache[sid]
                    break
                elif sid in entity_id_map:
                    idx = entity_id_map[sid]
                    emb = embeddings[idx]
                    embeddings_cache[sid] = emb
                    emb1 = emb
                    break
            
            emb2 = None
            for sid in valid_synsets2:
                if sid in embeddings_cache:
                    emb2 = embeddings_cache[sid]
                    break
                elif sid in entity_id_map:
                    idx = entity_id_map[sid]
                    emb = embeddings[idx]
                    embeddings_cache[sid] = emb
                    emb2 = emb
                    break
            
            if emb1 is not None and emb2 is not None:
                # Calculate similarity based on model type
                if model_type == "MuRP":
                    # Hyperbolic model: use inverse of Poincaré distance as similarity
                    distance = self.compute_poincare_distance(emb1, emb2)
                    similarity = 1 / (1 + distance)
                else:
                    # Euclidean model: use cosine similarity
                    similarity = self.compute_euclidean_similarity(emb1, emb2)
                
                similarities[i] = similarity
                valid_indices.append(i)
        
        return similarities, valid_indices
    
    def load_models(self, model_dir, data_dir):
        """Load all models from specified directory"""
        logger.info(f"Loading models from: {model_dir}")
        
        # Find all model files
        model_files = glob.glob(os.path.join(model_dir, "*.pth"))
        logger.info(f"Found {len(model_files)} model files")
        
        if not model_files:
            logger.error("No model files found!")
            return
        
        # Load each model
        for model_path in model_files:
            model_info = self.load_multirel_model(model_path, data_dir)
            if model_info:
                # Generate unique model name (type-dimension)
                model_name = f"{model_info['type']}-{model_info['dimension']}D"
                self.models[model_name] = model_info
                logger.info(f"Loaded model: {model_name}")
            else:
                logger.error(f"Failed to load model: {model_path}")
    
    def evaluate_on_dataset(self, dataset_name, dataset_loader_func):
        """Evaluate models on specific dataset (optimized version)"""
        logger.info(f"\n{'='*50}\nEvaluating on {dataset_name}\n{'='*50}")
        
        # Load dataset
        try:
            data = dataset_loader_func()
            word_pairs = data.X
            human_scores = data.y
            
            # Extract POS information (if available)
            pos_info = []
            if hasattr(data, 'pos') and data.pos is not None:
                pos_info = data.pos
                logger.info(f"Loaded POS tags for {len(pos_info)} pairs")
            
            logger.info(f"Loaded {len(word_pairs)} word pairs")
        except Exception as e:
            logger.error(f"Error loading dataset {dataset_name}: {e}")
            return
        
        dataset_results = {}
        
        for model_name, model_info in self.models.items():
            logger.info(f"Evaluating {model_name} on {dataset_name}...")
            
            words1 = [str(pair[0]).strip() for pair in word_pairs]
            words2 = [str(pair[1]).strip() for pair in word_pairs]
            
            # Batch compute similarities
            model_scores, valid_indices = self.compute_similarity_batch(
                model_info, words1, words2, pos_info
            )
            
            valid_model_scores = model_scores[valid_indices]
            valid_human_scores = human_scores[valid_indices]
            
            coverage = len(valid_indices) / len(word_pairs)
            
            # Calculate coverage
            missing_count = len(word_pairs) - len(valid_indices)
            if missing_count > 0:
                logger.info(f"  Missing {missing_count} word pairs ({coverage:.2%} coverage)")
            
            # Calculate correlation metrics
            if len(valid_model_scores) > 5:  # Need at least 5 valid data pairs
                try:
                    pearson_corr, _ = pearsonr(valid_model_scores, valid_human_scores)
                    spearman_corr, _ = spearmanr(valid_model_scores, valid_human_scores)
                except Exception as e:
                    logger.error(f"Error calculating correlations: {e}")
                    pearson_corr = spearman_corr = np.nan
                
                dataset_results[model_name] = {
                    "pearson": pearson_corr,
                    "spearman": spearman_corr,
                    "coverage": coverage,
                    "n_pairs": len(valid_model_scores),
                    "model_type": model_info["type"],
                    "dimension": model_info["dimension"]
                }
                logger.info(f"  Pearson: {pearson_corr:.4f}, Spearman: {spearman_corr:.4f}, Coverage: {coverage:.2%}")
            else:
                logger.warning(f"  Insufficient valid predictions: {len(valid_model_scores)}/{len(word_pairs)}")
                dataset_results[model_name] = {
                    "pearson": np.nan, "spearman": np.nan,
                    "coverage": coverage, "n_pairs": len(valid_model_scores),
                    "model_type": model_info["type"],
                    "dimension": model_info["dimension"]
                }
        
        self.results[dataset_name] = dataset_results
    
    def run_evaluation(self, datasets):
        """Run evaluation on specified datasets (optimized version)"""
        logger.info("Starting embeddings evaluation...")
        
        if len(self.models) == 0:
            logger.error("No models loaded. Aborting evaluation.")
            return
        
        # Evaluate each dataset
        for dataset_name, loader_func in datasets.items():
            try:
                self.evaluate_on_dataset(dataset_name, loader_func)
            except Exception as e:
                logger.error(f"Error evaluating {dataset_name}: {e}")
                continue
    
    def print_results_summary(self):
        """Print results summary (optimized version)"""
        if not self.results:
            logger.info("No results to display!")
            return None
        
        print("\n" + "="*80)
        print("EMBEDDINGS EVALUATION RESULTS (MuRP vs MuRE)")
        print("="*80)
        
        # Organize results into DataFrame
        all_results = []
        for dataset_name, dataset_results in self.results.items():
            for model_name, metrics in dataset_results.items():
                all_results.append({
                    "Dataset": dataset_name,
                    "Model": model_name,
                    "Type": metrics.get("model_type", "Unknown"),
                    "Dimension": metrics.get("dimension", 0),
                    "Pearson": metrics.get("pearson", np.nan),
                    "Spearman": metrics.get("spearman", np.nan),
                    "Coverage": metrics.get("coverage", 0),
                    "N_Pairs": metrics.get("n_pairs", 0)
                })
        
        results_df = pd.DataFrame(all_results)
        
        # Display by dataset and model type
        for dataset in results_df['Dataset'].unique():
            dataset_df = results_df[results_df['Dataset'] == dataset]
            print(f"\n{dataset}:")
            print("-" * 90)
            print(f"{'Model':<15} {'Type':<8} {'Dim':>4} {'Pearson':>8} {'Spearman':>8} {'Coverage':>10} {'N_Pairs':>8}")
            print("-" * 90)
            
            # Sort by dimension
            dataset_df = dataset_df.sort_values(by=['Type', 'Dimension'])
            
            for _, row in dataset_df.iterrows():
                pearson_str = f"{row['Pearson']:.4f}" if not pd.isna(row['Pearson']) else "N/A"
                spearman_str = f"{row['Spearman']:.4f}" if not pd.isna(row['Spearman']) else "N/A"
                coverage_str = f"{row['Coverage']:.2%}"
                print(f"{row['Model']:<15} {row['Type']:<8} {row['Dimension']:>4} {pearson_str:>8} {spearman_str:>8} {coverage_str:>10} {row['N_Pairs']:>8}")
                
        return results_df
    
    def save_results(self, filepath="dimensionality_results.csv"):
        """Save results to CSV (optimized version)"""
        if not self.results:
            logger.info("No results to save!")
            return None
        
        all_results = []
        for dataset_name, dataset_results in self.results.items():
            for model_name, metrics in dataset_results.items():
                all_results.append({
                    "Dataset": dataset_name,
                    "Model": model_name,
                    "Model_Type": metrics.get("model_type", "Unknown"),
                    "Dimension": metrics.get("dimension", 0),
                    "Pearson_Correlation": metrics.get("pearson", np.nan),
                    "Spearman_Correlation": metrics.get("spearman", np.nan),
                    "Coverage": metrics.get("coverage", 0),
                    "Valid_Pairs": metrics.get("n_pairs", 0)
                })
        
        results_df = pd.DataFrame(all_results)
        
        # Create save directory (if it doesn't exist)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        results_df.to_csv(filepath, index=False)
        logger.info(f"Detailed results saved to: {filepath}")
        
        # Save dimensionality analysis summary
        dim_summary = results_df.groupby(['Model_Type', 'Dimension']).agg({
            'Pearson_Correlation': 'mean',
            'Spearman_Correlation': 'mean',
            'Coverage': 'mean'
        }).reset_index()
        dim_summary_file = "dimensionality_summary.csv"
        dim_summary.to_csv(dim_summary_file, index=False)
        logger.info(f"Dimensionality summary saved to: {dim_summary_file}")
        
        return results_df

def main():
    """Main function: run evaluation process (optimized version)"""
    # Check dependencies
    missing_deps = []
    required = ["torch", "scipy", "sklearn", "pandas", "numpy", "nltk"]
    for dep in required:
        try:
            __import__(dep)
        except ImportError:
            missing_deps.append(dep)
    
    if missing_deps:
        print(f"Missing dependencies: {missing_deps}")
        print("Install with: pip install " + " ".join(missing_deps))
        return None, None
    
    # Set project root directory
    project_dir = "/Users/chouyinghan/my_mathlib_project/Demo_in_Matrix"
    
    # Initialize evaluator, pass project root directory
    evaluator = HyperbolicEvaluator(project_dir)
    
    # Automatically construct paths
    model_dir = os.path.join(project_dir, "your_model_directory")
    data_dir = os.path.join(project_dir, "your_dataset_directory")

    # Check if model directory exists
    if not os.path.exists(model_dir):
        print(f"Model directory not found: {model_dir}")
        model_dir = input("Please enter the full path to the model directory: ")
        if not os.path.exists(model_dir):
            print("Invalid path. Exiting.")
            return
    
    # Check if data directory exists
    if not os.path.exists(data_dir):
        print(f"Data directory not found: {data_dir}")
        # Try other possible paths
        possible_data_dir = os.path.join(project_dir, "multirelational-poincare/data/WN18RR/")
        if os.path.exists(possible_data_dir):
            data_dir = possible_data_dir
            print(f"Using alternative data directory: {data_dir}")
        else:
            data_dir = input("Please enter the full path to the data directory: ")
            if not os.path.exists(data_dir):
                print("Invalid path. Exiting.")
                return
    
    # Batch load models
    evaluator.load_models(model_dir, data_dir)
    
    # Check if any models loaded successfully
    if not evaluator.models:
        logger.error("No models loaded successfully. Aborting evaluation.")
        return None, None
    
    # Define datasets to evaluate
    datasets = {
        # "SimVerb3500": lambda: fetch_SimVerb3500('all'),
        # "SimVerb3500-dev": lambda: fetch_SimVerb3500('dev'),
        "SimVerb3500-test": lambda: fetch_SimVerb3500('test'),
        # "MEN": lambda: fetch_MEN("all"),
        # "MEN-dev": lambda: fetch_MEN("dev"),
        # "MEN-test": lambda: fetch_MEN("test"),
        # "SimLex999": lambda: fetch_SimLex999(),
        # "SCWS": lambda: fetch_SCWS(),
        # "WS353": lambda: fetch_WS353("all"),
        # "RG65": fetch_RG65()
    }
    
    # Run evaluation
    evaluator.run_evaluation(datasets)
    results_df = evaluator.print_results_summary()
    
    if results_df is not None:
        # Save results to same directory
        results_file = os.path.join(model_dir, "evaluation_results.csv")
        evaluator.save_results(results_file)
        
        # Save dimensionality analysis summary
        summary_file = os.path.join(model_dir, "dimensionality_summary.csv")
        results_df.to_csv(summary_file, index=False)
        logger.info(f"Dimension summary saved to: {summary_file}")
    else:
        logger.warning("No results to save")
    
    return evaluator, results_df

if __name__ == "__main__":
    evaluator, results = main()

2025-08-23 20:10:13,094 - INFO - Using device: cpu
2025-08-23 20:10:13,094 - INFO - Project directory: /Users/chouyinghan/my_mathlib_project/Demo_in_Matrix


Model directory not found: /Users/chouyinghan/my_mathlib_project/Demo_in_Matrix/your_model_directory
Invalid path. Exiting.


TypeError: cannot unpack non-iterable NoneType object

## Improved version

In [None]:
# -*- coding: utf-8 -*-
"""
Hyperbolic Embeddings Evaluation Framework (Optimized Version)
"""

import numpy as np
import pandas as pd
import torch
import re
import nltk
from nltk.corpus import wordnet as wn
from scipy.stats import pearsonr, spearmanr
from sklearn.utils import Bunch
import warnings
import os
import sys
import json
import logging
from collections import defaultdict
import traceback
from tqdm import tqdm
import glob

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

warnings.filterwarnings('ignore')

# Ensure WordNet data is downloaded
try:
    nltk.download('wordnet', quiet=True)
    nltk.download('omw-1.4', quiet=True)
    nltk.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
except Exception as e:
    logger.warning(f"Error downloading NLTK resources: {e}")

# Import data loading functions
try:
    from load_datasets import fetch_SimVerb3500, fetch_MEN, fetch_SimLex999, fetch_SCWS, fetch_WS353, fetch_RG65
except ImportError:
    logger.warning("load_datasets module not found. Using fallback implementations.")
    # Provide simple fallback implementations
    def fetch_SimVerb3500():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_MEN():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_SimLex999():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_SCWS():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_WS353():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))
    def fetch_RG65():
        return Bunch(X=np.array([], dtype=object), y=np.array([]))

class HyperbolicEvaluator:
    """Hyperbolic Embeddings Evaluator (Optimized Version)"""
    
    def __init__(self, project_dir, device=None):
        self.models = {}
        self.results = {}
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.wordnet_pos_map = {
            'n': 'noun', 'v': 'verb', 'a': 'adjective', 'r': 'adverb', 
            's': 'adjective satellite'
        }
        self.project_dir = project_dir  # Store project root directory
        logger.info(f"Using device: {self.device}")
        logger.info(f"Project directory: {self.project_dir}")
        self.lemmatizer = nltk.stem.WordNetLemmatizer()
        self.synset_cache = {}
        self.embeddings_cache = {}
    
    def word_to_synset_ids(self, word, pos=None):
        """Map word to possible WordNet synset IDs (optimized version)"""
        cache_key = f"{word}:{pos}"
        if cache_key in self.synset_cache:
            return self.synset_cache[cache_key]
        
        # Convert POS tags to WordNet format
        wn_pos = None
        if pos and isinstance(pos, str):  # Ensure pos is string
            if pos.startswith('n'):
                wn_pos = wn.NOUN
            elif pos.startswith('v'):
                wn_pos = wn.VERB
            elif pos.startswith('a') or pos.startswith('s'):
                wn_pos = wn.ADJ
            elif pos.startswith('r'):
                wn_pos = wn.ADV
        
        # Get synsets
        synsets = wn.synsets(word, pos=wn_pos)
        
        # Extract 8-digit IDs
        synset_ids = []
        for synset in synsets:
            # Generate 8-digit ID from offset
            offset = str(synset.offset()).zfill(8)
            synset_ids.append(offset)
        
        # Cache results
        self.synset_cache[cache_key] = synset_ids
        return synset_ids
    
    def load_multirel_model(self, model_path, data_dir):
        """Load model trained from multirelational-poincare repository (hyperbolic or Euclidean)"""
        try:
            # Infer model type and dimension from filename
            filename = os.pathbasename(model_path)
            if "poincare" in filename:
                model_type = "MuRP"
            elif "euclidean" in filename:
                model_type = "MuRE"
            else:
                model_type = "Unknown"
                
            # Extract dimension information
            match = re.search(r'_(\d+)\.pth$', filename)
            dimension = int(match.group(1)) if match else 0
            
            logger.info(f"Loading {model_type} model (dim={dimension}) from: {model_path}")
            
            # Add project source code directory to system path
            source_dir = os.path.join(self.project_dir, "多关系双曲嵌入/multirelational-poincare")
            if source_dir not in sys.path:
                sys.path.insert(0, source_dir)  # Add to beginning of path
            
            logger.info(f"Added to sys.path: {source_dir}")
            
            try:
                from load_data import Data
                if model_type == "MuRP":
                    from model import MuRP
                else:
                    from model import MuRE
                logger.info("Successfully imported model modules")
            except ImportError as e:
                logger.error(f"Error importing model modules: {e}")
                logger.error(f"Current sys.path: {sys.path}")
                return None
            
            # Load data to get entity mapping
            try:
                d = Data(data_dir)
                entity_list = d.entities
                entity_id_map = {entity: idx for idx, entity in enumerate(entity_list)}
                logger.info(f"Loaded {len(entity_list)} entities from dataset")
            except Exception as e:
                logger.error(f"Error loading data: {e}")
                return None
            
            # Load model checkpoint
            try:
                if torch.cuda.is_available():
                    checkpoint = torch.load(model_path)
                else:
                    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
                
                # Create model instance
                if model_type == "MuRP":
                    # MuRP requires three parameters
                    model = MuRP(d, dimension, self.device)
                else:
                    # MuRE only requires two parameters
                    model = MuRE(d, dimension)
                
                # Load model state
                model.load_state_dict(checkpoint)
                model.eval()
                model.to(self.device)
                
                # Extract entity embeddings
                with torch.no_grad():
                    # Get embeddings based on model type
                    if model_type == "MuRP":
                        entity_embeddings = model.Eh.weight.data.cpu().numpy()
                    else:
                        entity_embeddings = model.E.weight.data.cpu().numpy()
                
                logger.info(f"Extracted entity embeddings of shape: {entity_embeddings.shape}")
            except Exception as e:
                logger.error(f"Error loading model state: {e}")
                traceback.print_exc()
                return None
            
            # Create embedding cache
            self.embeddings_cache = {
                entity: entity_embeddings[idx] 
                for entity, idx in entity_id_map.items()
            }
            
            return {
                "entity_embeddings": entity_embeddings,
                "entity_id_map": entity_id_map,
                "entity_list": entity_list,
                "type": model_type,
                "dimension": dimension,
                "embeddings_cache": self.embeddings_cache
            }
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            traceback.print_exc()
            return None
    
    def compute_poincare_distance(self, u, v, epsilon=1e-5):
        """Compute Poincaré hyperbolic distance (optimized version)"""
        try:
            # Convert to numpy arrays
            u = np.asarray(u, dtype=np.float32)
            v = np.asarray(v, dtype=np.float32)
            
            # Calculate Euclidean norms
            norm_u_sq = np.sum(u**2)
            norm_v_sq = np.sum(v**2)
            norm_diff_sq = np.sum((u - v)**2)
            
            # Avoid numerical instability
            denominator = max((1 - norm_u_sq) * (1 - norm_v_sq), epsilon)
            
            # Calculate hyperbolic distance
            inner_expr = 1 + 2 * norm_diff_sq / denominator
            
            # Ensure inner expression is greater than 1
            if inner_expr <= 1:
                inner_expr = 1 + epsilon
                
            return np.arccosh(inner_expr)
        except Exception as e:
            logger.warning(f"Error computing Poincaré distance: {e}")
            return np.linalg.norm(u - v)
    
    def compute_euclidean_similarity(self, u, v):
        """Compute cosine similarity for Euclidean embeddings"""
        u = np.asarray(u, dtype=np.float32)
        v = np.asarray(v, dtype=np.float32)
        norm_u = np.linalg.norm(u)
        norm_v = np.linalg.norm(v)
        
        if norm_u == 0 or norm_v == 0:
            return 0.0
        
        cosine_sim = np.dot(u, v) / (norm_u * norm_v)
        return (cosine_sim + 1) / 2  # Normalize to [0,1] range
    
    def compute_similarity_batch(self, model_info, words1, words2, pos_list=None):
        """Batch compute similarities (supports both hyperbolic and Euclidean models)"""
        entity_id_map = model_info["entity_id_map"]
        entity_list = model_info["entity_list"]
        embeddings = model_info["entity_embeddings"]
        embeddings_cache = model_info.get("embeddings_cache", {})
        model_type = model_info["type"]
        
        # Batch get synset IDs
        all_synsets1 = []
        all_synsets2 = []
        for i, (word1, word2) in enumerate(zip(words1, words2)):
            # Safely handle POS tags
            pos = None
            if pos_list is not None and i < len(pos_list):
                pos_item = pos_list[i]
                # Handle different types of POS tags
                if isinstance(pos_item, (list, tuple, np.ndarray)):
                    # Take first element as main part of speech
                    pos = str(pos_item[0]) if len(pos_item) > 0 else None
                elif isinstance(pos_item, str):
                    pos = pos_item
                else:
                    pos = None
            
            all_synsets1.append(self.word_to_synset_ids(word1, pos))
            all_synsets2.append(self.word_to_synset_ids(word2, pos))
        
        # Batch lookup embeddings
        similarities = np.full(len(words1), np.nan)
        valid_indices = []
        
        for i, (synsets1, synsets2) in enumerate(zip(all_synsets1, all_synsets2)):
            # Check if empty list
            if not synsets1 or not synsets2:
                continue
                
            # Find valid synset IDs
            valid_synsets1 = [sid for sid in synsets1 if sid in entity_list]
            valid_synsets2 = [sid for sid in synsets2 if sid in entity_list]
            
            if not valid_synsets1 or not valid_synsets2:
                continue
                
            # Try to get embeddings from cache
            emb1 = None
            for sid in valid_synsets1:
                if sid in embeddings_cache:
                    emb1 = embeddings_cache[sid]
                    break
                elif sid in entity_id_map:
                    idx = entity_id_map[sid]
                    emb = embeddings[idx]
                    embeddings_cache[sid] = emb
                    emb1 = emb
                    break
            
            emb2 = None
            for sid in valid_synsets2:
                if sid in embeddings_cache:
                    emb2 = embeddings_cache[sid]
                    break
                elif sid in entity_id_map:
                    idx = entity_id_map[sid]
                    emb = embeddings[idx]
                    embeddings_cache[sid] = emb
                    emb2 = emb
                    break
            
            if emb1 is not None and emb2 is not None:
                # Calculate similarity based on model type
                if model_type == "MuRP":
                    # Hyperbolic model: use inverse of Poincaré distance as similarity
                    distance = self.compute_poincare_distance(emb1, emb2)
                    similarity = 1 / (1 + distance)
                else:
                    # Euclidean model: use cosine similarity
                    similarity = self.compute_euclidean_similarity(emb1, emb2)
                
                similarities[i] = similarity
                valid_indices.append(i)
        
        return similarities, valid_indices
    
    def load_models(self, model_dir, data_dir):
        """Load all models from specified directory"""
        logger.info(f"Loading models from: {model_dir}")
        
        # Find all model files
        model_files = glob.glob(os.path.join(model_dir, "*.pth"))
        logger.info(f"Found {len(model_files)} model files")
        
        if not model_files:
            logger.error("No model files found!")
            return
        
        # Load each model
        for model_path in model_files:
            model_info = self.load_multirel_model(model_path, data_dir)
            if model_info:
                # Generate unique model name (type-dimension)
                model_name = f"{model_info['type']}-{model_info['dimension']}D"
                self.models[model_name] = model_info
                logger.info(f"Loaded model: {model_name}")
            else:
                logger.error(f"Failed to load model: {model_path}")
    
    def evaluate_on_dataset(self, dataset_name, dataset_loader_func):
        """Evaluate models on specific dataset (optimized version)"""
        logger.info(f"\n{'='*50}\nEvaluating on {dataset_name}\n{'='*50}")
        
        # Load dataset
        try:
            data = dataset_loader_func()
            word_pairs = data.X
            human_scores = data.y
            
            # Ensure human_scores is NumPy array
            if not isinstance(human_scores, np.ndarray):
                human_scores = np.array(human_scores)
            
            # Ensure human_scores is 1D array
            if human_scores.ndim > 1:
                human_scores = human_scores.flatten()
            
            # Extract POS information (if available)
            pos_info = None
            if hasattr(data, 'pos') and data.pos is not None:
                # Ensure pos_info is list or array
                if isinstance(data.pos, (list, tuple, np.ndarray)):
                    pos_info = data.pos
                    logger.info(f"Loaded POS tags for {len(pos_info)} pairs")
                else:
                    logger.warning(f"Unexpected POS type: {type(data.pos)}. Ignoring POS info.")
            
            logger.info(f"Loaded {len(word_pairs)} word pairs")
        except Exception as e:
            logger.error(f"Error loading dataset {dataset_name}: {e}")
            traceback.print_exc()
            return
        
        dataset_results = {}
        
        for model_name, model_info in self.models.items():
            logger.info(f"Evaluating {model_name} on {dataset_name}...")
            
            words1 = [str(pair[0]).strip() for pair in word_pairs]
            words2 = [str(pair[1]).strip() for pair in word_pairs]
            
            # Batch compute similarities
            model_scores, valid_indices = self.compute_similarity_batch(
                model_info, words1, words2, pos_info
            )
            
            # Ensure there are valid indices
            if not valid_indices:
                logger.warning(f"No valid predictions for model {model_name} on dataset {dataset_name}")
                dataset_results[model_name] = {
                    "pearson": np.nan, "spearman": np.nan,
                    "coverage": 0.0, "n_pairs": 0,
                    "model_type": model_info["type"],
                    "dimension": model_info["dimension"]
                }
                continue
                
            valid_model_scores = model_scores[valid_indices]
            valid_human_scores = human_scores[valid_indices]
            
            # Ensure human_scores is numeric array
            if not isinstance(valid_human_scores, np.ndarray) or valid_human_scores.dtype.kind not in 'iuf':
                try:
                    valid_human_scores = np.array(valid_human_scores, dtype=np.float32)
                except:
                    logger.error(f"Invalid human scores type: {type(valid_human_scores)}")
                    continue
            
            coverage = len(valid_indices) / len(word_pairs)
            
            # Calculate coverage
            missing_count = len(word_pairs) - len(valid_indices)
            if missing_count > 0:
                logger.info(f"  Missing {missing_count} word pairs ({coverage:.2%} coverage)")
            
            # Calculate correlation metrics
            if len(valid_model_scores) > 5:  # Need at least 5 valid data pairs
                try:
                    # Ensure array shapes are consistent
                    if valid_model_scores.ndim > 1:
                        valid_model_scores = valid_model_scores.flatten()
                    if valid_human_scores.ndim > 1:
                        valid_human_scores = valid_human_scores.flatten()
                    
                    # Calculate correlation coefficients
                    pearson_corr, _ = pearsonr(valid_model_scores, valid_human_scores)
                    spearman_corr, _ = spearmanr(valid_model_scores, valid_human_scores)
                except Exception as e:
                    logger.error(f"Error calculating correlations: {e}")
                    traceback.print_exc()
                    pearson_corr = spearman_corr = np.nan
                
                dataset_results[model_name] = {
                    "pearson": pearson_corr,
                    "spearman": spearman_corr,
                    "coverage": coverage,
                    "n_pairs": len(valid_model_scores),
                    "model_type": model_info["type"],
                    "dimension": model_info["dimension"]
                }
                logger.info(f"  Pearson: {pearson_corr:.4f}, Spearman: {spearman_corr:.4f}, Coverage: {coverage:.2%}")
            else:
                logger.warning(f"  Insufficient valid predictions: {len(valid_model_scores)}/{len(word_pairs)}")
                dataset_results[model_name] = {
                    "pearson": np.nan, "spearman": np.nan,
                    "coverage": coverage, "n_pairs": len(valid_model_scores),
                    "model_type": model_info["type"],
                    "dimension": model_info["dimension"]
                }
        
        self.results[dataset_name] = dataset_results
    
    def run_evaluation(self, datasets):
        """Run evaluation on specified datasets (optimized version)"""
        logger.info("Starting embeddings evaluation...")
        
        if len(self.models) == 0:
            logger.error("No models loaded. Aborting evaluation.")
            return
        
        # Evaluate each dataset
        for dataset_name, loader_func in datasets.items():
            try:
                logger.info(f"Processing dataset: {dataset_name}")
                self.evaluate_on_dataset(dataset_name, loader_func)
            except Exception as e:
                logger.error(f"Error evaluating {dataset_name}: {e}")
                traceback.print_exc()
                continue
    
    def print_results_summary(self):
        """Print results summary (optimized version)"""
        if not self.results:
            logger.info("No results to display!")
            return None
        
        print("\n" + "="*80)
        print("EMBEDDINGS EVALUATION RESULTS (MuRP vs MuRE)")
        print("="*80)
        
        # Organize results into DataFrame
        all_results = []
        for dataset_name, dataset_results in self.results.items():
            for model_name, metrics in dataset_results.items():
                all_results.append({
                    "Dataset": dataset_name,
                    "Model": model_name,
                    "Type": metrics.get("model_type", "Unknown"),
                    "Dimension": metrics.get("dimension", 0),
                    "Pearson": metrics.get("pearson", np.nan),
                    "Spearman": metrics.get("spearman", np.nan),
                    "Coverage": metrics.get("coverage", 0),
                    "N_Pairs": metrics.get("n_pairs", 0)
                })
        
        results_df = pd.DataFrame(all_results)
        
        # Display by dataset and model type
        for dataset in results_df['Dataset'].unique():
            dataset_df = results_df[results_df['Dataset'] == dataset]
            print(f"\n{dataset}:")
            print("-" * 90)
            print(f"{'Model':<15} {'Type':<8} {'Dim':>4} {'Pearson':>8} {'Spearman':>8} {'Coverage':>10} {'N_Pairs':>8}")
            print("-" * 90)
            
            # Sort by dimension
            dataset_df = dataset_df.sort_values(by=['Type', 'Dimension'])
            
            for _, row in dataset_df.iterrows():
                pearson_str = f"{row['Pearson']:.4f}" if not pd.isna(row['Pearson']) else "N/A"
                spearman_str = f"{row['Spearman']:.4f}" if not pd.isna(row['Spearman']) else "N/A"
                coverage_str = f"{row['Coverage']:.2%}"
                print(f"{row['Model']:<15} {row['Type']:<8} {row['Dimension']:>4} {pearson_str:>8} {spearman_str:>8} {coverage_str:>10} {row['N_Pairs']:>8}")
                
        return results_df
    
    def save_results(self, filepath="dimensionality_results.csv"):
        """Save results to CSV (optimized version)"""
        if not self.results:
            logger.info("No results to save!")
            return None
        
        all_results = []
        for dataset_name, dataset_results in self.results.items():
            for model_name, metrics in dataset_results.items():
                all_results.append({
                    "Dataset": dataset_name,
                    "Model": model_name,
                    "Model_Type": metrics.get("model_type", "Unknown"),
                    "Dimension": metrics.get("dimension", 0),
                    "Pearson_Correlation": metrics.get("pearson", np.nan),
                    "Spearman_Correlation": metrics.get("spearman", np.nan),
                    "Coverage": metrics.get("coverage", 0),
                    "Valid_Pairs": metrics.get("n_pairs", 0)
                })
        
        results_df = pd.DataFrame(all_results)
        
        # Create save directory (if it doesn't exist)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        results_df.to_csv(filepath, index=False)
        logger.info(f"Detailed results saved to: {filepath}")
        
        # Save dimensionality analysis summary
        dim_summary = results_df.groupby(['Model_Type', 'Dimension']).agg({
            'Pearson_Correlation': 'mean',
            'Spearman_Correlation': 'mean',
            'Coverage': 'mean'
        }).reset_index()
        dim_summary_file = "dimensionality_summary.csv"
        dim_summary.to_csv(dim_summary_file, index=False)
        logger.info(f"Dimensionality summary saved to: {dim_summary_file}")
        
        return results_df

def main():
    """Main function: run evaluation process (optimized version)"""
    # Check dependencies
    missing_deps = []
    required = ["torch", "scipy", "sklearn", "pandas", "numpy", "nltk"]
    for dep in required:
        try:
            __import__(dep)
        except ImportError:
            missing_deps.append(dep)
    
    if missing_deps:
        print(f"Missing dependencies: {missing_deps}")
        print("Install with: pip install " + " ".join(missing_deps))
        return None, None
    
    # Set project root directory
    project_dir = "/Users/chouyinghan/my_mathlib_project/Demo_in_Matrix"
    
    # Initialize evaluator, pass project root directory
    evaluator = HyperbolicEvaluator(project_dir)
    
    # Automatically construct paths
    model_dir = os.path.join(project_dir, "your_model_directory")
    data_dir = os.path.join(project_dir, "your_dataset_directory")

    # Check if model directory exists
    if not os.path.exists(model_dir):
        print(f"Model directory not found: {model_dir}")
        model_dir = input("Please enter the full path to the model directory: ")
        if not os.path.exists(model_dir):
            print("Invalid path. Exiting.")
            return
    
    # Check if data directory exists
    if not os.path.exists(data_dir):
        print(f"Data directory not found: {data_dir}")
        # Try other possible paths
        possible_data_dir = os.path.join(project_dir, "multirelational-poincare/data/WN18RR/")
        if os.path.exists(possible_data_dir):
            data_dir = possible_data_dir
            print(f"Using alternative data directory: {data_dir}")
        else:
            data_dir = input("Please enter the full path to the data directory: ")
            if not os.path.exists(data_dir):
                print("Invalid path. Exiting.")
                return
    
    # Batch load models
    evaluator.load_models(model_dir, data_dir)
    
    # Check if any models loaded successfully
    if not evaluator.models:
        logger.error("No models loaded successfully. Aborting evaluation.")
        return None, None
    
    # Define datasets to evaluate
    datasets = {
        "SimVerb3500": lambda: fetch_SimVerb3500('all'),
        "SimVerb3500-dev": lambda: fetch_SimVerb3500('dev'),
        "SimVerb3500-test": lambda: fetch_SimVerb3500('test'),
        "MEN": lambda: fetch_MEN("all"),
        "MEN-dev": lambda: fetch_MEN("dev"),
        "MEN-test": lambda: fetch_MEN("test"),
        "SimLex999": fetch_SimLex999,
        "SCWS": fetch_SCWS,
        "WS353": fetch_WS353,
        "RG65": fetch_RG65
    }
    
    # Run evaluation
    evaluator.run_evaluation(datasets)
    results_df = evaluator.print_results_summary()
    
    if results_df is not None:
        # Save results to same directory
        results_file = os.path.join(model_dir, "evaluation_results.csv")
        evaluator.save_results(results_file)
        
        # Save dimensionality analysis summary
        summary_file = os.path.join(model_dir, "dimensionality_summary.csv")
        results_df.to_csv(summary_file, index=False)
        logger.info(f"Dimension summary saved to: {summary_file}")
    else:
        logger.warning("No results to save")
    
    return evaluator, results_df

if __name__ == "__main__":
    evaluator, results = main()