In [7]:
import re
import networkx as nx
from collections import deque
from gradio_client import Client
import re
from collections import defaultdict


In [4]:
# Gradio client bound to the hosted ConceptNet Space used by get_conceptnet_profile
client = Client("cstr/conceptnet_normalized")

# Relation names sent as `selected_relations` when requesting a semantic profile
relations = [
            'RelatedTo','IsA','PartOf','HasA','UsedFor','CapableOf','AtLocation',
            'Causes','HasSubevent','HasFirstSubevent','HasLastSubevent',
            'HasPrerequisite','HasProperty','MotivatedByGoal','ObstructedBy',
            'Desires','CreatedBy','Synonym','Antonym','DistinctFrom','DerivedFrom',
            'SymbolOf','DefinedAs','MannerOf','LocatedNear','HasContext','SimilarTo',
            'EtymologicallyRelatedTo','EtymologicallyDerivedFrom','CausesDesire',
            'MadeOf','ReceivesAction','ExternalURL','NotDesires','NotUsedFor',
            'NotCapableOf','NotHasProperty'
        ]

Loaded as API: https://cstr-conceptnet-normalized.hf.space ‚úî


In [5]:
def get_conceptnet_profile(word, relations):
    """
    Request the English semantic profile of a concept from the ConceptNet Space.

    Args:
        word: Concept to look up.
        relations: Relation names forwarded as ``selected_relations``.

    Returns:
        The value returned by the ``/get_semantic_profile`` endpoint, unmodified.
    """
    request = {
        "word": word,
        "lang": "en",
        "selected_relations": relations,
        "api_name": "/get_semantic_profile",
    }
    return client.predict(**request)

In [40]:
def parse_conceptnet_profile(profile_text):
    """
    Parse ConceptNet semantic profile text into a dictionary.

    The text is expected to hold a header naming the queried word
    (``... Semantic Profile: 'word' ...``) followed by ``## RelationType``
    sections. Each entry inside a section has the shape::

        - *word1* RelationType <arrow> **word2** `[score]`

    where either word may be wrapped in single or double asterisks. For every
    entry, the word that is NOT the queried word is kept as the related concept.

    Args:
        profile_text: String output from get_conceptnet_profile

    Returns:
        Dictionary with relation types as keys and list of (word, score) tuples as values.
        Only includes relations that have values, sorted by score in descending order.
    """
    result = {}

    # Anchor on the ASCII part of the header only, so the lookup does not depend
    # on how the decorative glyph in front of it happens to be encoded.
    header_match = re.search(r"Semantic Profile: '([^']+)'", profile_text)
    queried_word = header_match.group(1) if header_match else None

    # Normalize the queried word for comparison (both space and underscore versions)
    if queried_word:
        queried_normalized_space = queried_word.replace('_', ' ')
        queried_normalized_underscore = queried_word.replace(' ', '_')
    else:
        queried_normalized_space = None
        queried_normalized_underscore = None

    def is_queried_word(word):
        """Return True when `word` is the queried word in space or underscore form."""
        if not queried_word:
            return False
        word_space = word.replace('_', ' ')
        word_underscore = word.replace(' ', '_')
        return (word == queried_word or
                word_space == queried_normalized_space or
                word_underscore == queried_normalized_underscore)

    # The arrow between the relation name and the second word is matched as
    # "any run of non-space, non-asterisk characters" instead of one specific
    # glyph, which keeps the parser working regardless of the arrow's encoding.
    # Compiled once here rather than once per section.
    entry_pattern = re.compile(
        r'-\s+(?:\*\*?([^*]+?)\*\*?)\s+\w+\s+[^\s*]+\s+(?:\*\*?([^*]+?)\*\*?)\s+`\[([0-9.]+)\]`'
    )

    # Split by relation headers (## RelationType); because of the capture group,
    # sections[0] is the text before the first relation and the rest alternates
    # between relation name and section content.
    sections = re.split(r'## (\w+)', profile_text)

    for i in range(1, len(sections), 2):
        relation = sections[i]
        content = sections[i + 1] if i + 1 < len(sections) else ""

        relation_list = []
        for word1, word2, score in entry_pattern.findall(content):
            word1 = word1.strip()
            word2 = word2.strip()
            score = float(score)

            # Keep whichever side is NOT the queried word; if neither side
            # matches, fall back to word1.
            if is_queried_word(word1):
                target_word = word2
            elif is_queried_word(word2):
                target_word = word1
            else:
                target_word = word1

            relation_list.append((target_word, score))

        # Only add to result if there are values, highest score first
        if relation_list:
            relation_list.sort(key=lambda x: x[1], reverse=True)
            result[relation] = relation_list

    return result

In [41]:
# Fetch the profile for the example concept from the Space, then parse it
conceptnet_text = get_conceptnet_profile("revolving door", relations)
conceptnet_data = parse_conceptnet_profile(conceptnet_text)

# Alternative query using the underscore form of the concept (kept disabled):
# conceptnet_text = get_conceptnet_profile("revolving_door", relations)
# conceptnet_data = parse_conceptnet_profile(conceptnet_text)

In [42]:
conceptnet_data

{'RelatedTo': [('dreht√ºr', 1.0),
  ('revolving doors', 1.0),
  ('tourniquet', 1.0),
  ('bussola', 1.0)],
 'UsedFor': [('entering building', 3.464),
  ('enter building', 1.0),
  ('exiting building', 1.0),
  ('getting into building', 1.0)],
 'AtLocation': [('lobby', 2.0),
  ('bank', 1.0),
  ('building', 1.0),
  ('department store', 1.0),
  ('entrance to building', 1.0),
  ('hotel lobby', 1.0),
  ('mall', 1.0)],
 'Synonym': [('dreht√ºr', 1.0)]}

In [None]:
I have relations extracted from ConceptNet for a question_concept; they look like this:

conceptnet_data = {'RelatedTo': [('dreht√ºr', 1.0),
  ('revolving doors', 1.0),
  ('tourniquet', 1.0),
  ('bussola', 1.0)],
 'UsedFor': [('entering building', 3.464),
  ('enter building', 1.0),
  ('exiting building', 1.0),
  ('getting into building', 1.0)],
 'AtLocation': [('lobby', 2.0),
  ('bank', 1.0),
  ('building', 1.0),
  ('department store', 1.0),
  ('entrance to building', 1.0),
  ('hotel lobby', 1.0),
  ('mall', 1.0)],
 'Synonym': [('dreht√ºr', 1.0)]}

Now, for the given question and choices:

question=  "A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?",
question_concept = "revolving door",
choices = [
    "bank",
    "library",
    "department store",
    "mall",
    "new york"
]

I need to rank the choices by their relevance to the question_concept and the question, using conceptnet_data. How can I do that?

In [43]:
def rank_choices_by_relevance(conceptnet_data, choices):
    """
    Score every answer choice by lexical matches against ConceptNet concepts.

    A concept contributes its weight for an exact match, 0.8x when the choice
    is a substring of the concept, 0.6x when the concept is a substring of the
    choice, and otherwise 0.5x scaled by the word-overlap ratio. Comparison is
    done on lower-cased, stripped text.

    Args:
        conceptnet_data: Dict mapping relation types to lists of (concept, weight) tuples
        choices: List of answer choices to rank

    Returns:
        List of tuples (choice, score, matching_info) sorted by score descending
    """
    def _match(choice_text, choice_tokens, concept, weight):
        """Return (score, match_type) for one pair; a score of 0 means no match."""
        concept_text = concept.lower().strip()
        if choice_text == concept_text:
            return weight, "exact"
        if choice_text in concept_text:
            return weight * 0.8, "substring"
        if concept_text in choice_text:
            return weight * 0.6, "contains"
        concept_tokens = set(concept_text.split())
        shared = choice_tokens & concept_tokens
        if not shared:
            return 0, None
        ratio = len(shared) / max(len(choice_tokens), len(concept_tokens))
        return weight * ratio * 0.5, "word_overlap"

    scored = {}
    for choice in choices:
        choice_text = choice.lower().strip()
        choice_tokens = set(choice_text.split())

        total = 0
        hits = []
        for relation_type, concepts in conceptnet_data.items():
            for concept, concept_weight in concepts:
                pair_score, pair_type = _match(
                    choice_text, choice_tokens, concept, concept_weight
                )
                if not pair_score > 0:
                    continue
                total += pair_score
                hits.append({
                    'relation': relation_type,
                    'concept': concept,
                    'weight': concept_weight,
                    'score': pair_score,
                    'type': pair_type
                })

        scored[choice] = (total, hits)

    # Stable sort: ties keep the order in which choices were first seen
    flattened = [(name, total, hits) for name, (total, hits) in scored.items()]
    flattened.sort(key=lambda entry: entry[1], reverse=True)
    return flattened


def get_top_choice(conceptnet_data, choices):
    """
    Get the top-ranked choice.

    Args:
        conceptnet_data: Dict mapping relation types to lists of (concept, weight) tuples
        choices: List of answer choices to rank

    Returns:
        The top-ranked choice (string), or None when ``choices`` is empty.
    """
    # The ranking has one entry per choice, so it is empty only when there are
    # no choices at all. The previous `choices[0]` fallback therefore could only
    # ever raise IndexError; report "no answer" explicitly instead.
    if len(choices) == 0:
        return None
    ranked = rank_choices_by_relevance(conceptnet_data, choices)
    return ranked[0][0] if ranked else None


def get_ranked_choices(conceptnet_data, choices):
    """
    Return only the choice names, ordered from most to least relevant.

    Args:
        conceptnet_data: Dict mapping relation types to lists of (concept, weight) tuples
        choices: List of answer choices to rank

    Returns:
        List of choices sorted by relevance score
    """
    ordered = []
    for name, _score, _info in rank_choices_by_relevance(conceptnet_data, choices):
        ordered.append(name)
    return ordered


In [44]:
# Example usage: lexical ranking on a hard-coded ConceptNet profile
if __name__ == "__main__":
    # Profile for "revolving door", pasted from the parse_conceptnet_profile output above
    conceptnet_data = {
        'RelatedTo': [('dreht√ºr', 1.0),
                      ('revolving doors', 1.0),
                      ('tourniquet', 1.0),
                      ('bussola', 1.0)],
        'UsedFor': [('entering building', 3.464),
                    ('enter building', 1.0),
                    ('exiting building', 1.0),
                    ('getting into building', 1.0)],
        'AtLocation': [('lobby', 2.0),
                       ('bank', 1.0),
                       ('building', 1.0),
                       ('department store', 1.0),
                       ('entrance to building', 1.0),
                       ('hotel lobby', 1.0),
                       ('mall', 1.0)],
        'Synonym': [('dreht√ºr', 1.0)]
    }
    
    choices = ["bank", "library", "department store", "mall", "new york"]
    
    # Get full ranking with scores
    ranked = rank_choices_by_relevance(conceptnet_data, choices)
    
    print("Ranked Choices:")
    print("=" * 80)
    for i, (choice, score, info) in enumerate(ranked, 1):
        print(f"\n{i}. {choice.upper()} (Score: {score:.2f})")
        if info:
            print(f"   Matches:")
            # Show at most the three highest-scoring matches per choice
            for match in sorted(info, key=lambda x: x['score'], reverse=True)[:3]:
                print(f"   - {match['relation']}: '{match['concept']}' "
                      f"(weight: {match['weight']:.2f}, match: {match['type']})")
        else:
            print(f"   No matches in ConceptNet data")
    
    # Simple outputs (each helper re-runs the ranking internally)
    print("\n" + "=" * 80)
    print("Ranked order:", get_ranked_choices(conceptnet_data, choices))
    print("Top choice:", get_top_choice(conceptnet_data, choices))

Ranked Choices:

1. BANK (Score: 1.00)
   Matches:
   - AtLocation: 'bank' (weight: 1.00, match: exact)

2. DEPARTMENT STORE (Score: 1.00)
   Matches:
   - AtLocation: 'department store' (weight: 1.00, match: exact)

3. MALL (Score: 1.00)
   Matches:
   - AtLocation: 'mall' (weight: 1.00, match: exact)

4. LIBRARY (Score: 0.00)
   No matches in ConceptNet data

5. NEW YORK (Score: 0.00)
   No matches in ConceptNet data

Ranked order: ['bank', 'department store', 'mall', 'library', 'new york']
Top choice: bank


In [45]:
import numpy as np
from typing import List, Tuple, Dict, Any

# Loaded SentenceTransformer instances keyed by model name, so repeated
# get_embeddings calls reuse one model instead of reloading it each time.
_EMBEDDING_MODEL_CACHE = {}


def get_embeddings(texts: List[str], model_name: str = 'sentence-transformers/all-MiniLM-L6-v2'):
    """
    Get embeddings for a list of texts.
    You can use sentence-transformers, OpenAI, or any other embedding model.

    The model for a given ``model_name`` is constructed on first use and then
    reused; previously every call built a new SentenceTransformer, which the
    ranking functions triggered several times per question.

    Args:
        texts: List of text strings to embed
        model_name: Name of the embedding model to use

    Returns:
        numpy array of embeddings
    """
    # Example using sentence-transformers (install: pip install sentence-transformers)
    from sentence_transformers import SentenceTransformer

    model = _EMBEDDING_MODEL_CACHE.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _EMBEDDING_MODEL_CACHE[model_name] = model
    return model.encode(texts, convert_to_numpy=True)


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """
    Calculate cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector.

    Returns:
        dot(a, b) / (|a| * |b|), or 0.0 when either vector has zero norm
        (the unguarded division would otherwise produce NaN).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator


def rank_choices_semantic(
    conceptnet_data: Dict[str, List[Tuple[str, float]]], 
    question: str,
    choices: List[str],
    model_name: str = 'sentence-transformers/all-MiniLM-L6-v2',
    bridge_threshold: float = 0.1
) -> List[Tuple[str, float, Dict[str, Any]]]:
    """
    Rank choices using semantic similarity between question and ConceptNet relations.

    Each choice's score is the sum of:
      * its direct cosine similarity to the question, and
      * a "bridge" score: for every ConceptNet concept,
        sim(choice, concept) * sim(question, concept) * weight, counted only
        when it exceeds ``bridge_threshold``.

    Args:
        conceptnet_data: Dict mapping relation types to lists of (concept, weight) tuples
        question: The question text
        choices: List of answer choices to rank
        model_name: Embedding model to use
        bridge_threshold: Minimum bridge score for a concept to count (noise filter)

    Returns:
        List of tuples (choice, score, details) sorted by score descending.
        ``details`` holds 'direct_similarity', 'conceptnet_score' and
        'top_concepts' (up to five best bridging concepts).
    """
    # Nothing to rank: skip all embedding work
    if len(choices) == 0:
        return []

    # Get question embedding
    question_embedding = get_embeddings([question], model_name)[0]

    # Get choice embeddings
    choice_embeddings = get_embeddings(choices, model_name)
    choice_to_embedding = {choice: emb for choice, emb in zip(choices, choice_embeddings)}

    # Flatten ConceptNet data into parallel lists of concept text and metadata
    all_concepts = []
    concept_metadata = []
    for relation_type, concepts in conceptnet_data.items():
        for concept, weight in concepts:
            all_concepts.append(concept)
            concept_metadata.append({
                'concept': concept,
                'relation': relation_type,
                'weight': weight
            })

    # Get embeddings for all ConceptNet concepts
    if all_concepts:
        concept_embeddings = get_embeddings(all_concepts, model_name)
    else:
        concept_embeddings = []

    # Question-to-concept similarity does not depend on the choice, so compute
    # it once here instead of once per choice inside the loop below.
    question_concept_sims = [
        cosine_similarity(question_embedding, concept_emb)
        for concept_emb in concept_embeddings
    ]

    # Score each choice
    choice_scores = {}

    for choice in choices:
        choice_emb = choice_to_embedding[choice]

        # Method 1: Direct similarity between choice and question
        direct_similarity = cosine_similarity(choice_emb, question_embedding)

        # Method 2: Similarity via ConceptNet relations
        conceptnet_score = 0
        matching_concepts = []

        for i, concept_emb in enumerate(concept_embeddings):
            metadata = concept_metadata[i]

            choice_concept_sim = cosine_similarity(choice_emb, concept_emb)
            question_concept_sim = question_concept_sims[i]

            # Combined score: how well this concept bridges choice and question
            bridge_score = (choice_concept_sim * question_concept_sim * metadata['weight'])

            if bridge_score > bridge_threshold:
                conceptnet_score += bridge_score
                matching_concepts.append({
                    'concept': metadata['concept'],
                    'relation': metadata['relation'],
                    'weight': metadata['weight'],
                    'choice_sim': choice_concept_sim,
                    'question_sim': question_concept_sim,
                    'bridge_score': bridge_score
                })

        # Combine both scores
        total_score = direct_similarity + conceptnet_score

        choice_scores[choice] = (total_score, {
            'direct_similarity': direct_similarity,
            'conceptnet_score': conceptnet_score,
            'top_concepts': sorted(matching_concepts, key=lambda x: x['bridge_score'], reverse=True)[:5]
        })

    # Sort by score descending
    ranked_choices = sorted(
        [(choice, score, details) for choice, (score, details) in choice_scores.items()],
        key=lambda x: x[1],
        reverse=True
    )

    return ranked_choices


def rank_choices_simple_semantic(
    conceptnet_data: Dict[str, List[Tuple[str, float]]], 
    question: str,
    choices: List[str],
    model_name: str = 'sentence-transformers/all-MiniLM-L6-v2'
) -> List[str]:
    """
    Convenience wrapper around rank_choices_semantic that drops scores and details.

    Args:
        conceptnet_data: Dict mapping relation types to lists of (concept, weight) tuples
        question: The question text
        choices: List of answer choices to rank
        model_name: Embedding model to use

    Returns:
        List of choices sorted by relevance
    """
    ordered_names = []
    for name, _score, _details in rank_choices_semantic(conceptnet_data, question, choices, model_name):
        ordered_names.append(name)
    return ordered_names



In [46]:
# Hard-coded profile for "revolving door" (same data as the parsed output above)
conceptnet_data = {
    'RelatedTo': [('dreht√ºr', 1.0),
                    ('revolving doors', 1.0),
                    ('tourniquet', 1.0),
                    ('bussola', 1.0)],
    'UsedFor': [('entering building', 3.464),
                ('enter building', 1.0),
                ('exiting building', 1.0),
                ('getting into building', 1.0)],
    'AtLocation': [('lobby', 2.0),
                    ('bank', 1.0),
                    ('building', 1.0),
                    ('department store', 1.0),
                    ('entrance to building', 1.0),
                    ('hotel lobby', 1.0),
                    ('mall', 1.0)],
    'Synonym': [('dreht√ºr', 1.0)]
}

question = "A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?"
choices = ["bank", "library", "department store", "mall", "new york"]

# Get full ranking with scores
ranked = rank_choices_semantic(conceptnet_data, question, choices)

print("Ranked Choices (Semantic Matching):")
print("=" * 80)
for i, (choice, score, details) in enumerate(ranked, 1):
    print(f"\n{i}. {choice.upper()} (Total Score: {score:.3f})")
    print(f"   Direct similarity to question: {details['direct_similarity']:.3f}")
    print(f"   ConceptNet bridge score: {details['conceptnet_score']:.3f}")
    
    if details['top_concepts']:
        print(f"   Top bridging concepts:")
        # Only the three strongest bridging concepts are printed
        for concept_info in details['top_concepts'][:3]:
            print(f"   - {concept_info['relation']}: '{concept_info['concept']}' "
                    f"(bridge: {concept_info['bridge_score']:.3f})")

print("\n" + "=" * 80)
# NOTE(review): this call re-runs the whole semantic ranking (embeddings included)
# even though `ranked` above already holds the same ordering.
print("Final ranking:", rank_choices_simple_semantic(conceptnet_data, question, choices))

Ranked Choices (Semantic Matching):

1. BANK (Total Score: 0.686)
   Direct similarity to question: 0.125
   ConceptNet bridge score: 0.561
   Top bridging concepts:
   - UsedFor: 'entering building' (bridge: 0.290)
   - RelatedTo: 'revolving doors' (bridge: 0.146)
   - AtLocation: 'bank' (bridge: 0.125)

2. MALL (Total Score: 0.453)
   Direct similarity to question: 0.039
   ConceptNet bridge score: 0.414
   Top bridging concepts:
   - UsedFor: 'entering building' (bridge: 0.267)
   - RelatedTo: 'revolving doors' (bridge: 0.147)

3. LIBRARY (Total Score: 0.349)
   Direct similarity to question: 0.020
   ConceptNet bridge score: 0.329
   Top bridging concepts:
   - UsedFor: 'entering building' (bridge: 0.194)
   - RelatedTo: 'revolving doors' (bridge: 0.135)

4. DEPARTMENT STORE (Total Score: 0.326)
   Direct similarity to question: 0.051
   ConceptNet bridge score: 0.275
   Top bridging concepts:
   - RelatedTo: 'revolving doors' (bridge: 0.139)
   - UsedFor: 'entering building' (brid

In [47]:
import numpy as np
from typing import List, Tuple, Dict, Any

# Loaded SentenceTransformer instances keyed by model name, so repeated
# get_embeddings calls reuse one model instead of reloading it each time.
_EMBEDDING_MODEL_CACHE = {}


def get_embeddings(texts: List[str], model_name: str = 'sentence-transformers/all-MiniLM-L6-v2'):
    """
    Get embeddings for a list of texts.

    The model for a given ``model_name`` is constructed on first use and then
    reused; previously every call built a new SentenceTransformer, which the
    ranking functions triggered several times per question.

    Args:
        texts: List of text strings to embed
        model_name: Name of the embedding model to use

    Returns:
        numpy array of embeddings
    """
    from sentence_transformers import SentenceTransformer

    model = _EMBEDDING_MODEL_CACHE.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _EMBEDDING_MODEL_CACHE[model_name] = model
    return model.encode(texts, convert_to_numpy=True)


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """
    Cosine similarity of two vectors.

    A small constant (1e-10) is added to the product of the norms so the
    division stays defined when either vector has zero norm.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    numerator = np.dot(a, b)
    return numerator / (norm_product + 1e-10)


def softmax(x: np.ndarray) -> np.ndarray:
    """
    Softmax over all entries of ``x``.

    The maximum is subtracted before exponentiation for numerical stability;
    this does not change the result.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normalizer = np.sum(exponentials)
    return exponentials / normalizer


def attention_based_ranking(
    conceptnet_data: Dict[str, List[Tuple[str, float]]], 
    question: str,
    choices: List[str],
    model_name: str = 'sentence-transformers/all-MiniLM-L6-v2'
) -> List[Tuple[str, float, Dict[str, Any]]]:
    """
    Rank choices with a single soft-attention pass over ConceptNet concepts.

    Steps:
    1. Softmax the question-to-concept cosine similarities into attention weights.
    2. For each choice, sum attention_weight * sim(choice, concept) over concepts.
    3. Blend with the direct question-choice similarity:
       0.3 * direct + 0.7 * attention.

    The ConceptNet weights stored in ``conceptnet_data`` are not used here;
    only the concept texts and relation names are.

    Args:
        conceptnet_data: Dict mapping relation types to lists of (concept, weight) tuples
        question: The question text
        choices: List of answer choices to rank
        model_name: Embedding model to use

    Returns:
        List of tuples (choice, score, details) sorted by score descending
    """
    def _by_score_desc(entries):
        """Sort (choice, score, details) tuples, highest score first."""
        return sorted(entries, key=lambda entry: entry[1], reverse=True)

    question_vec = get_embeddings([question], model_name)[0]
    choice_vecs = get_embeddings(choices, model_name)

    # Flatten every relation's concepts into parallel name / metadata lists
    concept_names = []
    concept_info = []
    for relation_type, concepts in conceptnet_data.items():
        for concept, _weight in concepts:
            concept_names.append(concept)
            concept_info.append({
                'concept': concept,
                'relation': relation_type
            })

    # Without any concept there is nothing to attend over: direct similarity only
    if not concept_names:
        fallback = []
        for position, choice in enumerate(choices):
            similarity = cosine_similarity(question_vec, choice_vecs[position])
            fallback.append((choice, similarity, {
                'direct_similarity': similarity,
                'attention_score': 0.0,
                'top_attended_concepts': []
            }))
        return _by_score_desc(fallback)

    concept_vecs = get_embeddings(concept_names, model_name)

    # Attention: how relevant is each concept to the question?
    attention_weights = softmax(np.array([
        cosine_similarity(question_vec, concept_vec)
        for concept_vec in concept_vecs
    ]))

    ranked = []
    for position, choice in enumerate(choices):
        choice_vec = choice_vecs[position]
        direct_sim = cosine_similarity(question_vec, choice_vec)

        choice_concept_sims = np.array([
            cosine_similarity(choice_vec, concept_vec)
            for concept_vec in concept_vecs
        ])

        # Per-concept contribution; their sum is the attention score
        concept_contributions = attention_weights * choice_concept_sims
        attention_score = np.sum(concept_contributions)

        # Combined score (can adjust weights)
        total_score = 0.3 * direct_sim + 0.7 * attention_score

        # Up to five strongest contributors, strongest first, above a small threshold
        top_concepts = [
            {
                'concept': concept_info[idx]['concept'],
                'relation': concept_info[idx]['relation'],
                'attention_weight': float(attention_weights[idx]),
                'choice_similarity': float(choice_concept_sims[idx]),
                'contribution': float(concept_contributions[idx])
            }
            for idx in np.argsort(concept_contributions)[-5:][::-1]
            if concept_contributions[idx] > 0.01
        ]

        ranked.append((choice, total_score, {
            'direct_similarity': float(direct_sim),
            'attention_score': float(attention_score),
            'top_attended_concepts': top_concepts
        }))

    return _by_score_desc(ranked)


def multi_head_attention_ranking(
    conceptnet_data: Dict[str, List[Tuple[str, float]]], 
    question: str,
    choices: List[str],
    num_heads: int = 4,
    model_name: str = 'sentence-transformers/all-MiniLM-L6-v2'
) -> List[Tuple[str, float, Dict[str, Any]]]:
    """
    Rank choices using multi-head attention over ConceptNet concepts.

    Each head works on its own disjoint subset of embedding dimensions, taken
    from a fixed, seeded random partition of the dimensions. Within a head,
    attention weights are the softmax of question-to-concept cosine
    similarities, and the head score is the attention-weighted sum of
    choice-to-concept similarities. Head scores are averaged and blended with
    the direct question-choice similarity (0.3 * direct + 0.7 * attention).

    Why not a full permutation per head: applying the same permutation to both
    vectors leaves their cosine similarity unchanged, so heads built that way
    all compute the same score and the averaging adds nothing.

    Args:
        conceptnet_data: Dict mapping relation types to lists of (concept, weight) tuples
        question: The question text
        choices: List of answer choices to rank
        num_heads: Number of attention heads (clamped to 1..embedding dimension)
        model_name: Embedding model to use

    Returns:
        List of tuples (choice, score, details) sorted by score descending.
        ``details`` always holds 'direct_similarity',
        'multi_head_attention_score' and 'head_scores'.
    """
    # Get embeddings
    question_embedding = get_embeddings([question], model_name)[0]
    choice_embeddings = get_embeddings(choices, model_name)

    # Only the concept texts are needed; ConceptNet weights are not used here
    all_concepts = [
        concept
        for concepts in conceptnet_data.values()
        for concept, _weight in concepts
    ]

    if not all_concepts:
        # No concepts to attend over: rank by direct similarity, but keep the
        # same detail keys as the main path so callers can read them safely.
        results = []
        for i, choice in enumerate(choices):
            direct_sim = cosine_similarity(question_embedding, choice_embeddings[i])
            results.append((choice, direct_sim, {
                'direct_similarity': float(direct_sim),
                'multi_head_attention_score': 0.0,
                'head_scores': []
            }))
        return sorted(results, key=lambda x: x[1], reverse=True)

    concept_embeddings = get_embeddings(all_concepts, model_name)
    embedding_dim = question_embedding.shape[0]

    # Local generator: reproducible without reseeding NumPy's global RNG.
    # In practice these projections would be learned rather than random.
    rng = np.random.default_rng(42)
    head_count = max(1, min(num_heads, embedding_dim))
    head_dimensions = np.array_split(rng.permutation(embedding_dim), head_count)

    # Attention weights depend only on the question and the concepts, so they
    # are computed once per head and shared by every choice.
    heads = []
    for dims in head_dimensions:
        q_proj = question_embedding[dims]
        concept_projs = concept_embeddings[:, dims]
        attention_logits = np.array([
            cosine_similarity(q_proj, concept_proj)
            for concept_proj in concept_projs
        ])
        heads.append((dims, concept_projs, softmax(attention_logits)))

    results = []

    for i, choice in enumerate(choices):
        choice_emb = choice_embeddings[i]
        direct_sim = cosine_similarity(question_embedding, choice_emb)

        head_scores = []
        for dims, concept_projs, attention_weights in heads:
            c_proj = choice_emb[dims]
            choice_concept_sims = np.array([
                cosine_similarity(c_proj, concept_proj)
                for concept_proj in concept_projs
            ])
            # Attention-weighted score for this head
            head_scores.append(np.sum(attention_weights * choice_concept_sims))

        # Average across heads
        avg_attention_score = np.mean(head_scores)

        # Combined score
        total_score = 0.3 * direct_sim + 0.7 * avg_attention_score

        results.append((choice, total_score, {
            'direct_similarity': float(direct_sim),
            'multi_head_attention_score': float(avg_attention_score),
            'head_scores': [float(s) for s in head_scores]
        }))

    return sorted(results, key=lambda x: x[1], reverse=True)


# Example usage: compare single-head and multi-head attention rankings
if __name__ == "__main__":
    # Hard-coded profile for "revolving door" (same data as the parsed output above)
    conceptnet_data = {
        'RelatedTo': [('dreht√ºr', 1.0),
                      ('revolving doors', 1.0),
                      ('tourniquet', 1.0),
                      ('bussola', 1.0)],
        'UsedFor': [('entering building', 3.464),
                    ('enter building', 1.0),
                    ('exiting building', 1.0),
                    ('getting into building', 1.0)],
        'AtLocation': [('lobby', 2.0),
                       ('bank', 1.0),
                       ('building', 1.0),
                       ('department store', 1.0),
                       ('entrance to building', 1.0),
                       ('hotel lobby', 1.0),
                       ('mall', 1.0)],
        'Synonym': [('dreht√ºr', 1.0)]
    }
    
    question = "A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?"
    choices = ["bank", "library", "department store", "mall", "new york"]
    
    print("=" * 80)
    print("SOFT ATTENTION RANKING")
    print("=" * 80)
    
    ranked = attention_based_ranking(conceptnet_data, question, choices)
    
    for i, (choice, score, details) in enumerate(ranked, 1):
        print(f"\n{i}. {choice.upper()} (Score: {score:.4f})")
        print(f"   Direct similarity: {details['direct_similarity']:.4f}")
        print(f"   Attention score: {details['attention_score']:.4f}")
        
        if details['top_attended_concepts']:
            print(f"   Top attended concepts:")
            # Only the three strongest contributors are printed
            for concept_info in details['top_attended_concepts'][:3]:
                print(f"   - [{concept_info['relation']}] '{concept_info['concept']}'")
                print(f"     (attention: {concept_info['attention_weight']:.4f}, "
                      f"similarity: {concept_info['choice_similarity']:.4f}, "
                      f"contribution: {concept_info['contribution']:.4f})")
    
    print("\n" + "=" * 80)
    print("MULTI-HEAD ATTENTION RANKING")
    print("=" * 80)
    
    ranked_multi = multi_head_attention_ranking(conceptnet_data, question, choices, num_heads=4)
    
    # These lookups rely on the detail keys produced by multi_head_attention_ranking
    for i, (choice, score, details) in enumerate(ranked_multi, 1):
        print(f"\n{i}. {choice.upper()} (Score: {score:.4f})")
        print(f"   Direct similarity: {details['direct_similarity']:.4f}")
        print(f"   Multi-head attention score: {details['multi_head_attention_score']:.4f}")
        print(f"   Individual head scores: {[f'{s:.4f}' for s in details['head_scores']]}")
    
    # Side-by-side summary of both orderings
    print("\n" + "=" * 80)
    print("Final rankings:")
    print("Soft attention:", [choice for choice, _, _ in ranked])
    print("Multi-head attention:", [choice for choice, _, _ in ranked_multi])

SOFT ATTENTION RANKING

1. MALL (Score: 0.2353)
   Direct similarity: 0.0394
   Attention score: 0.3192
   Top attended concepts:
   - [AtLocation] 'mall'
     (attention: 0.0541, similarity: 1.0000, contribution: 0.0541)
   - [AtLocation] 'department store'
     (attention: 0.0547, similarity: 0.6060, contribution: 0.0332)
   - [AtLocation] 'hotel lobby'
     (attention: 0.0601, similarity: 0.4122, contribution: 0.0248)

2. BANK (Score: 0.2352)
   Direct similarity: 0.1254
   Attention score: 0.2823
   Top attended concepts:
   - [AtLocation] 'bank'
     (attention: 0.0589, similarity: 1.0000, contribution: 0.0589)
   - [RelatedTo] 'revolving doors'
     (attention: 0.0994, similarity: 0.2250, contribution: 0.0224)
   - [AtLocation] 'mall'
     (attention: 0.0541, similarity: 0.3570, contribution: 0.0193)

3. DEPARTMENT STORE (Score: 0.1870)
   Direct similarity: 0.0512
   Attention score: 0.2452
   Top attended concepts:
   - [AtLocation] 'department store'
     (attention: 0.0547, s

Embedding-based Ranking

In [50]:
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


class ConceptNetRanker:
    """
    Rank answer choices with sentence embeddings.

    Each choice is scored as a weighted sum of
    (a) its cosine similarity to the question and
    (b) its mean cosine similarity to the concept's ConceptNet relations,
        rendered as strings such as "revolving door AtLocation bank".
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Load the SentenceTransformer model used for every embedding."""
        self.model = SentenceTransformer(model_name)

    def _cosine(self, a, b):
        """Return the cosine similarity between two 1-D embedding vectors."""
        return cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0]

    def build_conceptnet_strings(self, concept, conceptnet_data):
        """
        Convert ConceptNet relations into natural-language strings.
        Example: "revolving door AtLocation bank"

        Args:
            concept: The concept whose relations are being described.
            conceptnet_data: Mapping of relation name -> iterable of targets.
                A target is either a bare string or a sequence whose first
                item is the target word (e.g. ``(word, score)``); anything
                after the first item is ignored.

        Returns:
            List of "<concept> <relation> <target>" strings, in the iteration
            order of ``conceptnet_data``.
        """
        relation_texts = []
        for relation, targets in conceptnet_data.items():
            for target in targets:
                # Accept list-form pairs too (e.g. after a JSON round trip),
                # not just tuples; skip empty sequences instead of crashing.
                if isinstance(target, (tuple, list)):
                    if not target:
                        continue
                    target = target[0]
                relation_texts.append(f"{concept} {relation} {target}")
        return relation_texts

    def score(
        self,
        question: str,
        choices: list,
        concept: str,
        conceptnet_data: dict,
        w_question_choice=0.4,
        w_choice_conceptnet=0.6,
    ):
        """
        Compute combined similarity score:
        score = w1 * sim(Question, Choice) + w2 * sim(Choice, ConceptNet neighborhood)

        Args:
            question: Question text.
            choices: Candidate answer strings.
            concept: Concept whose ConceptNet relations are used.
            conceptnet_data: Mapping of relation name -> targets (see
                ``build_conceptnet_strings``).
            w_question_choice: Weight w1 of the question/choice similarity.
            w_choice_conceptnet: Weight w2 of the choice/neighbourhood similarity.

        Returns:
            ``(ranked, debug)`` where ``ranked`` is a list of
            ``(choice, score)`` tuples sorted by descending score, and
            ``debug`` holds the per-choice lists ``"sim_question_choice"``,
            ``"sim_choice_conceptnet"`` and ``"scores"`` in the original
            order of ``choices``. All values are plain Python floats.
        """
        # Nothing to rank: skip the model calls entirely.
        if len(choices) == 0:
            return [], {
                "sim_question_choice": [],
                "sim_choice_conceptnet": [],
                "scores": [],
            }

        # --- Step 1: encode embeddings ---
        emb_question = np.asarray(self.model.encode(question))
        emb_choices = np.asarray(self.model.encode(choices))

        # --- Step 2: compute similarity ---

        # A. Question -> Choice similarity (one row against all choices)
        sim_QC = cosine_similarity(emb_question.reshape(1, -1), emb_choices)[0].tolist()

        # B. Choice -> ConceptNet (mean similarity to all CN relation strings)
        cn_strings = self.build_conceptnet_strings(concept, conceptnet_data)
        if cn_strings:
            emb_cn = np.asarray(self.model.encode(cn_strings))
            sim_choice_cn = cosine_similarity(emb_choices, emb_cn).mean(axis=1).tolist()
        else:
            # An empty neighbourhood would make cosine_similarity raise on a
            # 0-sample array; treat it as contributing no evidence instead.
            sim_choice_cn = [0.0] * len(sim_QC)

        # --- Step 3: combine weighted scores ---
        final_scores = [
            w_question_choice * s_qc + w_choice_conceptnet * s_cn
            for s_qc, s_cn in zip(sim_QC, sim_choice_cn)
        ]

        # Return choices with scores (stable sort keeps input order on ties)
        ranked = sorted(zip(choices, final_scores), key=lambda pair: pair[1], reverse=True)
        return ranked, {
            "sim_question_choice": sim_QC,
            "sim_choice_conceptnet": sim_choice_cn,
            "scores": final_scores,
        }


In [51]:
# ConceptNet profile for "revolving door": relation -> [(word, score), ...].
# The ranker only uses the word of each pair; the score is ignored.
conceptnet_data = {
    'RelatedTo': [
        ('drehtür', 1.0),
        ('revolving doors', 1.0),
        ('tourniquet', 1.0),
        ('bussola', 1.0),
    ],
    'UsedFor': [
        ('entering building', 3.464),
        ('enter building', 1.0),
        ('exiting building', 1.0),
        ('getting into building', 1.0),
    ],
    'AtLocation': [
        ('lobby', 2.0),
        ('bank', 1.0),
        ('building', 1.0),
        ('department store', 1.0),
        ('entrance to building', 1.0),
        ('hotel lobby', 1.0),
        ('mall', 1.0),
    ],
    'Synonym': [('drehtür', 1.0)],
}

question = "A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?"
concept = "revolving door"
choices = [
    "bank",
    "library",
    "department store",
    "mall",
    "new york"
]

# Score every choice against the question and the concept's neighbourhood.
ranker = ConceptNetRanker()
ranked, debug = ranker.score(question, choices, concept, conceptnet_data)

print("Ranking:")
for c, s in ranked:
    print(f"{c:20}  score={s:.4f}")


Ranking:
bank                  score=0.1475
mall                  score=0.1400
department store      score=0.1208
library               score=0.0611
new york              score=0.0515
