In [None]:
# !pip install conceptnet_lite

Collecting conceptnet_lite
  Downloading conceptnet_lite-0.2.0-py3-none-any.whl.metadata (14 kB)
Collecting lmdb<2.0,>=1.0 (from conceptnet_lite)
  Downloading lmdb-1.7.5-cp311-cp311-win_amd64.whl.metadata (1.4 kB)
Collecting peewee<4.0,>=3.10 (from conceptnet_lite)
  Downloading peewee-3.18.3.tar.gz (3.0 MB)
     ---------------------------------------- 0.0/3.0 MB ? eta -:--:--
     ---------------------------------------- 3.0/3.0 MB 19.7 MB/s  0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting pysmartdl<2.0,>=1.3 (from conceptnet_lite)
  Downloading pySmartDL-1.3.4-py3-none-any.whl.metadata (2.8 kB)
Downloading conceptnet_lite-0.2.0-py3-none-any.whl (16 kB)
Downloading lmdb-1.7.5-cp311-cp311

In [1]:
from collections import deque
import requests
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

from sentence_transformers import SentenceTransformer, util
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download resources once
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

STOPWORDS = set(stopwords.words('english'))
LEMMATIZER = WordNetLemmatizer()


In [3]:
nlp = spacy.load("en_core_web_sm")
# embedder = SentenceTransformer("all-MiniLM-L6-v2")  # fast, small, good quality

In [4]:

def extract_keywords(question: str, max_terms: int = 3) -> list[str]:
    """
    Extracts key content words (mainly nouns) from the question.
    Cleans, lemmatizes, and removes stopwords/punctuation/numbers.
    Returns up to `max_terms` unique keywords.
    """
    if not isinstance(question, str):
        raise ValueError(f"Expected string input, got {type(question)}")

    doc = nlp(question)

    # Collect candidate tokens: prioritize noun chunks first
    candidates = []
    for chunk in doc.noun_chunks:
        root = chunk.root.lemma_.lower().strip()
        if root.isalpha() and root not in STOP_WORDS:
            candidates.append(root)

    # If too few candidates, add additional strong content words (nouns/adjectives)
    if len(candidates) < max_terms:
        for token in doc:
            if (
                token.pos_ in {"NOUN", "PROPN", "ADJ"} and
                token.is_alpha and
                token.lemma_.lower() not in STOP_WORDS
            ):
                candidates.append(token.lemma_.lower())

    # Deduplicate while preserving order
    cleaned = list(dict.fromkeys(candidates))

    return cleaned[:max_terms]


In [5]:
def get_cnet_entities(e1, e2):
    """
    Gets the entity in conceptnet most related to e1 and e2 (ideally would just be e1 and e2 themselves)
    """
    response = requests.get(f'http://api.conceptnet.io/related/c/en/{e1.strip()}')
    related_i = response.json()['related']
    related_i = [r for r in related_i if '/en/' in r['@id']]

    response = requests.get(f'http://api.conceptnet.io/related/c/en/{e2.strip()}')
    related_j = response.json()['related']
    related_j = [r for r in related_j if '/en/' in r['@id']]

    if len(related_i) == 0:
        cnet_entity1 = None
    else:
        cnet_entity1 = related_i[0]["@id"]

    if len(related_j) == 0:
        cnet_entity2 = None
    else:
        cnet_entity2 = related_j[0]["@id"]

    return cnet_entity1, cnet_entity2

In [6]:
def find_entity_path(start, end, max_depth, branching_factor):
    queue = deque()
    queue.append((start, []))
    visited = set([start])

    while queue:
        (vertex, path) = queue.popleft()
        if len(path) > max_depth:
            break

        # select branching factor number of edges
        vertex_edges = requests.get(f'http://api.conceptnet.io/{vertex}').json()['edges'][:branching_factor]

        # for each selected edge
        for edj_obj in vertex_edges:
            # construct edge with edge information
            if vertex == edj_obj['start']['@id']:
                n2 = edj_obj['end']['@id']
            else:
                n2 = edj_obj['start']['@id']

            path_edge = [{'n1': vertex, 'edge_type': edj_obj['rel']['label'], 'n2':n2, 'surface_text': edj_obj['surfaceText'].replace('[', '').replace(']', '') if edj_obj['surfaceText'] is not None else None}]
            # if end of this edge is the node to find
            if n2 == end:
                overall_path = path + path_edge
                path_text = ' '.join([e['surface_text'].capitalize() + "." if e['surface_text'] is not None else "" for e in overall_path])
                if path_text.strip() == "": return []
                else: return [path_text]
            
            # if node is not visited, add it to the queue
            elif '/c/en/' in n2 and n2 not in visited:
                visited.add(n2)
                queue.append((n2, path + path_edge))

    return []

In [7]:
question = "What is in the motorcyclist's mouth?"

keywords = extract_keywords(question, max_terms=3)
keywords

['mouth', 'motorcyclist']

In [14]:
e1, e2 = get_cnet_entities("mouth", "motorcyclist")
path_texts = find_entity_path(e1, e2, max_depth=10, branching_factor=2)
relations_text = path_texts[0] if path_texts else "No relation found."
relations_text

'No relation found.'

In [8]:
e1, e2 = get_cnet_entities("motorcyclist", "cigarette")
path_texts = find_entity_path(e1, e2, max_depth=10, branching_factor=2)
relations_text = path_texts[0] if path_texts else "No relation found."
relations_text

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [20]:
from conceptnet_lite import Label

In [21]:
keyword = "motorcyclist"

concepts = Label.get(text=keyword, language='en').concepts  

AttributeError: Cannot use uninitialized Proxy.

In [None]:
concepts

In [19]:
from conceptnet_lite import conceptnet
conceptnet.load()  # loads the default ConceptNet SQLite DB


ImportError: cannot import name 'conceptnet' from 'conceptnet_lite' (c:\InstalledApps\miniconda3\envs\my\Lib\site-packages\conceptnet_lite\__init__.py)

In [18]:
import numpy as np
from conceptnet_lite import ConceptNet
cn = ConceptNet()

def plausibility(subject, obj):
    rels = cn.get_relations_between(subject, obj)
    return len(rels)

ImportError: cannot import name 'ConceptNet' from 'conceptnet_lite' (c:\InstalledApps\miniconda3\envs\my\Lib\site-packages\conceptnet_lite\__init__.py)