In [2]:
import pickle

class Dataset:
    """Bidirectional lookup tables for a knowledge-graph dataset.

    Three pairs of dictionaries are maintained, each pair being the inverse
    of the other:

    * ``id2node`` / ``node2id``       -- entity index  <-> entity name
    * ``id2rel`` / ``rel2id``         -- relation index <-> relation name
    * ``node2title`` / ``title2node`` -- entity name   <-> human-readable title

    All tables start empty and are filled by the ``set_*`` methods.
    """

    def __init__(self):
        self.id2node = {}
        self.node2id = {}
        self.id2rel = {}
        self.rel2id = {}
        self.node2title = {}
        self.title2node = {}

    @property
    def node_to_title(self):
        """Backward-compatible alias for ``node2title``."""
        return self.node2title

    @node_to_title.setter
    def node_to_title(self, mapping):
        self.node2title = mapping

    def load_key_value_files(self, filename):
        '''
        Load key-value pairs from a file.

        The format is taken from the text after the last '.' in ``filename``:
        'pkl' files are unpickled as-is, 'txt' files are read as one
        tab-separated ``key<TAB>value`` pair per line (blank lines are
        skipped; keys and values stay strings).

        Args:
            filename (str): The name of the file to load.
        Returns:
            tuple: (id2value, value2id) where:
                id2value (dict): Dictionary mapping IDs to values.
                value2id (dict): Dictionary mapping values to IDs. When
                    several IDs share a value, the last one read wins.
        Raises:
            ValueError: If the extension is neither 'pkl' nor 'txt', or a
                non-blank txt line does not hold exactly two fields.
        '''
        file_format = filename.split('.')[-1]
        if file_format == 'pkl':
            # NOTE(security): pickle.load can execute arbitrary code; only
            # use it on files from a trusted source.
            with open(filename, 'rb') as f:
                # Presumably the pickle holds a dict -- it is used as one below.
                id2value = pickle.load(f)
        elif file_format == 'txt':
            id2value = {}
            with open(filename, 'r') as f:
                for line_number, line in enumerate(f, start=1):
                    stripped = line.strip()
                    if not stripped:
                        # Tolerate empty lines (e.g. a trailing newline at EOF).
                        continue
                    fields = stripped.split('\t')
                    if len(fields) != 2:
                        raise ValueError(
                            f"{filename}:{line_number}: expected 'key<TAB>value', "
                            f"got {len(fields)} field(s)."
                        )
                    key, value = fields
                    id2value[key] = value
        else:
            raise ValueError("Unsupported file format. Use 'pkl' or 'txt'.")
        value2id = {v: k for k, v in id2value.items()}
        return id2value, value2id

    def set_id2node(self, filename):
        """Load the entity index from ``filename`` and report how many entries were read."""
        self.id2node, self.node2id = self.load_key_value_files(filename)
        print(f"Loaded {len(self.id2node)} nodes from {filename}.")

    def get_node_by_id(self, node_id):
        """Return the entity name stored for ``node_id``, or None if unknown."""
        return self.id2node.get(node_id, None)

    def get_id_by_node(self, node):
        """Return the index stored for entity name ``node``, or None if unknown."""
        return self.node2id.get(node, None)

    def set_id2rel(self, filename):
        """Load the relation index from ``filename`` and report how many entries were read."""
        self.id2rel, self.rel2id = self.load_key_value_files(filename)
        print(f"Loaded {len(self.id2rel)} relations from {filename}.")

    def get_relation_by_id(self, rel_id):
        """Return the relation name stored for ``rel_id``, or None if unknown."""
        return self.id2rel.get(rel_id, None)

    def get_id_by_relation(self, relation):
        """Return the index stored for relation name ``relation``, or None if unknown."""
        return self.rel2id.get(relation, None)

    def set_node2title(self, filename):
        """Load the entity-name -> title table from ``filename`` and report its size."""
        self.node2title, self.title2node = self.load_key_value_files(filename)
        print(f"Loaded {len(self.node2title)} node titles from {filename}.")

    def get_title_by_node(self, node):
        """Return the title of entity name ``node``, or None if unknown or not loaded."""
        return self.node2title.get(node, None)

    def get_node_by_title(self, title):
        """Return the entity name carrying ``title``, or None if unknown or not loaded."""
        return self.title2node.get(title, None)

    def get_num_nodes(self):
        """Return the number of entries in the entity index."""
        return len(self.id2node)

In [3]:
import numpy as np

class Node:
    """A graph vertex: an entity name with its integer index and display title."""

    def __init__(self, name: str, id: int, title: str):
        # Values are stored exactly as given; no validation is performed.
        self.name, self.id, self.title = name, id, title

    def get_title(self):
        """Return the title given at construction."""
        return self.title

    def get_name(self):
        """Return the entity name given at construction."""
        return self.name

    def get_id(self):
        """Return the index given at construction."""
        return self.id

class Edge:
    """A directed, labelled link from a head ``Node`` to a tail ``Node``.

    ``name`` and ``id`` describe the relation, not the individual edge.
    """

    def __init__(self, name:str, id: int, head: Node, tail: Node):
        # Relation label first, then the two endpoints.
        self.name, self.id = name, id
        self.head, self.tail = head, tail

    def get_tail(self):
        """Return the tail node."""
        return self.tail

    def get_head(self):
        """Return the head node."""
        return self.head

    def get_name(self):
        """Return the relation name."""
        return self.name

    def get_id(self):
        """Return the relation index."""
        return self.id

class Graph:
    """A flat list of labelled edges whose ids are resolved through a ``Dataset``."""

    def __init__(self, dataset: Dataset):
        self.dataset = dataset
        self.edges = []

    def add_edge(self, head: str, relation: str, tail: str, skip_missing: bool = True, add_reverse: bool = True):
        """Append the edge ``head --relation--> tail`` and optionally its reverse.

        Args:
            head: Entity name of the source node.
            relation: Relation name.
            tail: Entity name of the target node.
            skip_missing: When True, an edge whose head, tail or relation is
                unknown to the dataset is reported and not added. When False,
                the edge is added regardless, carrying ``None`` for any id
                that could not be resolved.
            add_reverse: When True, also append ``tail --<relation>_reverse--> head``.
                The reverse edge obeys ``skip_missing`` too, so an unknown
                reverse relation is skipped rather than stored with id None.
        """
        head_id = self.dataset.get_id_by_node(head)
        tail_id = self.dataset.get_id_by_node(tail)
        relation_id = self.dataset.get_id_by_relation(relation)

        if skip_missing:
            if head_id is None:
                print(f'Node {head} not found in dataset, skipping edge')
                return
            if tail_id is None:
                print(f'Node {tail} not found in dataset, skipping edge')
                return
            if relation_id is None:
                print(f'Relation {relation} not found in dataset, skipping edge')
                return

        head_node = Node(head, head_id, self.dataset.get_title_by_node(head))
        tail_node = Node(tail, tail_id, self.dataset.get_title_by_node(tail))
        self.edges.append(Edge(relation, relation_id, head_node, tail_node))

        if not add_reverse:
            return

        reverse_relation = f'{relation}_reverse'
        reverse_relation_id = self.dataset.get_id_by_relation(reverse_relation)
        if reverse_relation_id is None and skip_missing:
            # Previously the reverse edge was appended with id None even
            # though the caller asked for unknown items to be skipped.
            print(f'Relation {reverse_relation} not found in dataset, skipping edge')
            return
        self.edges.append(Edge(reverse_relation, reverse_relation_id, tail_node, head_node))

    def load_triples(self, filename: str, skip_missing: bool = True, add_reverse: bool = True):
        """Add one edge per ``head<TAB>relation<TAB>tail`` line of ``filename``.

        Blank lines are ignored. ``skip_missing`` and ``add_reverse`` are
        forwarded to ``add_edge``.

        Raises:
            ValueError: If the file does not exist, or any other error occurs
                while reading or adding edges (the original exception is
                attached as the cause).
        """
        try:
            with open(filename, 'r') as f:
                for line in f:
                    stripped = line.strip()
                    if not stripped:
                        continue
                    head, relation, tail = stripped.split('\t')
                    self.add_edge(head, relation, tail, skip_missing, add_reverse)
        except FileNotFoundError as e:
            raise ValueError(f'File {filename} not found') from e
        except Exception as e:
            raise ValueError(f'Error loading triples from {filename}: {e}') from e

    def get_num_edges(self):
        """Return the number of stored edges (reverse edges included)."""
        return len(self.edges)

    def get_edges(self):
        """Return the internal edge list itself (not a copy)."""
        return self.edges

    def get_num_nodes(self):
        """Return the size of the dataset's entity index, not the number of nodes touched by edges."""
        return self.dataset.get_num_nodes()

In [4]:
# Build the shared lookup tables: entity index, relation index and entity titles.
dataset = Dataset()

data_dir = 'data/FB15k-237'
dataset.set_id2node(f'{data_dir}/ind2ent.pkl')
dataset.set_id2rel(f'{data_dir}/ind2rel.pkl')
# The title file holds more entries than the entity index (compare the counts printed below).
dataset.set_node2title(f'{data_dir}/extra/entity2text.txt')

Loaded 14505 nodes from data/FB15k-237/ind2ent.pkl.
Loaded 474 relations from data/FB15k-237/ind2rel.pkl.
Loaded 14951 node titles from data/FB15k-237/extra/entity2text.txt.


In [5]:
# Training graph: one edge per triple in train.txt plus its `<relation>_reverse` edge.
graph_train = Graph(dataset)
graph_train.load_triples(f'{data_dir}/train.txt', skip_missing=False, add_reverse=True)
# Note: get_num_nodes() reports the size of the dataset's entity index, not the nodes used by these edges.
graph_train.get_num_nodes(), graph_train.get_num_edges()

(14505, 544230)

In [6]:
# Validation graph = all training edges + the triples from valid.txt (with their reverse edges).
graph_valid = Graph(dataset)
# add training edges to validation graph
for edge in graph_train.get_edges():
    # we set add_reverse=False because it already exists in the training graph
    graph_valid.add_edge(edge.get_head().get_name(), edge.get_name(), edge.get_tail().get_name(), skip_missing=False, add_reverse=False)
graph_valid.load_triples(f'{data_dir}/valid.txt', skip_missing=False, add_reverse=True)
graph_valid.get_num_nodes(), graph_valid.get_num_edges()

(14505, 579300)

In [7]:
# Test graph = all validation-graph edges + the triples from test.txt (with their reverse edges).
graph_test = Graph(dataset)
# add training and validation edges to test graph (validation graph contains all training edges)
for edge in graph_valid.get_edges():
    # we set add_reverse=False because it already exists in the validation graph
    graph_test.add_edge(edge.get_head().get_name(), edge.get_name(), edge.get_tail().get_name(), skip_missing=False, add_reverse=False)
graph_test.load_triples(f'{data_dir}/test.txt', skip_missing=False, add_reverse=True)
graph_test.get_num_nodes(), graph_test.get_num_edges()

(14505, 620232)

In [None]:
class Query:
    """A single query together with its list of answers.

    Args:
        query_type: Free-form label for the query structure (e.g. '2p').
        query_answer: A two-element ``(query, answer)`` sequence where
            ``answer`` is exactly a ``list``.

    Raises:
        ValueError: If ``query_answer`` does not have two elements or its
            second element is not a list.
    """

    def __init__(self, query_type: str, query_answer: tuple):
        self.query_type = query_type

        # Validate the shape first, then the answer container.
        if len(query_answer) != 2:
            raise ValueError("Query answer must be a tuple of (query, answer)")
        if type(query_answer[1]) is not list:
            raise ValueError("Query answer must be a tuple of (query, answer) where answer is a list")

        self.query = query_answer[0]
        self.answer = query_answer[1]

    def __repr__(self):
        return f"Query(type={self.query_type}, query={self.query}, answer={self.answer})"

    def get_answer(self):
        """Return the answer list."""
        return self.answer

    def get_query(self):
        """Return the query part as passed in."""
        return self.query
    

class QueryDataset:
    """Collection of ``Query`` objects grouped by their query-type label."""

    def __init__(self, dataset: Dataset):
        self.dataset = dataset
        # query_type -> list of Query, in insertion order
        self.queries = {}

    def add_query(self, query_type: str, query_answer: tuple):
        """Wrap ``query_answer`` in a ``Query`` and file it under ``query_type``."""
        bucket = self.queries.setdefault(query_type, [])
        bucket.append(Query(query_type, query_answer))

    def get_queries(self, query_type: str):
        """Return the list stored for ``query_type``; raise ValueError if there is none."""
        if query_type in self.queries:
            return self.queries[query_type]
        raise ValueError(f"No queries of type {query_type} found")

    def get_all_queries(self):
        """Return a new list with every stored query, grouped by type in insertion order."""
        return [query for group in self.queries.values() for query in group]

    def get_num_queries(self):
        """Return the total number of stored queries across all types."""
        return sum(map(len, self.queries.values()))

    def get_num_queries_by_type(self, query_type: str):
        """Return how many queries are stored for ``query_type`` (0 if the type is unknown)."""
        return len(self.queries.get(query_type, ()))

    def load_queries_from_pkl(self, filename: str, query_type: str = ''):
        """Unpickle a ``{query: answers}`` mapping and add every entry under ``query_type``.

        Each answers collection is converted to a list before being stored.

        Raises:
            ValueError: If the file is missing, or for any other failure
                while loading or adding the queries.
        """
        try:
            with open(filename, 'rb') as handle:
                loaded = pickle.load(handle)
                for raw_query, raw_answer in loaded.items():
                    self.add_query(query_type, (raw_query, list(raw_answer)))
        except FileNotFoundError:
            raise ValueError(f'File {filename} not found')
        except Exception as e:
            raise ValueError(f'Error loading queries from {filename}: {e}')
        
def human_readable(query: Query, dataset: Dataset):
    """Print a two-hop ('2p') query and its answer set using entity titles.

    Queries whose ``query_type`` is not '2p' are ignored: nothing is printed.
    The query is expected to look like ``((anchor_id, (rel1_id, rel2_id)),)``.
    """
    if query.query_type != '2p':
        return

    chain = query.query[0]
    anchor_id = chain[0]
    first_rel_id = chain[1][0]
    second_rel_id = chain[1][1]

    def title_of(node_id):
        # index -> entity name -> title
        return dataset.get_title_by_node(dataset.get_node_by_id(node_id))

    first_rel_name = dataset.get_relation_by_id(first_rel_id)
    second_rel_name = dataset.get_relation_by_id(second_rel_id)
    anchor_title = title_of(anchor_id)
    answers_titles = list(map(title_of, query.answer))

    print(f"Query:\n{anchor_title}\t--{first_rel_name}-->\tV")
    print(f"V\t--{second_rel_name}-->\t?")
    print(f"\nAnswer Set (?): \n{answers_titles}")

In [9]:
import pandas as pd

class SymbolicReasoning:
    """Answers path queries by exact traversal of a ``Graph``'s edge list.

    Args:
        graph: The graph whose edges are searched.
        logging: When True, every lookup and every matching edge is printed.
    """

    def __init__(self, graph: Graph, logging: bool = True):
        self.graph = graph
        self.logging = logging

    def query_1p(self, head: int, relation: int):
        """Return the distinct tail ids of all edges ``head --relation--> ?``.

        Every call scans the full edge list, so the cost is linear in the
        number of edges.

        Args:
            head: Index of the head node.
            relation: Index of the relation.
        Returns:
            list: Unique tail ids, in no particular order.
        """
        if self.logging:
            print(f"Querying for head: {self.graph.dataset.get_title_by_node(self.graph.dataset.get_node_by_id(head))} ({head} | {self.graph.dataset.get_node_by_id(head)}) and relation: {self.graph.dataset.get_relation_by_id(relation)} ({relation})")
        answers = []
        edges = self.graph.get_edges()
        for edge in edges:
            if edge.get_head().get_id() == head and edge.get_id() == relation:
                if self.logging:
                    print(f"Found edge: {edge.get_head().get_title()} --{edge.get_name()}--> {edge.get_tail().get_title()} ({edge.get_tail().get_id()})")
                answers.append(edge.get_tail().get_id())
        if self.logging:
            print("-" * 50)
        return list(set(answers))

    def query_2p(self, head: int, relations: tuple):
        """Follow ``relations[0]`` from ``head``, then ``relations[1]`` from each result.

        Returns:
            tuple: ``(per_intermediate, answers)`` where ``per_intermediate``
            maps each first-hop node id to the list of its second-hop
            answers, and ``answers`` is the list of distinct second-hop ids.
        """
        first_level_answers = self.query_1p(head, relations[0])
        second_level_answers = {}
        for answer in first_level_answers:
            second_level_answers[answer] = self.query_1p(answer, relations[1])
        answers_set = set()
        for second_level in second_level_answers.values():
            answers_set.update(second_level)
        return second_level_answers, list(answers_set)

    def fixed_size_answer(self, answers: list, size: int):
        """Return a one-column ('score') DataFrame with exactly ``size`` rows.

        The index holds node ids. Ids from ``answers`` get score 1.0. When
        there are fewer than ``size`` answers, the frame is padded with ids
        drawn at random (without replacement, via NumPy's global RNG) from
        the other ids in the dataset's entity index, scored 0.0. When there
        are more, ``size`` rows are sampled at random. The score column is
        always float, whether or not padding took place.

        Raises:
            ValueError: Raised by NumPy when padding is needed but fewer
                unused ids remain than rows to fill.
        """
        df = pd.DataFrame(
            np.ones((len(answers), 1), dtype=float),
            index=np.array(answers),
            columns=['score'],
        )
        if len(df) < size:
            # add random nodes to fill the size
            already_present = set(answers)
            all_nodes_remaining = [
                node for node in self.graph.dataset.id2node.keys()
                if node not in already_present
            ]
            additional_nodes = np.random.choice(all_nodes_remaining, size - len(df), replace=False)
            additional_nodes = [int(node) for node in additional_nodes]
            # add them with score 0
            additional_df = pd.DataFrame(np.zeros((len(additional_nodes), 1)), index=additional_nodes, columns=['score'])
            df = pd.concat([df, additional_df])
        elif len(df) > size:
            # truncate the dataframe to the size
            df = df.sample(size, replace=False)
        return df

In [10]:
# Load the pickled query -> answers mapping; every entry is filed under the '2p' label
# (the loader does not check that the queries really have that structure).
dir_query = 'data/FB15k-237/test_ans_2c_hard.pkl'
sample_query_type = '2p'
query_dataset = QueryDataset(dataset)
query_dataset.load_queries_from_pkl(dir_query, query_type=sample_query_type)

In [11]:
query_dataset.get_num_queries()

5000

In [12]:
# Pick one query by position (order follows the pickled mapping) and print it with entity titles.
sample_idx = 4000
query = query_dataset.get_queries(sample_query_type)[sample_idx]
human_readable(query, dataset)

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Doctorate']


In [13]:
def accuracy(query: Query, answers: list):
    """Return the fraction of the query's expected answers found in ``answers``.

    The value is ``|expected ∩ predicted| / |expected|``, so extra wrong
    predictions do not lower it. Returns 0.0 when the query has no
    expected answers.
    """
    expected = set(query.get_answer())
    predicted = set(answers)
    if not expected:
        return 0.0
    hits = expected & predicted
    return len(hits) / len(expected)

In [14]:
# Answer the sample query symbolically using ONLY the training edges.
# (Names and the printed label now say "train"; they previously said "test"
# although the reasoner was built on graph_train.)
reasoner_train = SymbolicReasoning(graph_train)

sample_idx = 4000
query = query_dataset.get_queries(sample_query_type)[sample_idx]
human_readable(query, dataset)

middle_steps, answers_train = reasoner_train.query_2p(query.get_query()[0][0], query.get_query()[0][1])
print(f"Answers from train graph: {middle_steps}")
print(f"Final Answers: {answers_train}")
print(f"Expected Answers: {query.get_answer()}")
print(f"Accuracy: {accuracy(query, answers_train)}")

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Doctorate']
Querying for head: Lamar Odom (12324 | /m/02_nkp) and relation: /education/educational_institution/students_graduates./education/education/student_reverse (45)
Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Rhode Island (4074)
Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Nevada, Las Vegas (9463)
--------------------------------------------------
Querying for head: University of Rhode Island (4074 | /m/02fjzt) and relation: /education/educational_degree/people_with_this_degree./education/education/institution_reverse (179)
Found edge: University of Rhode Island --/educ

In [15]:
# Same query, now answered on the test graph (training + validation + test edges).
reasoner_test = SymbolicReasoning(graph_test)

# middle_steps maps each first-hop node id to its second-hop answers; answers_test is their union.
middle_steps, answers_test = reasoner_test.query_2p(query.get_query()[0][0], query.get_query()[0][1])
print(f"Answers from test graph: {middle_steps}")
print(f"Final Answers: {answers_test}")
print(f"Expected Answers: {query.get_answer()}")
print(f"Accuracy: {accuracy(query, answers_test)}")

Querying for head: Lamar Odom (12324 | /m/02_nkp) and relation: /education/educational_institution/students_graduates./education/education/student_reverse (45)
Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Rhode Island (4074)
Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Nevada, Las Vegas (9463)
--------------------------------------------------
Querying for head: University of Rhode Island (4074 | /m/02fjzt) and relation: /education/educational_degree/people_with_this_degree./education/education/institution_reverse (179)
Found edge: University of Rhode Island --/education/educational_degree/people_with_this_degree./education/education/institution_reverse--> PhD (587)
Found edge: University of Rhode Island --/education/educational_degree/people_with_this_degree./education/education/institution_reverse--> Bachelor o

In [16]:
reasoner_test.fixed_size_answer(answers_test, 10)

Unnamed: 0,score
706,1.0
587,1.0
3181,1.0
1177,1.0
1566,1.0
937,0.0
11469,0.0
13007,0.0
9515,0.0
5785,0.0


In [17]:
# Compare the three graphs on the same query, with per-edge logging switched off.
reasoner_train = SymbolicReasoning(graph_train, logging=False)
reasoner_valid = SymbolicReasoning(graph_valid, logging=False)
reasoner_test = SymbolicReasoning(graph_test, logging=False)

# query_2p returns (per-intermediate answers, final answers); only the final list is kept here.
answers_train = reasoner_train.query_2p(query.get_query()[0][0], query.get_query()[0][1])[1]
answers_valid = reasoner_valid.query_2p(query.get_query()[0][0], query.get_query()[0][1])[1]
answers_test = reasoner_test.query_2p(query.get_query()[0][0], query.get_query()[0][1])[1]

print(f"Train Accuracy: {accuracy(query, answers_train)}")
print(f"Valid Accuracy: {accuracy(query, answers_valid)}")
print(f"Test Accuracy: {accuracy(query, answers_test)}")

Train Accuracy: 0.0
Valid Accuracy: 0.0
Test Accuracy: 1.0


In [18]:
import pickle

def create_cqd_file(query, original_file='data/FB15k-237/FB15k-237_test_hard.pkl', output_file='data/FB15k-237/FB15k-237_test_hard_sample1.pkl'):
    """Write a one-query CQD sample file derived from an existing pickle.

    The object unpickled from ``original_file`` is reduced to its first
    ``type1_1chain`` entry, that entry's data is overwritten with the anchor
    and the FIRST relation of ``query``, and the result is pickled to
    ``output_file``. Any further relations in ``query`` are not used.

    Args:
        query: Expected to look like ``((anchor_id, (rel_id, ...)),)``.
        original_file: Pickle used as the template.
        output_file: Destination path for the modified pickle.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use trusted files.
    with open(original_file, 'rb') as source:
        data_hard = pickle.load(source)

    # Keep only the first 1-chain entry as the template to overwrite.
    template = data_hard.type1_1chain[0]
    data_hard.type1_1chain = [template]

    anchor = query[0][0]
    first_relation = query[0][1][0]

    # The target is a placeholder (0); per the original note it is not important here.
    template.data['raw_chain'] = [anchor, first_relation, [0]]
    template.data['anchors'] = [anchor]
    template.data['optimisable'] = [-1, 0]
    template.data['targets'] = [0]

    with open(output_file, 'wb') as sink:
        pickle.dump(data_hard, sink)

In [19]:
query.get_query()

((12324, (45, 179)),)

In [20]:
# Write the CQD sample file for the selected query (only its anchor and first relation are used).
create_cqd_file(query.get_query(), output_file='data/FB15k-237/FB15k-237_test_hard_sample.pkl')

In [23]:
import argparse
import torch
from kbc.cqd_co_xcqa import main
import pandas as pd

def cqd_query(query: Query, sample_path: str = None, result_path: str = None, k: int = 10):
    """Score one query with the CQD model and return its ``k`` best entities.

    The query is written to ``sample_path`` with ``create_cqd_file`` (which
    uses only the anchor and the first relation), the CQD entry point is run
    as a single 1-hop chain ('1_1'), and the scores it saves to
    ``result_path`` are loaded back.

    Args:
        query: The query to score.
        sample_path: Where the one-query sample file is written and read.
            Defaults to 'data/FB15k-237/FB15k-237_test_hard_sample.pkl'.
        result_path: Where the model stores its score tensor. Defaults to
            'scores.pt'.
        k: Number of top-scoring rows to return.

    Returns:
        pandas.DataFrame: One 'score' column sorted in descending order,
        limited to ``k`` rows. The index is the position in the score vector
        (presumably the entity index -- confirm against the model).
    """
    if sample_path is None:
        sample_path = 'data/FB15k-237/FB15k-237_test_hard_sample.pkl'
    if result_path is None:
        result_path = 'scores.pt'

    # Create a CQD file with the query
    create_cqd_file(query.get_query(), output_file=sample_path)

    # Set up the arguments for the CQD model (cqd_co_xcqa)
    args = argparse.Namespace(
        path = 'FB15k-237',
        # Must be the file written above; this used to be a hard-coded
        # literal, so a custom sample_path was silently ignored.
        sample_path = sample_path,
        model_path = 'models/FB15k-237-model-rank-1000-epoch-100-1602508358.pt',
        dataset = 'FB15k-237',
        mode = 'test',
        chain_type = '1_1', # '1_1', '1_2', '2_2', '2_2_disj', '1_3', '2_3', '3_3', '4_3', '4_3_disj', '1_3_joint'
        t_norm = 'prod', # 'min', 'prod'
        reg = None,
        lr = 0.1,
        optimizer='adam', # 'adam', 'adagrad', 'sgd'
        max_steps = 1000,
        sample = True,
        result_path = result_path
    )

    # Run the CQD model
    main(args)

    # Load the scores
    scores = torch.load(result_path)
    scores_np = scores.cpu().numpy()

    # Row 0 holds the scores of the single query in the sample file.
    df = pd.DataFrame({'score': scores_np[0]})
    df = df.sort_values(by='score', ascending=False)

    # Get the top k answers
    top_k_answers = df.head(k)

    return top_k_answers

In [25]:
# Neural scoring of the first hop of the sample query: top-10 entities by CQD score.
cqd_query(query, sample_path='data/FB15k-237/FB15k-237_test_hard_sample.pkl', result_path='scores.pt', k=10)

ComplEx(
  (embeddings): ModuleList(
    (0): Embedding(14505, 2000, sparse=True)
    (1): Embedding(474, 2000, sparse=True)
  )
)


Unnamed: 0,score
9463,10.657951
4074,10.455688
7265,6.088031
4683,5.748784
1236,5.67069
3169,5.650109
2173,5.561179
6483,5.536769
5153,5.439585
3895,5.423999


## Example Usage

In [40]:
# Rebuild the lookup tables and the training graph (same calls as the setup cells earlier in the notebook).
dataset = Dataset()

data_dir = 'data/FB15k-237'
dataset.set_id2node(f'{data_dir}/ind2ent.pkl')
dataset.set_id2rel(f'{data_dir}/ind2rel.pkl')
dataset.set_node2title(f'{data_dir}/extra/entity2text.txt')

# Training edges only, each with its `<relation>_reverse` counterpart.
graph_train = Graph(dataset)
graph_train.load_triples(f'{data_dir}/train.txt', skip_missing=False, add_reverse=True)
graph_train.get_num_nodes(), graph_train.get_num_edges()

Loaded 14505 nodes from data/FB15k-237/ind2ent.pkl.
Loaded 474 relations from data/FB15k-237/ind2rel.pkl.
Loaded 14951 node titles from data/FB15k-237/extra/entity2text.txt.


(14505, 544230)

In [41]:
# Load the pickled queries again, filing every entry under the '2p' label.
dir_query = 'data/FB15k-237/test_ans_2c_hard.pkl'
sample_query_type = '2p'
query_dataset = QueryDataset(dataset)
query_dataset.load_queries_from_pkl(dir_query, query_type=sample_query_type)

In [26]:
# Select the same sample query as before and print it with entity titles.
sample_idx = 4000
query = query_dataset.get_queries(sample_query_type)[sample_idx]
human_readable(query, dataset)

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Doctorate']


In [49]:
query

Query(type=2p, query=((12324, (45, 179)),), answer=[3181])

In [50]:
dataset.get_node_by_id(12324)

'/m/02_nkp'

In [51]:
dataset.get_title_by_node("/m/02fjzt")

'University of Rhode Island'

In [52]:
dataset.get_title_by_node("/m/01jpqb")

'University of Nevada, Las Vegas'

In [53]:
dataset.get_title_by_node("/m/04zx3q1")

'PhD'

In [None]:
dataset.get_title_by_node("/m/02h4rq6")

'Bachelor of Science'

In [55]:
dataset.get_title_by_node("/m/02_xgp2")

'Doctorate'

In [27]:
query

Query(type=2p, query=((12324, (45, 179)),), answer=[3181])

In [28]:
# Symbolic reasoner over the training graph, with logging off.
# NOTE(review): `symolic` looks like a typo for `symbolic`; renaming it means updating the next cell as well.
symolic = SymbolicReasoning(graph_train, logging=False)

In [29]:
# First hop, symbolic: tail ids reachable from the anchor through the first relation.
query1 = symolic.query_1p(query.get_query()[0][0], query.get_query()[0][1][0])
# Force exactly 10 candidates. Missing rows are filled with random entities scored 0, drawn from
# NumPy's global RNG, so this output changes between runs unless that RNG is seeded.
query1 = symolic.fixed_size_answer(query1, 10)
query1

Unnamed: 0,score
4074,1.0
9463,1.0
12364,0.0
7627,0.0
11967,0.0
7422,0.0
4297,0.0
12665,0.0
2372,0.0
5024,0.0


In [48]:
# Show entity name and title for every first-hop candidate (real answers and random padding alike).
v_list = query1.index.tolist()
for v in v_list:
    print(f"Node: {dataset.get_node_by_id(v)} | Title: {dataset.get_title_by_node(dataset.get_node_by_id(v))}")

Node: /m/02fjzt | Title: University of Rhode Island
Node: /m/01jpqb | Title: University of Nevada, Las Vegas
Node: /m/0g0vx | Title: Farmer-GB
Node: /m/01vs73g | Title: Rodney Jerkins
Node: /m/0r0ss | Title: Pomona
Node: /m/06fpsx | Title: The 40-Year-Old Virgin
Node: /m/0pmw9 | Title: Paul Shaffer
Node: /m/02gnj2 | Title: Len Wein
Node: /m/01xdn1 | Title: Goldman Sachs-US
Node: /m/01v0fn1 | Title: Keith Forsey


In [None]:
# Second hop, neural: for every first-hop candidate, ask CQD for the top-10 tails of the
# second relation and weight their scores by the candidate's own score (1 = symbolic hit, 0 = padding).
second_rel = query.get_query()[0][1][1]

# Collect one frame per candidate and concatenate once after the loop. Growing a frame with
# pd.concat inside the loop, starting from an empty frame, is quadratic and triggers a pandas warning.
per_path_answers = []
for idx, row in query1.iterrows():
    current_query = Query('1p', (((idx, (second_rel,)),), []))
    # cqd_query() writes the sample file itself, so no separate create_cqd_file() call is needed.
    neuro_answers = cqd_query(current_query, sample_path='data/FB15k-237/FB15k-237_test_hard_sample.pkl', result_path='scores.pt', k=10)
    # .assign returns a new frame instead of mutating the one returned by cqd_query().
    per_path_answers.append(
        neuro_answers.assign(
            score=neuro_answers['score'] * row['score'],
            path=str([(idx, float(row['score']))]),
        )
    )

if per_path_answers:
    final_answers = pd.concat(per_path_answers)
else:
    # No candidates at all: keep the same columns as a populated result.
    final_answers = pd.DataFrame(columns=['score', 'path'])

final_answers = final_answers.sort_values(by='score', ascending=False)
final_answers.head(10)

  final_answers = pd.concat([final_answers, neuro_answers])


Unnamed: 0,score,path
1177,8.299611,"[(4074, 1.0)]"
706,8.166074,"[(4074, 1.0)]"
1566,7.979342,"[(9463, 1.0)]"
1177,7.85583,"[(9463, 1.0)]"
1566,7.546796,"[(4074, 1.0)]"
706,7.405836,"[(9463, 1.0)]"
587,7.124403,"[(4074, 1.0)]"
1019,6.903434,"[(4074, 1.0)]"
1330,6.76613,"[(4074, 1.0)]"
1019,6.62109,"[(9463, 1.0)]"


In [44]:
# Ids of the 10 best-scoring rows.
# NOTE(review): there is one row per (path, entity), so the same entity id can appear more than
# once here (see the output below); de-duplicate if distinct answers are wanted.
answers = final_answers.index.tolist()[:10]
answers

[1177, 706, 1566, 1177, 1566, 706, 587, 1019, 1330, 1019]

In [46]:
human_readable(query, dataset)

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Doctorate']


In [45]:
# Print the title of each top-ranked answer id for comparison with the expected answer set.
for answer in answers:
    print(f"Node ID: {answer}, Title: {dataset.get_title_by_node(dataset.get_node_by_id(answer))}")

Node ID: 1177, Title: Bachelor of Arts
Node ID: 706, Title: Bachelor of Science
Node ID: 1566, Title: Bachelor's degree
Node ID: 1177, Title: Bachelor of Arts
Node ID: 1566, Title: Bachelor's degree
Node ID: 706, Title: Bachelor of Science
Node ID: 587, Title: PhD
Node ID: 1019, Title: Master's Degree
Node ID: 1330, Title: Master of Arts
Node ID: 1019, Title: Master's Degree


In [None]:
def query_execution(query: Query, k: int = 10, coalition: list = None):
    """Walk a coalition vector that selects a reasoner for each step (stub).

    Only validation is implemented so far: both branches are placeholders,
    and ``query`` and ``k`` are not used yet.

    Args:
        query: The query to execute.
        k: Intended number of answers to keep.
        coalition: Sequence of flags, ``1`` for the CQD model and ``0`` for
            symbolic reasoning. ``None`` (the default) is treated as empty.

    Raises:
        ValueError: If ``coalition`` contains anything other than 0 or 1.
    """
    # The default None used to be iterated directly, raising TypeError.
    if coalition is None:
        coalition = []
    for c in coalition:
        if c == 1:
            # run the CQD model
            pass
        elif c == 0:
            # run the symbolic reasoning
            pass
        else:
            raise ValueError(f"Unknown coalition type: {c}")
    