In [1]:
from graph import Dataset, Graph

In [2]:
data_dir = 'data/FB15k-237'

# Shared vocabulary object: node ids, relation ids and human-readable node titles
# are each read from their own file under `data_dir`.
dataset = Dataset()
dataset.set_id2node(data_dir + '/ind2ent.pkl')
dataset.set_id2rel(data_dir + '/ind2rel.pkl')
dataset.set_node2title(data_dir + '/extra/entity2text.txt')

Loaded 14505 nodes from data/FB15k-237/ind2ent.pkl.
Loaded 474 relations from data/FB15k-237/ind2rel.pkl.
Loaded 14951 node titles from data/FB15k-237/extra/entity2text.txt.


In [3]:
# Training graph: built from train.txt only.
# NOTE(review): add_reverse=True presumably also inserts the inverse of every triple — confirm in graph.py.
graph_train = Graph(dataset)
graph_train.load_triples(
    data_dir + '/train.txt',
    skip_missing=False,
    add_reverse=True,
)
(graph_train.get_num_nodes(), graph_train.get_num_edges())

(14505, 544230)

In [4]:
graph_valid = Graph(dataset)
# Validation graph = all training edges + the triples from valid.txt.
# The training edges are copied with add_reverse=False because the training graph
# already holds their reverse edges, which this loop copies as well.
for train_edge in graph_train.get_edges():
    head_name = train_edge.get_head().get_name()
    relation_name = train_edge.get_name()
    tail_name = train_edge.get_tail().get_name()
    graph_valid.add_edge(head_name, relation_name, tail_name, skip_missing=False, add_reverse=False)
graph_valid.load_triples(
    data_dir + '/valid.txt',
    skip_missing=False,
    add_reverse=True,
)
(graph_valid.get_num_nodes(), graph_valid.get_num_edges())

(14505, 579300)

In [5]:
graph_test = Graph(dataset)
# Test graph = training + validation edges + the triples from test.txt.
# Iterating over the validation graph is enough: it already contains every training edge.
# add_reverse=False because the validation graph already holds the reverse edges.
for known_edge in graph_valid.get_edges():
    head_name = known_edge.get_head().get_name()
    relation_name = known_edge.get_name()
    tail_name = known_edge.get_tail().get_name()
    graph_test.add_edge(head_name, relation_name, tail_name, skip_missing=False, add_reverse=False)
graph_test.load_triples(
    data_dir + '/test.txt',
    skip_missing=False,
    add_reverse=True,
)
(graph_test.get_num_nodes(), graph_test.get_num_edges())

(14505, 620232)

In [6]:
from query import Query, QueryDataset, human_readable

In [7]:
from symbolic import SymbolicReasoning

In [8]:
# Answer files for the test queries, one per query structure.
# Paths are derived from `data_dir` (defined with the dataset above) so that switching
# datasets only requires editing one place.
# Note the naming mismatch: file names use "c" where the query-type labels use "p"
# (test_ans_2c.pkl -> '2p', test_ans_ci.pkl -> 'pi', ...).
dir_query_2p = f'{data_dir}/test_ans_2c.pkl'
dir_query_3p = f'{data_dir}/test_ans_3c.pkl'
dir_query_2i = f'{data_dir}/test_ans_2i.pkl'
dir_query_2u = f'{data_dir}/test_ans_2u.pkl'
dir_query_3i = f'{data_dir}/test_ans_3i.pkl'
dir_query_pi = f'{data_dir}/test_ans_ci.pkl'
dir_query_ip = f'{data_dir}/test_ans_ic.pkl'
dir_query_up = f'{data_dir}/test_ans_uc.pkl'

query_dataset = QueryDataset(dataset)
# Load order is kept identical to the original sequence of calls.
for query_type, query_path in (
    ('2p', dir_query_2p),
    ('3p', dir_query_3p),
    ('2i', dir_query_2i),
    ('2u', dir_query_2u),
    ('3i', dir_query_3i),
    ('pi', dir_query_pi),
    ('ip', dir_query_ip),
    ('up', dir_query_up),
):
    query_dataset.load_queries_from_pkl(query_path, query_type=query_type)
query_dataset.get_num_queries()

40000

In [9]:
sample_idx = 4000
sample_query_type = '2p'
query = query_dataset.get_queries(sample_query_type)[sample_idx]
human_readable(query, dataset)

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Bachelor of Science', 'PhD', 'Doctorate', 'Bachelor of Arts', "Bachelor's degree"]


In [10]:
query_dataset.get_num_queries()

40000

In [11]:
# Answer files restricted to the *hard* answers of the same test queries.
# These assignments deliberately rebind the dir_query_* names to the `_hard` files.
# Paths are derived from `data_dir` (defined with the dataset above) instead of
# repeating the dataset directory in every literal.
dir_query_2p = f'{data_dir}/test_ans_2c_hard.pkl'
dir_query_3p = f'{data_dir}/test_ans_3c_hard.pkl'
dir_query_2i = f'{data_dir}/test_ans_2i_hard.pkl'
dir_query_2u = f'{data_dir}/test_ans_2u_hard.pkl'
dir_query_3i = f'{data_dir}/test_ans_3i_hard.pkl'
dir_query_pi = f'{data_dir}/test_ans_ci_hard.pkl'
dir_query_ip = f'{data_dir}/test_ans_ic_hard.pkl'
dir_query_up = f'{data_dir}/test_ans_uc_hard.pkl'

query_dataset_hard = QueryDataset(dataset)
# Load order is kept identical to the original sequence of calls.
for query_type, query_path in (
    ('2p', dir_query_2p),
    ('3p', dir_query_3p),
    ('2i', dir_query_2i),
    ('2u', dir_query_2u),
    ('3i', dir_query_3i),
    ('pi', dir_query_pi),
    ('ip', dir_query_ip),
    ('up', dir_query_up),
):
    query_dataset_hard.load_queries_from_pkl(query_path, query_type=query_type)
query_dataset_hard.get_num_queries()

40000

In [12]:
sample_query_type = '2p'
sample_idx = 4000
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
human_readable(query_hard, dataset)

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Doctorate']


In [13]:
def accuracy(query: Query, answers: list):
    """Return the fraction of the query's correct answers found in `answers`.

    Both sides are de-duplicated first. Extra (wrong) predictions are not
    penalised, so the score only measures how many expected answers were
    recovered.

    Args:
        query: object whose `get_answer()` yields the expected answers.
        answers: predicted answers.

    Returns:
        A float in [0, 1]; 0.0 when the query has no expected answers.
    """
    expected = set(query.get_answer())
    predicted = set(answers)
    if not expected:
        # Nothing to recover: define the score as 0.0 instead of dividing by zero.
        return 0.0
    return len(expected & predicted) / len(expected)

In [14]:
# Symbolic reasoner restricted to the training graph (logging left at its default).
reasoner_train = SymbolicReasoning(graph_train)

sample_idx = 4000
query = query_dataset.get_queries(sample_query_type)[sample_idx]
human_readable(query, dataset)

# The query's first branch holds the anchor entity at [0] and the relation path at [1].
query_branch = query.get_query()[0]
anchor_entity, relation_path = query_branch[0], query_branch[1]
middle_steps, answers_train = reasoner_train.query_2p(anchor_entity, relation_path)
print(f"Answers from train graph: {middle_steps}")
print(f"Final Answers: {answers_train}")
print(f"Expected Answers: {query.get_answer()}")
print(f"Accuracy: {accuracy(query, answers_train)}")

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Bachelor of Science', 'PhD', 'Doctorate', 'Bachelor of Arts', "Bachelor's degree"]
Querying for head: Lamar Odom (12324 | /m/02_nkp) and relation: /education/educational_institution/students_graduates./education/education/student_reverse (45)
Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Rhode Island (4074)
Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Nevada, Las Vegas (9463)
--------------------------------------------------
Querying for head: University of Rhode Island (4074 | /m/02fjzt) and relation: /education/educational_degree/people_with_this_degree./education/education/i

In [15]:
# Same query as above, now answered on the test graph (train + valid + test edges).
reasoner_test = SymbolicReasoning(graph_test)

# The query's first branch holds the anchor entity at [0] and the relation path at [1].
query_branch = query.get_query()[0]
anchor_entity, relation_path = query_branch[0], query_branch[1]
middle_steps, answers_test = reasoner_test.query_2p(anchor_entity, relation_path)
print(f"Answers from test graph: {middle_steps}")
print(f"Final Answers: {answers_test}")
print(f"Expected Answers: {query.get_answer()}")
print(f"Accuracy: {accuracy(query, answers_test)}")

Querying for head: Lamar Odom (12324 | /m/02_nkp) and relation: /education/educational_institution/students_graduates./education/education/student_reverse (45)
Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Rhode Island (4074)


Found edge: Lamar Odom --/education/educational_institution/students_graduates./education/education/student_reverse--> University of Nevada, Las Vegas (9463)
--------------------------------------------------
Querying for head: University of Rhode Island (4074 | /m/02fjzt) and relation: /education/educational_degree/people_with_this_degree./education/education/institution_reverse (179)
Found edge: University of Rhode Island --/education/educational_degree/people_with_this_degree./education/education/institution_reverse--> PhD (587)
Found edge: University of Rhode Island --/education/educational_degree/people_with_this_degree./education/education/institution_reverse--> Bachelor of Science (706)
Found edge: University of Rhode Island --/education/educational_degree/people_with_this_degree./education/education/institution_reverse--> Bachelor of Arts (1177)
Found edge: University of Rhode Island --/education/educational_degree/people_with_this_degree./education/education/institution_revers

In [16]:
reasoner_test.fixed_size_answer(answers_test, 10)

Unnamed: 0,score
706,1.0
587,1.0
3181,1.0
1177,1.0
1566,1.0
6003,0.0
14080,0.0
12392,0.0
1483,0.0
417,0.0


In [17]:
# One silent reasoner per graph, so the same query can be compared across splits.
reasoner_train = SymbolicReasoning(graph_train, logging=False)
reasoner_valid = SymbolicReasoning(graph_valid, logging=False)
reasoner_test = SymbolicReasoning(graph_test, logging=False)

# The query's first branch holds the anchor entity at [0] and the relation path at [1].
query_branch = query.get_query()[0]
anchor_entity, relation_path = query_branch[0], query_branch[1]

# query_2p returns (middle_steps, answers); only the final answers are needed here.
answers_train = reasoner_train.query_2p(anchor_entity, relation_path)[1]
answers_valid = reasoner_valid.query_2p(anchor_entity, relation_path)[1]
answers_test = reasoner_test.query_2p(anchor_entity, relation_path)[1]

print(f"Train Accuracy: {accuracy(query, answers_train)}")
print(f"Valid Accuracy: {accuracy(query, answers_valid)}")
print(f"Test Accuracy: {accuracy(query, answers_test)}")

Train Accuracy: 0.8
Valid Accuracy: 0.8
Test Accuracy: 1.0


In [18]:
from cqd import create_cqd_file

In [19]:
create_cqd_file([query], output_file='data/FB15k-237/FB15k-237_test_hard_sample.pkl')

Creating CQD file: 100%|██████████| 1/1 [00:00<00:00, 20460.02it/s]


In [20]:
from cqd import cqd_query, get_cache_prediction

In [21]:
cqd_query(query, sample_path='data/FB15k-237/FB15k-237_test_hard_sample.pkl', result_path='scores.json', k=5)

Creating CQD file: 100%|██████████| 1/1 [00:00<00:00, 16644.06it/s]


ComplEx(
  (embeddings): ModuleList(
    (0): Embedding(14505, 2000, sparse=True)
    (1): Embedding(474, 2000, sparse=True)
  )
)


100%|██████████| 1/1 [00:00<00:00,  8.30it/s]


Saving results to scores.json


Unnamed: 0,score
9463,10.657951
4074,10.455688
7265,6.088031
4683,5.748784
1236,5.67069


In [22]:
import pandas as pd

cqd_cache = pd.read_json('data/FB15k-237/all_1p_queries_top25.json', orient='records')
cqd_cache

Unnamed: 0,entity_id,relation_id,top_k_entities,top_k_scores
0,8227,402,"[1964, 7094, 5233, 3593, 6775, 4762, 7548, 577...","[1.641523718833923, 1.5853451490402222, 1.5796..."
1,8227,403,"[5145, 12787, 400, 7208, 4947, 9367, 12456, 11...","[6.992974281311035, 4.854063987731934, 4.71984..."
2,8227,404,"[1527, 7216, 4168, 226, 4994, 592, 32, 4331, 1...","[9.617685317993164, 4.58512020111084, 4.580133..."
3,8227,405,"[2037, 7859, 1779, 7421, 8288, 1104, 6393, 226...","[1.398288369178772, 1.334656715393066, 1.30624..."
4,8227,406,"[10948, 6607, 9650, 4005, 4, 863, 3202, 6386, ...","[1.6829309463500972, 1.536678314208984, 1.4762..."
...,...,...,...,...
6875365,1265,385,"[8410, 1265, 2328, 1266, 1568, 5790, 2327, 104...","[8.260475158691406, 7.059150695800781, 6.96028..."
6875366,1265,386,"[62, 4, 11, 163, 23, 344, 212, 706, 529, 9, 39...","[2.077280521392822, 1.39265489578247, 1.258444..."
6875367,1265,387,"[470, 862, 657, 6676, 818, 215, 845, 330, 8400...","[1.121290802955627, 1.064321041107177, 1.04515..."
6875368,1265,388,"[1265, 2328, 8410, 1266, 1568, 3842, 5790, 565...","[4.588125228881836, 3.088305473327636, 2.88703..."


In [23]:
cqd_query(query, k=25, cqd_cache=cqd_cache)

Unnamed: 0,score
9463,10.657951
4074,10.455688
7265,6.088031
4683,5.748784
1236,5.67069
3169,5.650109
2173,5.561179
6483,5.536769
5153,5.439585
3895,5.423999


In [24]:
from xcqa import XCQA

symbolic = SymbolicReasoning(graph_train, logging=False)
xcqa = XCQA(symbolic = symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [25]:
from shapley import shapley_value
from shapley import value_function

## Example Usage

In [24]:
sample_idx = 4000
query = query_dataset.get_queries(sample_query_type)[sample_idx]
human_readable(query, dataset)

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Bachelor of Science', 'PhD', 'Doctorate', 'Bachelor of Arts', "Bachelor's degree"]


In [24]:
symbolic = SymbolicReasoning(graph_train, logging=False)

In [26]:
sample_idx = 4000
# Fetch the same query from both datasets: the full answer set and the hard-only answer set.
query = query_dataset.get_queries(sample_query_type)[sample_idx]
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of the full query that are not hard answers (original order kept).
easy_answers = [
    answer
    for answer in query.get_answer()
    if answer not in query_hard.get_answer()
]
human_readable(query_hard, dataset)
print("-" * 70)
human_readable(query, dataset)

Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Doctorate']
----------------------------------------------------------------------
Query:
Lamar Odom	--/education/educational_institution/students_graduates./education/education/student_reverse-->	V
V	--/education/educational_degree/people_with_this_degree./education/education/institution_reverse-->	?

Answer Set (?): 
['Bachelor of Science', 'PhD', 'Doctorate', 'Bachelor of Arts', "Bachelor's degree"]


In [27]:
query_hard

Query(type=2p, query=((12324, (45, 179)),), answer=[3181])

In [28]:
query

Query(type=2p, query=((12324, (45, 179)),), answer=[706, 587, 3181, 1177, 1566])

In [25]:
from xcqa import XCQA

symbolic = SymbolicReasoning(graph_train, logging=False)
xcqa = XCQA(symbolic = symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [30]:
# Coalition [0, 0]: both atoms switched off.
# NOTE(review): 0 presumably selects the symbolic reasoner for that atom, 1 the CQD scores — confirm in xcqa.py.
xcqa.query_execution(
    query,
    k=10,
    coalition=[0, 0],
    t_norm='prod',
    t_conorm='min',
)

Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.70 seconds


Unnamed: 0,score,path
1177,1.0,12324--45-->4074--179-->
706,1.0,12324--45-->4074--179-->
587,1.0,12324--45-->4074--179-->
1566,1.0,12324--45-->9463--179-->
13071,0.0,12324--45-->4074--179-->
...,...,...
14500,0.0,
14501,0.0,
14502,0.0,
14503,0.0,


In [31]:
xcqa.query_execution(query, k=10, coalition=[0, 1], t_norm='prod', t_conorm='min')

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.19 seconds


Unnamed: 0,score,path
1177,8.299611,12324--45-->4074--179-->
706,8.166074,12324--45-->4074--179-->
1566,7.979342,12324--45-->9463--179-->
587,7.124403,12324--45-->4074--179-->
1019,6.903434,12324--45-->4074--179-->
...,...,...
14500,0.000000,
14501,0.000000,
14502,0.000000,
14503,0.000000,


In [32]:
xcqa.query_execution(query, k=10, coalition=[0, 1], t_norm='min', t_conorm='min')

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


Unnamed: 0,score,path
1177,1.0,12324--45-->4074--179-->
706,1.0,12324--45-->4074--179-->
1566,1.0,12324--45-->4074--179-->
587,1.0,12324--45-->4074--179-->
1019,1.0,12324--45-->4074--179-->
...,...,...
14500,0.0,
14501,0.0,
14502,0.0,
14503,0.0,


In [33]:
# Coalition [1, 0]: only the first atom switched on.
xcqa.query_execution(
    query,
    k=10,
    coalition=[1, 0],
    t_norm='prod',
    t_conorm='min',
)

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.54 seconds


Unnamed: 0,score,path
1177,10.657951,12324--45-->9463--179-->
1566,10.657951,12324--45-->9463--179-->
706,10.455688,12324--45-->4074--179-->
587,10.455688,12324--45-->4074--179-->
4709,6.088031,12324--45-->7265--179-->
...,...,...
14500,0.000000,
14501,0.000000,
14502,0.000000,
14503,0.000000,


In [34]:
# Coalition [1, 1]: both atoms switched on.
xcqa.query_execution(
    query,
    k=10,
    coalition=[1, 1],
    t_norm='prod',
    t_conorm='min',
)

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.15 seconds


Unnamed: 0,score,path
1177,86.778140,12324--45-->4074--179-->
706,85.381916,12324--45-->4074--179-->
1566,85.043434,12324--45-->9463--179-->
587,74.490537,12324--45-->4074--179-->
1019,72.180147,12324--45-->4074--179-->
...,...,...
14500,0.000000,
14501,0.000000,
14502,0.000000,
14503,0.000000,


In [26]:
from shapley import value_function

In [36]:
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [37]:
value_function(xcqa, query, easy_answers, target_entity=0, qoi='rank', k=10, coalition=[0, 0], t_norm='prod', t_conorm='min')

0

In [38]:
value_function(xcqa, query, easy_answers, target_entity=0, qoi='rank', k=10, coalition=[0, 1], t_norm='prod', t_conorm='min')

Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.19 seconds


26

In [39]:
value_function(xcqa, query, easy_answers, target_entity=0, qoi='rank', k=10, coalition=[1, 0], t_norm='prod', t_conorm='min')

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.68 seconds


64

In [40]:
value_function(xcqa, query, easy_answers, target_entity=0, qoi='rank', k=10, coalition=[1, 1], t_norm='prod', t_conorm='min')

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.15 seconds


12

In [None]:
from shapley import shapley_value

In [42]:
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

Shapley value of the first player (atom 0):

In [43]:
shapley_value(xcqa, query, atom_idx=0, easy_answers=easy_answers, target_entity=query_hard.answer[0], qoi='rank', k=10, t_norm='prod', t_conorm='min')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.67 seconds
Coalition: [0, 0], Contribution: 6 (before adding atom: 0, after adding atom: 6), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.19 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.15 seconds
Coalition: [0, 1], Contribution: 0 (before adding atom: 4, after adding atom: 4), weight: 0.5)
Shapley value for atom 0: 3.0


3.0

Shapley value of the second player (atom 1):

In [44]:
shapley_value(xcqa, query, atom_idx=1, easy_answers=easy_answers, target_entity=query_hard.answer[0], qoi='rank', k=10, t_norm='prod', t_conorm='min')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 4 (before adding atom: 0, after adding atom: 4), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -2 (before adding atom: 6, after adding atom: 4), weight: 0.5)
Shapley value for atom 1: 1.0


1.0

Value function when every atom is present in the coalition:

In [45]:
value_function(xcqa, query, easy_answers, target_entity=query_hard.answer[0], qoi='rank', k=10, coalition=[1, 1], t_norm='prod', t_conorm='min')

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


4

## Shapley Value Examples

### Shapley Value for 2p

In [46]:
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [47]:
sample_query_type, sample_idx = '2p', 1000
# Fetch the same query from both datasets: the full answer set and the hard-only answer set.
query = query_dataset.get_queries(sample_query_type)[sample_idx]
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of the full query that are not hard answers (original order kept).
easy_answers = [
    answer
    for answer in query.get_answer()
    if answer not in query_hard.get_answer()
]
human_readable(query_hard, dataset)
print("-" * 70)
human_readable(query, dataset)

Query:
Perkin Medal	--/award/award_category/winners./award/award_honor/award_winner-->	V
V	--/people/ethnicity/people_reverse-->	?

Answer Set (?): 
['African American']
----------------------------------------------------------------------
Query:
Perkin Medal	--/award/award_category/winners./award/award_honor/award_winner-->	V
V	--/people/ethnicity/people_reverse-->	?

Answer Set (?): 
['African American']


In [48]:
cqd_result = xcqa.query_execution(query, k=10, coalition=[1, 1], t_norm='prod', t_conorm='prod')
# Annotated copy of the ranking (cqd_result itself is left untouched):
#   title          - entity id -> node name -> human-readable title
#   is_easy_answer - id is among the easy answers of the query
#   is_hard_answer - id is among the hard answers of the query
human_df = (
    cqd_result
    .assign(title=lambda df: df.index.map(dataset.id2node))
    .assign(title=lambda df: df['title'].map(dataset.get_title_by_node))
    .assign(is_easy_answer=lambda df: df.index.isin(easy_answers))
    .assign(is_hard_answer=lambda df: df.index.isin(query_hard.get_answer()))
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.18 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
1253,59.608866,14483--94-->4165--143-->,Irish American,False,False
879,59.601220,14483--94-->4165--143-->,Jewish people,False,False
11038,55.189793,14483--94-->4165--143-->,Mexican American,False,False
3324,55.045102,14483--94-->4165--143-->,White American,False,False
2279,53.064225,14483--94-->4165--143-->,African American,False,True
...,...,...,...,...,...
14500,0.000000,,"Strategic Simulations, Inc.",False,False
14501,0.000000,,House of Plantagenet,False,False
14502,0.000000,,Humour,False,False
14503,0.000000,,Modernism,False,False


Let's explain the true hard answer, which the model was able to predict correctly.

In [49]:
target = 'African American'
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: African American (/m/0x67) with ID 2279


In [50]:
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.69 seconds
Coalition: [0, 0], Contribution: 2367 (before adding atom: 0, after adding atom: 2367), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.17 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 1], Contribution: 0 (before adding atom: 4, after adding atom: 4), weight: 0.5)
Shapley value for atom 0: 1183.5


1183.5

In [51]:
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 4 (before adding atom: 0, after adding atom: 4), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -2363 (before adding atom: 2367, after adding atom: 4), weight: 0.5)
Shapley value for atom 1: -1179.5


-1179.5

Now, let's look at the explanation for another target entity, which is not in the answer set of the query.

In [52]:
target = "Italian American"
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: Italian American (/m/0xnvg) with ID 3330


In [53]:
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 3410 (before adding atom: 0, after adding atom: 3410), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 1], Contribution: 0 (before adding atom: 7, after adding atom: 7), weight: 0.5)
Shapley value for atom 0: 1705.0


1705.0

In [54]:
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 7 (before adding atom: 0, after adding atom: 7), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -3403 (before adding atom: 3410, after adding atom: 7), weight: 0.5)
Shapley value for atom 1: -1698.0


-1698.0

Let's look at the explanation when we use `min` as t-norm.

In [55]:
cqd_result = xcqa.query_execution(query, k=10, coalition=[1, 1], t_norm='min', t_conorm='max')
# Annotated copy of the ranking (cqd_result itself is left untouched):
#   title          - entity id -> node name -> human-readable title
#   is_easy_answer - id is among the easy answers of the query
#   is_hard_answer - id is among the hard answers of the query
human_df = (
    cqd_result
    .assign(title=lambda df: df.index.map(dataset.id2node))
    .assign(title=lambda df: df['title'].map(dataset.get_title_by_node))
    .assign(is_easy_answer=lambda df: df.index.isin(easy_answers))
    .assign(is_hard_answer=lambda df: df.index.isin(query_hard.get_answer()))
)
human_df

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
1253,5.624588,14483--94-->4165--143-->,Irish American,False,False
879,5.623867,14483--94-->4165--143-->,Jewish people,False,False
11038,5.207612,14483--94-->4165--143-->,Mexican American,False,False
3324,5.193959,14483--94-->4165--143-->,White American,False,False
2279,5.007047,14483--94-->4165--143-->,African American,False,True
...,...,...,...,...,...
14500,0.000000,,"Strategic Simulations, Inc.",False,False
14501,0.000000,,House of Plantagenet,False,False
14502,0.000000,,Humour,False,False
14503,0.000000,,Modernism,False,False


In [56]:
target = 'African American'
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: African American (/m/0x67) with ID 2279


In [57]:
atom_idx = 0
# This section explains the `min` t-norm ranking, so use the same t_norm/t_conorm as
# the query_execution call that produced it. The call previously passed
# t_norm='prod', t_conorm='prod', which only reproduced the earlier prod-based result.
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='min', t_conorm='max')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


Coalition: [0, 0], Contribution: 2367 (before adding atom: 0, after adding atom: 2367), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 1], Contribution: 0 (before adding atom: 4, after adding atom: 4), weight: 0.5)
Shapley value for atom 0: 1183.5


1183.5

In [58]:
atom_idx = 1
# This section explains the `min` t-norm ranking, so use the same t_norm/t_conorm as
# the query_execution call that produced it. The call previously passed
# t_norm='prod', t_conorm='prod', which only reproduced the earlier prod-based result.
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='min', t_conorm='max')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 4 (before adding atom: 0, after adding atom: 4), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -2363 (before adding atom: 2367, after adding atom: 4), weight: 0.5)
Shapley value for atom 1: -1179.5


-1179.5

### Shapley Value for 3p

In [59]:
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [60]:
sample_query_type, sample_idx = '3p', 1000
# Fetch the same query from both datasets: the full answer set and the hard-only answer set.
query = query_dataset.get_queries(sample_query_type)[sample_idx]
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of the full query that are not hard answers (original order kept).
easy_answers = [
    answer
    for answer in query.get_answer()
    if answer not in query_hard.get_answer()
]
human_readable(query_hard, dataset)
print("-" * 70)
human_readable(query, dataset)

Query:
Winston-Salem	--/location/hud_county_place/place_reverse-->	V1
V1	--/base/biblioness/bibs_location/country-->	V2
V2	--/location/statistical_region/places_exported_to./location/imports_and_exports/exported_to-->	?

Answer Set (?): 
['Angola', 'Anguilla', 'Australia', 'Netherlands', 'Afghanistan', 'Japan', 'Singapore', 'Sierra Leone', 'Germany', 'Algeria', 'Brazil', 'China', 'Mexico', 'Taiwan', 'Belgium', 'South Korea', 'Switzerland', 'Dubai', 'United Kingdom']
----------------------------------------------------------------------
Query:
Winston-Salem	--/location/hud_county_place/place_reverse-->	V1
V1	--/base/biblioness/bibs_location/country-->	V2
V2	--/location/statistical_region/places_exported_to./location/imports_and_exports/exported_to-->	?

Answer Set (?): 
['Angola', 'Anguilla', 'Australia', 'Netherlands', 'Afghanistan', 'Japan', 'Singapore', 'Sierra Leone', 'Germany', 'Algeria', 'Brazil', 'China', 'Mexico', 'Taiwan', 'Belgium', 'South Korea', 'Switzerland', 'Dubai', 'Unit

In [61]:
cqd_result = xcqa.query_execution(query, k=10, coalition=[0, 0, 0], t_norm='prod', t_conorm='min')
# Annotated copy of the ranking (cqd_result itself is left untouched):
#   title          - entity id -> node name -> human-readable title
#   is_easy_answer - id is among the easy answers of the query
#   is_hard_answer - id is among the hard answers of the query
human_df = (
    cqd_result
    .assign(title=lambda df: df.index.map(dataset.id2node))
    .assign(title=lambda df: df['title'].map(dataset.get_title_by_node))
    .assign(is_easy_answer=lambda df: df.index.isin(easy_answers))
    .assign(is_hard_answer=lambda df: df.index.isin(query_hard.get_answer()))
)
human_df

Time taken for first level query: 0.07 seconds
Time taken for second level query: 7.68 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
4840,0.0,5561--113-->4508--266-->4636--436-->,Maxis,False,False
8351,0.0,5561--113-->5561--266-->711--436-->,Brown,False,False
9787,0.0,5561--113-->5561--266-->711--436-->,Cardiac arrest,False,False
9152,0.0,5561--113-->5561--266-->711--436-->,Gotha,False,False
2142,0.0,5561--113-->5561--266-->711--436-->,The Man Who Knew Too Much,False,False
...,...,...,...,...,...
14500,0.0,,"Strategic Simulations, Inc.",False,False
14501,0.0,,House of Plantagenet,False,False
14502,0.0,,Humour,False,False
14503,0.0,,Modernism,False,False


In [62]:
cqd_result = xcqa.query_execution(query, k=10, coalition=[1, 1, 1], t_norm='prod', t_conorm='min')
# Annotated copy of the ranking (cqd_result itself is left untouched):
#   title          - entity id -> node name -> human-readable title
#   is_easy_answer - id is among the easy answers of the query
#   is_hard_answer - id is among the hard answers of the query
human_df = (
    cqd_result
    .assign(title=lambda df: df.index.map(dataset.id2node))
    .assign(title=lambda df: df['title'].map(dataset.get_title_by_node))
    .assign(is_easy_answer=lambda df: df.index.isin(easy_answers))
    .assign(is_hard_answer=lambda df: df.index.isin(query_hard.get_answer()))
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.79 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
5138,620.009017,5561--113-->5561--266-->32--436-->,Anguilla,False,True
6150,613.922489,5561--113-->5561--266-->32--436-->,Angola,False,True
3100,608.222374,5561--113-->5561--266-->32--436-->,Afghanistan,False,True
190,601.928367,5561--113-->5561--266-->32--436-->,Algeria,False,True
32,594.299743,5561--113-->5561--266-->32--436-->,United States of America,False,False
...,...,...,...,...,...
14500,0.000000,,"Strategic Simulations, Inc.",False,False
14501,0.000000,,House of Plantagenet,False,False
14502,0.000000,,Humour,False,False
14503,0.000000,,Modernism,False,False


In [63]:
target = 'Anguilla'
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: Anguilla (/m/0n3g) with ID 5138


In [64]:
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='min')

Coalition: [0, 0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 6.70 seconds
Coalition: [0, 0, 0], Contribution: 4 (before adding atom: 0, after adding atom: 4), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 2.06 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 1.72 seconds
Coalition: [0, 0, 1], Contribution: -5716 (before adding atom: 5716, after adding atom: 0), weight: 0.16666666666666666)
Coalition: [0, 1, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 3.15 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.73 seconds
Coalition: [0, 1, 0], Contribution: -3 (before adding atom: 8, after adding atom: 5), weight: 0.16666666666666666)
Coalition: [0, 1, 1], Atom Index: 0
Time taken for first level query

-951.8333333333333

In [65]:
# Shapley value of atom 1 for the target entity, with the rank as quantity of interest (qoi='rank').
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='min')

Coalition: [0, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Coalition: [0, 0, 0], Contribution: 8 (before adding atom: 0, after adding atom: 8), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Coalition: [0, 0, 1], Contribution: -5716 (before adding atom: 5716, after adding atom: 0), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Coalition: [1, 0, 0], Contribution: 1 (before adding atom: 4, after adding atom: 5), weight: 0.16666666666666666)
Coalition: [1, 0, 1], Atom Index: 1
Time taken for first level query:

-949.8333333333334

In [66]:
# Shapley value of atom 2 for the target entity, with the rank as quantity of interest (qoi='rank').
atom_idx = 2
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='min')

Coalition: [0, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Coalition: [0, 0, 0], Contribution: 5716 (before adding atom: 0, after adding atom: 5716), weight: 0.3333333333333333)
Coalition: [0, 1, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Coalition: [0, 1, 0], Contribution: -8 (before adding atom: 8, after adding atom: 0), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.04 seconds
Coalition: [1, 0, 0], Contribution: -4 (before adding atom: 4, after adding atom: 0), weight: 0.16666666666666666)
Coalition: [1, 1, 0], Atom Index: 2
Time taken for first level query

1901.6666666666665

### Shapley Value for 2u

In [67]:
# Create an XCQA instance over the symbolic reasoner, dataset and CQD cache, with logging on.
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [68]:
# Select one '2u' sample; the same index is read from both query datasets.
sample_query_type = '2u'
sample_idx = 40
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
query = query_dataset.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of `query` that are not answers of `query_hard`.
# The hard answers are fetched once and kept in a set, so the filter no longer
# calls get_answer() and scans a list for every candidate; order is preserved.
_hard_answers = set(query_hard.get_answer())
easy_answers = [a for a in query.get_answer() if a not in _hard_answers]
# Print both queries in readable form, separated by a rule.
human_readable(query_hard, dataset)
print("-"*70)
human_readable(query, dataset)

Query:
Michel Legrand	--/people/person/profession-->	V1
Vitamin B-12	--/food/food/nutrients./food/nutrition_fact/nutrient_reverse-->	V2
V1	OR	V2	-->	?

Answer Set (?): 
['Conductor', 'Chicken meat']
----------------------------------------------------------------------
Query:
Michel Legrand	--/people/person/profession-->	V1
Vitamin B-12	--/food/food/nutrients./food/nutrition_fact/nutrient_reverse-->	V2
V1	OR	V2	-->	?

Answer Set (?): 
['Composer', 'Pasta', 'Ice cream', 'Beef', 'Conductor', 'Chicken meat', 'Milk', 'Pork', 'Egg', 'Cheese', 'Pianist-GB']


In [69]:
# Execute the query with coalition [1, 1] (prod t-norm / prod t-conorm), then
# label every row with the entity title and with easy/hard-answer flags.
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[1, 1], t_norm='prod', t_conorm='prod'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.02 seconds


Unnamed: 0,path_1,path_2,score,path,title,is_easy_answer,is_hard_answer
10329,0,13927--275-->,7.837002,,Pork,True,False
5883,0,13927--275-->,7.825286,,Egg,True,False
5756,0,13927--275-->,7.823646,,Cheese,True,False
10844,3873--34-->,0,7.803871,,Pianist-GB,True,False
9450,0,13927--275-->,7.675278,,Pasta,True,False
...,...,...,...,...,...,...,...
14500,,,0.000000,,"Strategic Simulations, Inc.",False,False
14501,,,0.000000,,House of Plantagenet,False,False
14502,,,0.000000,,Humour,False,False
14503,,,0.000000,,Modernism,False,False


In [70]:
# Entity whose rank is explained in the Shapley cells below.
target = 'Chicken meat'
# Resolve the title to its node name, then the node name to its integer ID.
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: Chicken meat (/m/0f25w9) with ID 1746


In [71]:
# Re-create the XCQA instance (symbolic reasoner, dataset, CQD cache, logging on) before the Shapley runs.
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [72]:
# Shapley value of atom 0 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.02 seconds


Time taken for second level query: 0.07 seconds
Coalition: [0, 0], Contribution: 1752 (before adding atom: 0, after adding atom: 1752), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.02 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 1], Contribution: 0 (before adding atom: 0, after adding atom: 0), weight: 0.5)
Shapley value for atom 0: 876.0


876.0

In [73]:
# Shapley value of atom 1 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 0 (before adding atom: 0, after adding atom: 0), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -1752 (before adding atom: 1752, after adding atom: 0), weight: 0.5)
Shapley value for atom 1: -876.0


-876.0

In [74]:
# Same query, now with min t-norm / max t-conorm; rows are labelled with the
# entity title and with easy/hard-answer flags.
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[1, 1], t_norm='min', t_conorm='max'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


Unnamed: 0,path_1,path_2,score,path,title,is_easy_answer,is_hard_answer
10329,0,13927--275-->,7.837002,,Pork,True,False
5883,0,13927--275-->,7.825286,,Egg,True,False
5756,0,13927--275-->,7.823646,,Cheese,True,False
10844,3873--34-->,0,7.803871,,Pianist-GB,True,False
9450,0,13927--275-->,7.675278,,Pasta,True,False
...,...,...,...,...,...,...,...
14500,,,0.000000,,"Strategic Simulations, Inc.",False,False
14501,,,0.000000,,House of Plantagenet,False,False
14502,,,0.000000,,Humour,False,False
14503,,,0.000000,,Modernism,False,False


In [75]:
# Shapley value of atom 0 for the target entity (qoi='rank'), min t-norm / max t-conorm.
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='min', t_conorm='max')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 1752 (before adding atom: 0, after adding atom: 1752), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 1], Contribution: 0 (before adding atom: 0, after adding atom: 0), weight: 0.5)
Shapley value for atom 0: 876.0


876.0

In [76]:
# Shapley value of atom 1 for the target entity (qoi='rank'), min t-norm / max t-conorm.
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='min', t_conorm='max')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


Coalition: [0, 0], Contribution: 0 (before adding atom: 0, after adding atom: 0), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -1752 (before adding atom: 1752, after adding atom: 0), weight: 0.5)
Shapley value for atom 1: -876.0


-876.0

### Shapley Value for 2i

In [77]:
# Create an XCQA instance over the symbolic reasoner, dataset and CQD cache, with logging on.
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [78]:
# Select one '2i' sample; the same index is read from both query datasets.
sample_query_type = '2i'
sample_idx = 1000
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
query = query_dataset.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of `query` that are not answers of `query_hard`.
# The hard answers are fetched once and kept in a set, so the filter no longer
# calls get_answer() and scans a list for every candidate; order is preserved.
_hard_answers = set(query_hard.get_answer())
easy_answers = [a for a in query.get_answer() if a not in _hard_answers]
# Print both queries in readable form, separated by a rule.
human_readable(query_hard, dataset)
print("-"*70)
human_readable(query, dataset)

Query:
Roots reggae	--/music/genre/parent_genre-->	V1
Red Hot Chili Peppers	--/music/genre/artists_reverse-->	V2
V1	AND	V2	-->	?

Answer Set (?): 
['Reggae']
----------------------------------------------------------------------
Query:
Roots reggae	--/music/genre/parent_genre-->	V1
Red Hot Chili Peppers	--/music/genre/artists_reverse-->	V2
V1	AND	V2	-->	?

Answer Set (?): 
['Reggae', 'Ska']


In [79]:
# Display the repr of the selected query (type, atoms and answer IDs).
query

Query(type=2i, query=((13677, (104,)), (7779, (41,))), answer=[184, 5825])

In [80]:
def answer_readable(answers, dataset):
    """Convert a sequence of entity IDs into their readable titles.

    Args:
        answers: Iterable of entity IDs.
        dataset: Object providing ``get_node_by_id`` and ``get_title_by_node``.

    Returns:
        A list with one title per ID, in the order the IDs were given.
    """
    titles = []
    for entity_id in answers:
        # ID -> node name -> title
        node = dataset.get_node_by_id(entity_id)
        titles.append(dataset.get_title_by_node(node))
    return titles

In [81]:
# Symbolic 1p lookup for the first atom of the 2i query, (13677, 104), shown as titles.
answer_readable(symbolic.query_1p(13677, 104), dataset)

['Ska']

In [82]:
# Symbolic 1p lookup for the second atom of the 2i query, (7779, 41), shown as titles.
answer_readable(symbolic.query_1p(7779, 41), dataset)

['Ska',
 'Classic rock',
 'Funk metal',
 'Psychedelic rock',
 'Alternative rock',
 'Rap rock',
 'Reggae',
 'Punk rock',
 'Funk rock',
 'Hard rock']

In [83]:
# Execute the query with coalition [0, 0] (prod t-norm / prod t-conorm), then
# label every row with the entity title and with easy/hard-answer flags.
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[0, 0], t_norm='prod', t_conorm='prod'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.06 seconds


Unnamed: 0,path_1,path_2,score,path,title,is_easy_answer,is_hard_answer
5825,13677--104-->,7779--41-->,1.0,,Ska,True,False
200,0,7779--41-->,0.0,,Alternative rock,False,False
184,0,7779--41-->,0.0,,Reggae,False,True
831,0,7779--41-->,0.0,,Hard rock,False,False
1929,13677--104-->,0,0.0,,Defensive end,False,False
...,...,...,...,...,...,...,...
14500,,,0.0,,"Strategic Simulations, Inc.",False,False
14501,,,0.0,,House of Plantagenet,False,False
14502,,,0.0,,Humour,False,False
14503,,,0.0,,Modernism,False,False


In [84]:
# Execute the query with coalition [1, 1] (prod t-norm / prod t-conorm), then
# label every row with the entity title and with easy/hard-answer flags.
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[1, 1], t_norm='prod', t_conorm='prod'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.02 seconds


Unnamed: 0,path_1,path_2,score,path,title,is_easy_answer,is_hard_answer
5825,13677--104-->,7779--41-->,75.102507,,Ska,True,False
5658,13677--104-->,7779--41-->,38.702916,,Punk rock,False,False
185,13677--104-->,0,0.000000,,World music,False,False
184,13677--104-->,0,0.000000,,Reggae,False,True
200,0,7779--41-->,0.000000,,Alternative rock,False,False
...,...,...,...,...,...,...,...
14500,,,0.000000,,"Strategic Simulations, Inc.",False,False
14501,,,0.000000,,House of Plantagenet,False,False
14502,,,0.000000,,Humour,False,False
14503,,,0.000000,,Modernism,False,False


In [85]:
# Entity whose rank is explained in the Shapley cells below.
# NOTE(review): the result table above flags this entity as neither an easy nor
# a hard answer - confirm it is the intended target.
target = 'Punk rock'
# Resolve the title to its node name, then the node name to its integer ID.
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: Punk rock (/m/05r6t) with ID 5658


In [86]:
# Shapley value of atom 0 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 1 (before adding atom: 0, after adding atom: 1), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 1], Contribution: -7 (before adding atom: 7, after adding atom: 0), weight: 0.5)
Shapley value for atom 0: -3.0


-3.0

In [87]:
# Shapley value of atom 1 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 7 (before adding atom: 0, after adding atom: 7), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -1 (before adding atom: 1, after adding atom: 0), weight: 0.5)
Shapley value for atom 1: 3.0


3.0

In [88]:
# Same query, now with min t-norm / max t-conorm; rows are labelled with the
# entity title and with easy/hard-answer flags.
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[1, 1], t_norm='min', t_conorm='max'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


Unnamed: 0,path_1,path_2,score,path,title,is_easy_answer,is_hard_answer
5825,13677--104-->,7779--41-->,7.169825,,Ska,True,False
5658,13677--104-->,7779--41-->,5.042561,,Punk rock,False,False
185,13677--104-->,0,0.000000,,World music,False,False
184,13677--104-->,0,0.000000,,Reggae,False,True
200,0,7779--41-->,0.000000,,Alternative rock,False,False
...,...,...,...,...,...,...,...
14500,,,0.000000,,"Strategic Simulations, Inc.",False,False
14501,,,0.000000,,House of Plantagenet,False,False
14502,,,0.000000,,Humour,False,False
14503,,,0.000000,,Modernism,False,False


In [89]:
# Shapley value of atom 0 for the target entity (qoi='rank'), min t-norm / max t-conorm.
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='min', t_conorm='max')

Coalition: [0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 1 (before adding atom: 0, after adding atom: 1), weight: 0.5)
Coalition: [0, 1], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 1], Contribution: -7 (before adding atom: 7, after adding atom: 0), weight: 0.5)
Shapley value for atom 0: -3.0


-3.0

In [90]:
# Shapley value of atom 1 for the target entity (qoi='rank'), min t-norm / max t-conorm.
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='min', t_conorm='max')

Coalition: [0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [0, 0], Contribution: 7 (before adding atom: 0, after adding atom: 7), weight: 0.5)
Coalition: [1, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Coalition: [1, 0], Contribution: -1 (before adding atom: 1, after adding atom: 0), weight: 0.5)
Shapley value for atom 1: 3.0


3.0

### Shapley Value for 3i

In [112]:
# Create an XCQA instance over the symbolic reasoner, dataset and CQD cache, with logging on.
xcqa = XCQA(symbolic=symbolic, dataset=dataset, cqd_cache=cqd_cache, logging=True)

In [113]:
# NOTE(review): in file order this cell precedes the cell that selects the 3i
# sample, so after a fresh top-to-bottom run `query_hard` here is still the
# query chosen in the previous section; consider moving it below that cell.
query_hard

Query(type=3i, query=((117, (11,)), (117, (260,)), (11, (9,))), answer=[8417, 5130, 5279])

In [130]:
# Select one '3i' sample; the same index is read from both query datasets.
sample_query_type = '3i'
sample_idx = 3205
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
query = query_dataset.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of `query` that are not answers of `query_hard`.
# The hard answers are fetched once and kept in a set, so the filter no longer
# calls get_answer() and scans a list for every candidate; order is preserved.
_hard_answers = set(query_hard.get_answer())
easy_answers = [a for a in query.get_answer() if a not in _hard_answers]
# Print both queries in readable form, separated by a rule.
human_readable(query_hard, dataset)
print("-"*70)
human_readable(query, dataset)

Query:
United States Dollar	--/education/university/local_tuition./measurement_unit/dated_money_value/currency_reverse-->	V1
Bachelor of Arts	--/education/educational_degree/people_with_this_degree./education/education/institution-->	V2
Public university	--/education/educational_institution/school_type_reverse-->	V3
V1	AND	V2	AND	V3	-->	?

Answer Set (?): 
['Eastern Kentucky University', 'University of Oklahoma', 'Purdue University', 'University of Washington', 'University of New Hampshire', 'University of North Carolina at Chapel Hill', 'University of California, Santa Cruz', 'University of Florida', 'University of Nevada, Las Vegas', 'University at Buffalo, The State University of New York', 'Western Michigan University', 'University of California, San Diego', 'San Diego State University', 'California State Polytechnic University, Pomona', 'University of Colorado Boulder', 'City College of New York', 'University of Arizona', 'California State University, Sacramento']
----------------

In [131]:
# Execute the query with coalition [1, 1, 1] (prod t-norm / prod t-conorm), then
# label every row with the entity title and with easy/hard-answer flags.
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[1, 1, 1], t_norm='prod', t_conorm='prod'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.02 seconds
Time taken for third level query: 0.02 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
53,0.0,0\n1177--178-->\n0,Yale University,False,False
273,0.0,90--257-->\n0\n0,Ohio State University,True,False
415,0.0,0\n1177--178-->\n0,University of Iowa,True,False
447,0.0,0\n1177--178-->\n0,Stanford University,False,False
588,0.0,90--257-->\n0\n0,University of Southern California,False,False
...,...,...,...,...,...
14500,0.0,,"Strategic Simulations, Inc.",False,False
14501,0.0,,House of Plantagenet,False,False
14502,0.0,,Humour,False,False
14503,0.0,,Modernism,False,False


In [132]:
# Entity whose rank is explained in the Shapley cells below.
target = 'University of Arizona'
# Resolve the title to its node name, then the node name to its integer ID.
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: University of Arizona (/m/07vyf) with ID 4029


In [133]:
# Shapley value of atom 0 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.06 seconds
Time taken for third level query: 0.06 seconds
Coalition: [0, 0, 0], Contribution: 8 (before adding atom: 0, after adding atom: 8), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 0
Time taken for first level query: 0.06 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 1], Contribution: -4014 (before adding atom: 4024, after adding atom: 10), weight: 0.16666666666666666)
Coalition: [0, 1, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 se

-2670.8333333333335

In [134]:
# Shapley value of atom 1 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 0], Contribution: 4022 (before adding atom: 0, after adding atom: 4022), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 1], Contribution: -3 (before adding atom: 4024, after adding atom: 4021), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.

1341.6666666666665

In [135]:
# Shapley value of atom 2 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 2
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 0], Contribution: 4024 (before adding atom: 0, after adding atom: 4024), weight: 0.3333333333333333)
Coalition: [0, 1, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 1, 0], Contribution: -1 (before adding atom: 4022, after adding atom: 4021), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.

1342.1666666666665

### Shapley Value for up

In [33]:
# Select one 'up' sample; the same index is read from both query datasets.
sample_query_type = 'up'
sample_idx = 3205
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
query = query_dataset.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of `query` that are not answers of `query_hard`.
# The hard answers are fetched once and kept in a set, so the filter no longer
# calls get_answer() and scans a list for every candidate; order is preserved.
_hard_answers = set(query_hard.get_answer())
easy_answers = [a for a in query.get_answer() if a not in _hard_answers]
# Print both queries in readable form, separated by a rule.
human_readable(query_hard, dataset)
print("-"*70)
human_readable(query, dataset)

Query:
Novara Calcio	--/soccer/football_team/current_roster./soccer/football_roster_position/position-->	V1
Vittorio Storaro	--/award/award_winning_work/awards_won./award/award_honor/award_winner_reverse-->	V2
V1	OR	V2	-->	V3
V3	--/media_common/netflix_genre/titles_reverse-->	?

Answer Set (?): 
['Period piece']
----------------------------------------------------------------------
Query:
Novara Calcio	--/soccer/football_team/current_roster./soccer/football_roster_position/position-->	V1
Vittorio Storaro	--/award/award_winning_work/awards_won./award/award_honor/award_winner_reverse-->	V2
V1	OR	V2	-->	V3
V3	--/media_common/netflix_genre/titles_reverse-->	?

Answer Set (?): 
['Drama', 'Epic film', 'China', 'Biography', 'Political drama', 'Period piece', 'United Kingdom']


In [34]:
# Build an XCQA instance, execute the query with coalition [1, 1, 1]
# (prod t-norm / prod t-conorm), then label every row with the entity title
# and with easy/hard-answer flags.
xcqa = XCQA(
    symbolic=symbolic,
    dataset=dataset,
    cqd_cache=cqd_cache,
    logging=True,
)
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[1, 1, 1], t_norm='prod', t_conorm='prod'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.02 seconds
Time taken for third level query: 0.36 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
3248,75.674648,0\n10367--25-->2699--5-->,Political drama,True,False
4,74.963372,0\n10367--25-->2699--5-->,Drama,True,False
5245,73.496998,0\n10367--25-->7647--5-->,Miniseries,False,False
1130,72.670414,0\n10367--25-->2699--5-->,Biography,True,False
84,72.091218,0\n10367--25-->2699--5-->,Period piece,False,True
...,...,...,...,...,...
14500,0.000000,,"Strategic Simulations, Inc.",False,False
14501,0.000000,,House of Plantagenet,False,False
14502,0.000000,,Humour,False,False
14503,0.000000,,Modernism,False,False


In [35]:
# Entity whose rank is explained in the Shapley cells below.
target = 'Period piece'
# Resolve the title to its node name, then the node name to its integer ID.
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: Period piece (/m/04xvlr) with ID 84


In [36]:
# Shapley value of atom 0 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 0
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.08 seconds
Time taken for third level query: 1.49 seconds
Coalition: [0, 0, 0], Contribution: 272 (before adding atom: 0, after adding atom: 272), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 0
Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.26 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 1], Contribution: 59 (before adding atom: 0, after adding atom: 59), weight: 0.16666666666666666)
Coalition: [0, 1, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 1.05 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seco

100.33333333333331

In [37]:
# Shapley value of atom 1 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 1
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 0], Contribution: 3 (before adding atom: 0, after adding atom: 3), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 1], Contribution: 1 (before adding atom: 0, after adding atom: 1), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Ti

-63.16666666666667

In [38]:
# Shapley value of atom 2 for the target entity (qoi='rank'), prod t-norm / prod t-conorm.
atom_idx = 2
shapley_value(xcqa, query, atom_idx=atom_idx, easy_answers=easy_answers, target_entity=target_id, qoi='rank', k=10, t_norm='prod', t_conorm='prod')

Coalition: [0, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 0], Contribution: 0 (before adding atom: 0, after adding atom: 0), weight: 0.3333333333333333)
Coalition: [0, 1, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 1, 0], Contribution: -2 (before adding atom: 3, after adding atom: 1), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
T

-36.16666666666667

### Shapley Value for ip

In [45]:
# Select one 'ip' sample; the same index is read from both query datasets.
sample_query_type = 'ip'
sample_idx = 700
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
query = query_dataset.get_queries(sample_query_type)[sample_idx]
# Easy answers = answers of `query` that are not answers of `query_hard`.
# The hard answers are fetched once and kept in a set, so the filter no longer
# calls get_answer() and scans a list for every candidate; order is preserved.
_hard_answers = set(query_hard.get_answer())
easy_answers = [a for a in query.get_answer() if a not in _hard_answers]
# Print both queries in readable form, separated by a rule.
human_readable(query_hard, dataset)
print("-"*70)
human_readable(query, dataset)

Query:
Ska	--/music/genre/parent_genre_reverse-->	V1
Dub	--/music/genre/parent_genre-->	V2
V1	AND	V2	-->	V3
V3	--/music/genre/parent_genre-->	?

Answer Set (?): 
['Reggae']
----------------------------------------------------------------------
Query:
Ska	--/music/genre/parent_genre_reverse-->	V1
Dub	--/music/genre/parent_genre-->	V2
V1	AND	V2	-->	V3
V3	--/music/genre/parent_genre-->	?

Answer Set (?): 
['Reggae', 'World music', 'Ska', 'Rocksteady']


In [46]:
# Build an XCQA instance, execute the query with coalition [1, 1, 1]
# (prod t-norm / prod t-conorm), then label every row with the entity title
# and with easy/hard-answer flags.
xcqa = XCQA(
    symbolic=symbolic,
    dataset=dataset,
    cqd_cache=cqd_cache,
    logging=True,
)
cqd_result = xcqa.query_execution(
    query, k=10, coalition=[1, 1, 1], t_norm='prod', t_conorm='prod'
)
human_df = (
    cqd_result
    # index value -> node name
    .assign(title=lambda frame: frame.index.map(dataset.id2node))
    .assign(
        # node name -> readable title
        title=lambda frame: frame['title'].map(dataset.get_title_by_node),
        is_easy_answer=lambda frame: frame.index.isin(easy_answers),
        is_hard_answer=lambda frame: frame.index.isin(query_hard.get_answer()),
    )
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.02 seconds
Time taken for third level query: 0.29 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
5825,1105.806612,5825--105-->\n11441--104-->13677--104-->,Ska,True,False
13677,690.465515,5825--105-->\n11441--104-->13677--104-->,Roots reggae,False,False
184,684.101425,5825--105-->\n11441--104-->13677--104-->,Reggae,False,True
2483,634.967023,5825--105-->\n11441--104-->13677--104-->,Ska punk,False,False
11441,623.276578,5825--105-->\n11441--104-->13677--104-->,Dub,False,False
...,...,...,...,...,...
14500,0.000000,,"Strategic Simulations, Inc.",False,False
14501,0.000000,,House of Plantagenet,False,False
14502,0.000000,,Humour,False,False
14503,0.000000,,Modernism,False,False


In [47]:
# Entity whose result is explained by the Shapley cells below, given by title.
target = 'Reggae'
# Resolve title -> node -> ID; `target_id` is what the later cells pass as
# `target_entity`.
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: Reggae (/m/06cqb) with ID 184


In [48]:
# Shapley value of atom 0 for the selected target (qoi='rank').
atom_idx = 0
shapley_value(
    xcqa,
    query,
    atom_idx=atom_idx,
    easy_answers=easy_answers,
    target_entity=target_id,
    qoi='rank',
    k=10,
    t_norm='prod',
    t_conorm='prod',
)

Coalition: [0, 0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.08 seconds
Time taken for third level query: 1.34 seconds
Coalition: [0, 0, 0], Contribution: 29 (before adding atom: 0, after adding atom: 29), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 0
Time taken for first level query: 0.08 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.30 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 1], Contribution: 0 (before adding atom: 1, after adding atom: 1), weight: 0.16666666666666666)
Coalition: [0, 1, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.91 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds


-13.666666666666666

In [49]:
# Shapley value of atom 1 for the selected target (qoi='rank').
atom_idx = 1
shapley_value(
    xcqa,
    query,
    atom_idx=atom_idx,
    easy_answers=easy_answers,
    target_entity=target_id,
    qoi='rank',
    k=10,
    t_norm='prod',
    t_conorm='prod',
)

Coalition: [0, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 0], Contribution: 143 (before adding atom: 0, after adding atom: 143), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 1], Contribution: 0 (before adding atom: 1, after adding atom: 1), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 second

43.33333333333333

In [50]:
# Shapley value of atom 2 for the selected target (qoi='rank').
atom_idx = 2
shapley_value(
    xcqa,
    query,
    atom_idx=atom_idx,
    easy_answers=easy_answers,
    target_entity=target_id,
    qoi='rank',
    k=10,
    t_norm='prod',
    t_conorm='prod',
)

Coalition: [0, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 0, 0], Contribution: 1 (before adding atom: 0, after adding atom: 1), weight: 0.3333333333333333)
Coalition: [0, 1, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Coalition: [0, 1, 0], Contribution: -142 (before adding atom: 143, after adding atom: 1), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.01 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 secon

-28.666666666666668

### Shapley Value for pi

In [53]:
# Pick one 'pi' sample, taking the same type and index from both query datasets.
sample_query_type = 'pi'
sample_idx = 3205
query_hard = query_dataset_hard.get_queries(sample_query_type)[sample_idx]
query = query_dataset.get_queries(sample_query_type)[sample_idx]

# Easy answers = answers of `query` that are not answers of `query_hard`.
# The hard answers are fetched once and stored in a set, so the filter does not
# call get_answer() again for every candidate and each membership test is O(1).
# Assumes answers are hashable entity IDs -- TODO confirm.
hard_answers = set(query_hard.get_answer())
easy_answers = [a for a in query.get_answer() if a not in hard_answers]

# Print both queries, separated by a rule, for visual comparison.
human_readable(query_hard, dataset)
print("-"*70)
human_readable(query, dataset)

Query:
Michigan	--/base/aareas/schema/administrative_area/administrative_parent-->	V1
V1	--/people/person/nationality_reverse-->	V2
Television Hall of Fame	--/award/hall_of_fame/inductees./award/hall_of_fame_induction/inductee-->	V3
V2	AND	V3	-->	?

Answer Set (?): 
['Ed Asner', 'Fred Astaire', "Carroll O'Connor", 'Lorne Michaels', 'Bea Arthur', 'James Burrows', 'Carol Burnett', 'Jackie Gleason', 'James L. Brooks', 'Walt Disney', 'Barbara Walters', 'Betty White', 'Jack Webb']
----------------------------------------------------------------------
Query:
Michigan	--/base/aareas/schema/administrative_area/administrative_parent-->	V1
V1	--/people/person/nationality_reverse-->	V2
Television Hall of Fame	--/award/hall_of_fame/inductees./award/hall_of_fame_induction/inductee-->	V3
V2	AND	V3	-->	?

Answer Set (?): 
['Mark Goodson', 'Merv Griffin', 'Bea Arthur', 'Walt Disney', 'Angela Lansbury', 'James L. Brooks', 'Barbara Walters', 'Betty White', 'Jack Webb', 'Art Carney', 'Katie Couric', 'Car

In [54]:
# Execute the query with every atom enabled (coalition of all ones).
xcqa = XCQA(
    symbolic=symbolic,
    dataset=dataset,
    cqd_cache=cqd_cache,
    logging=True,
)
cqd_result = xcqa.query_execution(
    query,
    k=10,
    coalition=[1, 1, 1],
    t_norm='prod',
    t_conorm='prod',
)

# Human-friendly view of the result: index -> node -> title, plus two flags
# marking which rows are easy / hard answers. `assign` works on a copy, so
# `cqd_result` itself is left untouched.
human_df = cqd_result.assign(
    title=lambda df: df.index.map(dataset.id2node),
).assign(
    title=lambda df: df['title'].map(dataset.get_title_by_node),
    is_easy_answer=lambda df: df.index.isin(easy_answers),
    is_hard_answer=lambda df: df.index.isin(query_hard.get_answer()),
)
human_df

Time taken for first level query: 0.02 seconds
Time taken for second level query: 0.20 seconds
Time taken for third level query: 0.02 seconds


Unnamed: 0,score,path,title,is_easy_answer,is_hard_answer
16,0.0,1695--124-->32--97-->\n0\n,Danny DeVito,False,False
48,0.0,1695--124-->1695--97-->\n0\n,Bob Seger,False,False
464,0.0,1695--124-->637--97-->\n0\n,Claude Debussy,False,False
724,0.0,1695--124-->75--97-->\n0\n,Ricky Martin,False,False
775,0.0,1695--124-->90--97-->\n0\n,Gillian Anderson,False,False
...,...,...,...,...,...
14500,0.0,,"Strategic Simulations, Inc.",False,False
14501,0.0,,House of Plantagenet,False,False
14502,0.0,,Humour,False,False
14503,0.0,,Modernism,False,False


In [55]:
# Entity whose result is explained by the Shapley cells below, given by title.
target = 'Walt Disney'
# Resolve title -> node -> ID; `target_id` is what the later cells pass as
# `target_entity`.
target_entity = dataset.get_node_by_title(target)
target_id = dataset.get_id_by_node(target_entity)
print(f"Target entity: {target} ({target_entity}) with ID {target_id}")

Target entity: Walt Disney (/m/081nh) with ID 2197


In [56]:
# Shapley value of atom 0 for the selected target (qoi='rank').
atom_idx = 0
shapley_value(
    xcqa,
    query,
    atom_idx=atom_idx,
    easy_answers=easy_answers,
    target_entity=target_id,
    qoi='rank',
    k=10,
    t_norm='prod',
    t_conorm='prod',
)

Coalition: [0, 0, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.71 seconds
Time taken for third level query: 0.07 seconds
Coalition: [0, 0, 0], Contribution: 2272 (before adding atom: 0, after adding atom: 2272), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 0
Time taken for first level query: 0.07 seconds
Time taken for second level query: 0.67 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 1], Contribution: -9 (before adding atom: 2281, after adding atom: 2272), weight: 0.16666666666666666)
Coalition: [0, 1, 0], Atom Index: 0
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.17 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.

748.6666666666666

In [57]:
# Shapley value of atom 1 for the selected target (qoi='rank').
atom_idx = 1
shapley_value(
    xcqa,
    query,
    atom_idx=atom_idx,
    easy_answers=easy_answers,
    target_entity=target_id,
    qoi='rank',
    k=10,
    t_norm='prod',
    t_conorm='prod',
)

Coalition: [0, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 0], Contribution: 33 (before adding atom: 0, after adding atom: 33), weight: 0.3333333333333333)
Coalition: [0, 0, 1], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 1], Contribution: -2249 (before adding atom: 2281, after adding atom: 32), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 1
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 

-1490.8333333333333

In [58]:
# Shapley value of atom 2 for the selected target (qoi='rank').
atom_idx = 2
shapley_value(
    xcqa,
    query,
    atom_idx=atom_idx,
    easy_answers=easy_answers,
    target_entity=target_id,
    qoi='rank',
    k=10,
    t_norm='prod',
    t_conorm='prod',
)

Coalition: [0, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 0, 0], Contribution: 2281 (before adding atom: 0, after adding atom: 2281), weight: 0.3333333333333333)
Coalition: [0, 1, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Coalition: [0, 1, 0], Contribution: -1 (before adding atom: 33, after adding atom: 32), weight: 0.16666666666666666)
Coalition: [1, 0, 0], Atom Index: 2
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 seconds
Time taken for third level query: 0.00 seconds
Time taken for first level query: 0.00 seconds
Time taken for second level query: 0.00 s

751.1666666666666