Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Semantic tag with wikidata api #468

Merged
merged 7 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion project/backend/api/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,4 +563,5 @@ def test_update_review_request(self):
url = reverse('update_req')
response = self.client.put(url, {'id': self.request.id, 'status': 'R'}, format='json')
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data['status'], 'R')
self.assertEqual(response.data['status'], 'R')

113 changes: 113 additions & 0 deletions project/backend/api/wikidata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import json
import re

import requests

def search_entity(keyword):
    """Search Wikidata for entities matching ``keyword``.

    Calls the ``wbsearchentities`` action of the Wikidata API (English
    language, JSON format) and flattens every hit into a small dict.

    Args:
        keyword: Free-text search string sent as the ``search`` parameter.

    Returns:
        A list of dicts with the keys ``"id"``, ``"label"`` and
        ``"description"``. ``"label"`` / ``"description"`` are ``None`` when
        the hit carries no such display value.

    Raises:
        requests.RequestException: On network failure, timeout, an HTTP
            error status or a body that is not valid JSON.
    """
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbsearchentities",
        "language": "en",
        "format": "json",
        "search": keyword,
    }

    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()

    # Parse the body untouched. Rewriting apostrophes to double quotes (as was
    # done before) breaks every payload whose text contains an apostrophe.
    data = response.json()

    results = []
    for item in data.get("search", []):
        # Any level of display -> label/description -> value may be absent
        # (e.g. entities without a description), so navigate defensively.
        display = item.get("display") or {}
        results.append({
            "id": item.get("id"),
            "label": (display.get("label") or {}).get("value"),
            "description": (display.get("description") or {}).get("value"),
        })

    return results

def get_parent_ids(entity_id):
    """Return the ids of the entities ``entity_id`` is an instance/subclass of.

    Runs a SPARQL query against the Wikidata Query Service that collects the
    objects of the entity's ``wdt:P31`` and ``wdt:P279`` statements and
    reduces each object URI to its trailing ``Q...`` id.

    Args:
        entity_id: Wikidata entity id such as ``"Q8366"``.

    Returns:
        A list of id strings, one per distinct result binding.

    Raises:
        ValueError: If ``entity_id`` is not a well-formed entity id. The id
            is spliced into the query text, so anything else is rejected
            instead of being sent to the endpoint.
        requests.RequestException: On network failure, timeout, an HTTP
            error status or a body that is not valid JSON.
    """
    if not isinstance(entity_id, str) or not re.fullmatch(r"[QPL][0-9]+", entity_id):
        raise ValueError("Invalid Wikidata entity id: %r" % (entity_id,))

    url = "https://query.wikidata.org/sparql"

    query = (
        "SELECT DISTINCT ?itemId\n"
        "WHERE {\n"
        "  { wd:%(id)s wdt:P31 ?instanceOfEntity. }\n"
        "  UNION\n"
        "  { wd:%(id)s wdt:P279 ?instanceOfEntity. }\n"
        '  BIND(wikibase:decodeUri(REPLACE(STR(?instanceOfEntity), ".*Q", "Q")) AS ?itemId)\n'
        '  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }\n'
        "}\n"
    ) % {"id": entity_id}

    params = {
        "format": "json",
        "query": query,
    }

    # requests has no default timeout; cap the wait explicitly.
    response = requests.get(url, params=params, timeout=60)
    response.raise_for_status()

    # Parse the body untouched: replacing apostrophes with double quotes
    # corrupts any JSON string value that contains an apostrophe.
    data = response.json()

    return [
        binding.get("itemId").get("value")
        for binding in data.get("results").get("bindings")
    ]

def get_children_ids(entity_id_list):
    """Return ids of entities that are an instance/subclass of any given id.

    Runs one SPARQL query against the Wikidata Query Service that matches
    every ``?item`` having a ``wdt:P31`` or ``wdt:P279`` statement whose
    object is one of the ids in ``entity_id_list``, and reduces each item
    URI to its trailing ``Q...`` id.

    Every id in the list takes part in the query. The earlier implementation
    sliced off the last id and never appended the per-id clauses it built,
    so only the first id was actually queried.

    Args:
        entity_id_list: Sequence of Wikidata entity ids, e.g. ``["Q8366"]``.

    Returns:
        A list of id strings, one per distinct result binding. An empty
        input yields an empty list without contacting the service.

    Raises:
        ValueError: If any element is not a well-formed entity id. The ids
            are spliced into the query text, so anything else is rejected.
        requests.RequestException: On network failure, timeout, an HTTP
            error status or a body that is not valid JSON.
    """
    if not entity_id_list:
        return []

    for entity_id in entity_id_list:
        if not isinstance(entity_id, str) or not re.fullmatch(r"[QPL][0-9]+", entity_id):
            raise ValueError("Invalid Wikidata entity id: %r" % (entity_id,))

    url = "https://query.wikidata.org/sparql"

    # dict.fromkeys drops duplicate ids while keeping their order.
    parents = " ".join("wd:" + entity_id for entity_id in dict.fromkeys(entity_id_list))

    # A VALUES clause lets the same two patterns range over all parent ids,
    # replacing the hand-assembled chain of UNION blocks.
    query = (
        "SELECT DISTINCT ?itemId\n"
        "WHERE {\n"
        "  VALUES ?parent { " + parents + " }\n"
        "  { ?item wdt:P31 ?parent. }\n"
        "  UNION\n"
        "  { ?item wdt:P279 ?parent. }\n"
        '  BIND(wikibase:decodeUri(REPLACE(STR(?item), ".*Q", "Q")) AS ?itemId)\n'
        '  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }\n'
        "}\n"
    )

    params = {
        "format": "json",
        "query": query,
    }

    # requests has no default timeout; cap the wait explicitly.
    response = requests.get(url, params=params, timeout=60)
    response.raise_for_status()

    # Parse the body untouched: replacing apostrophes with double quotes
    # corrupts any JSON string value that contains an apostrophe.
    data = response.json()

    return [
        binding.get("itemId").get("value")
        for binding in data.get("results").get("bindings")
    ]
1 change: 0 additions & 1 deletion project/backend/database/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
admin.site.register(Node)
admin.site.register(Proof)
admin.site.register(Question)
admin.site.register(WikiTag)
admin.site.register(Annotation)
admin.site.register(Request)
admin.site.register(ReviewRequest)
Expand Down
64 changes: 34 additions & 30 deletions project/backend/database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,50 +4,55 @@
import copy
from datetime import datetime
import enum
from api.wikidata import *


class SemanticTag(models.Model):
created_at = models.DateTimeField(auto_now_add=True)
label = models.CharField(max_length=50, unique=True)
desc = models.CharField(max_length=100)
parent_tag = models.ForeignKey("SemanticTag", on_delete=models.CASCADE, null=True, blank=True,
related_name="sub_tags")
wid = models.CharField(max_length=20)
label = models.CharField(max_length=30, unique=True)

@property
def nodes(self):
return Node.objects.filter(semantic_tags__wid=self.wid)

@property
def count(self):
return self.node_set.all().count()

return self.nodes.count()
@property
def nodes(self):
return self.node_set.all()
def related_nodes(self):
parent_wids = get_parent_ids(self.wid)
sibling_wids = get_children_ids(parent_wids)

@property
def recursive_nodes(self):
nodes = list(self.nodes)
if self.wid in sibling_wids:
sibling_wids.remove(self.wid)

for sub in self.sub_tags.all():
nodes.extend(sub.recursive_nodes)
children_wids = get_children_ids([self.wid])
combined = sibling_wids + parent_wids + children_wids

return nodes
return Node.objects.filter(semantic_tags__wid__in=combined)

@property
def recursive_count(self):
return len(self.recursive_nodes)
def related_count(self):
return self.related_nodes.count()

@classmethod
def existing_search_results(cls, keyword):
wiki_results = search_entity(keyword)

class Meta:
constraints = [
models.UniqueConstraint(fields=['label', 'parent_tag'],
name='semantictag_label_parenttag_unique_constraint')
]
existings = []

for item in wiki_results:
node_count = Node.objects.filter(semantic_tags__wid=item["id"]).count()
if node_count > 0:
existings.append(item)

return existings

def __str__(self):
return self.label + " - " + self.wid

class WikiTag(models.Model):
pass
class Request(models.Model):
"""
This class definition is written beforehand (to be implemented afterwards)
in order to be referred from other classes. e.g. ReviewRequest
"""
pass
class Entry(models.Model):
entry_id = models.AutoField(primary_key=True)
entry_index = models.IntegerField()
Expand Down Expand Up @@ -200,7 +205,6 @@ class Node(models.Model):
# Nodes also have to_referenced_nodes list to access the nodes this node references
# Nodes also have a 'proofs' list which can be accessed as Node.proofs.all()
semantic_tags = models.ManyToManyField(SemanticTag)
wiki_tags = models.ManyToManyField(WikiTag)
annotations = models.ManyToManyField(Annotation)
is_valid = models.BooleanField()
num_visits = models.IntegerField()
Expand Down
2 changes: 1 addition & 1 deletion project/backend/database/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class NodeSerializer(serializers.ModelSerializer):
class Meta:
model = Node
fields = ['node_id', 'node_title', 'publish_date', 'is_valid', 'num_visits' , 'theorem', 'contributors',
'reviewers', 'from_referenced_nodes' , 'to_referenced_nodes', 'proofs' , 'question_set', 'semantic_tags', 'wiki_tags', 'annotations']
'reviewers', 'from_referenced_nodes' , 'to_referenced_nodes', 'proofs' , 'question_set', 'semantic_tags', 'annotations']

class RequestSerializer(serializers.ModelSerializer):
class Meta:
Expand Down
77 changes: 77 additions & 0 deletions project/backend/database/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,80 @@ def test_reviewer_serializer_fields(self):
)
self.assertEqual(set(serializer.data.keys()), expected_fields)


class SemanticTagModelTestCase(TestCase):
    """Tests for SemanticTag node lookups, related nodes and keyword search.

    NOTE(review): as the model is written in this change, the search and
    related-node properties call the Wikidata helpers, so test_search and the
    second half of test_nodes appear to need live network access - confirm
    before running them in an offline CI job.
    """

    def _create_node(self, title, tag):
        """Create a valid, unvisited node titled ``title`` tagged with ``tag``."""
        node = Node.objects.create(
            node_title=title,
            publish_date="2023-01-01",
            is_valid=True,
            num_visits=0,
        )
        node.semantic_tags.add(tag)
        return node

    def setUp(self):
        self.algo_tag = SemanticTag.objects.create(
            wid="Q8366",
            label="Algorithm"
        )
        self.search_tag = SemanticTag.objects.create(
            wid="Q755673",
            label="Search algorithm"
        )
        self.sort_tag = SemanticTag.objects.create(
            wid="Q181593",
            label="Sorting Agorithm"
        )
        self.combinational_tag = SemanticTag.objects.create(
            wid="Q41883552",
            label="Combinational Algorithm"
        )

        # algo_node carries the parent tag; the other three carry its children.
        self.algo_node = self._create_node("Algorithm Node", self.algo_tag)
        self.search_node = self._create_node("Search Algorithm Node", self.search_tag)
        self.sort_node = self._create_node("Sorting algorithm Node", self.sort_tag)
        self.combinational_node = self._create_node(
            "Combinational Algorithm Node", self.combinational_tag
        )

    def tearDown(self):
        # Runs after every test method, so no "all tests completed" banner is
        # printed here - it would be emitted once per test and be misleading.
        Node.objects.all().delete()
        SemanticTag.objects.all().delete()

    def test_search(self):
        search_res = SemanticTag.existing_search_results("sorting algorithm")
        # assertEqual aborts the test on mismatch, so the index below is safe
        # without an extra length guard.
        self.assertEqual(1, len(search_res), "Search result length is wrong!")
        self.assertEqual(self.sort_tag.wid, search_res[0]["id"], "Search result id mismatch!")

    def test_nodes(self):
        self.assertEqual(self.sort_tag.count, 1, "Sort tag node count mismatch!")
        self.assertEqual(self.sort_tag.nodes[0].pk, self.sort_node.pk, "Sort nodes mismatch!")

        # Each access to related_count / related_nodes re-runs the remote
        # lookups, so evaluate each property only once.
        self.assertEqual(self.combinational_tag.related_count, 3, "Combinational tag related count mismatch!")
        r_nodes = self.combinational_tag.related_nodes
        self.assertIn(self.algo_node, r_nodes, "Algorithm node not in related nodes of combinational semantic tag!")
        self.assertIn(self.search_node, r_nodes, "Search node not in related nodes of combinational semantic tag!")
        self.assertIn(self.sort_node, r_nodes, "Sort node not in related nodes of combinational semantic tag!")

3 changes: 2 additions & 1 deletion project/backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ Django==4.2.6
djangorestframework==3.14.0
psycopg==3.1.12
psycopg-binary==3.1.12
django-cors-headers==4.3.0
django-cors-headers==4.3.0
requests