Skip to content

Commit 77d73d2

Browse files
committed
fixes #65
1 parent 7b5b525 commit 77d73d2

File tree

6 files changed

+225
-69
lines changed

6 files changed

+225
-69
lines changed

snapquery/github_access.py

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,66 @@
44
Created on 2025-12-02
55
@author: wf
66
"""
7+
import json
8+
from pathlib import Path
9+
from typing import List, Dict, Any, Optional, Union
10+
711
import requests
812

13+
914
class GitHub:
1015
"""
1116
A simple GitHub API client for accessing repository contents.
1217
"""
1318

14-
def __init__(self, owner: str, repo: str, token: str = None):
19+
def __init__(self, owner: str, repo: str, token: Optional[str] = None, session: Optional[requests.Session] = None):
    """
    Initialize GitHub client.

    Args:
        owner: Repository owner (username or organization)
        repo: Repository name
        token: Optional GitHub API token for authentication; when None
            the token is looked up via _read_token()
        session: Optional custom requests.Session; a new session is
            created when none is given
    """
    self.owner = owner
    self.repo = repo
    self.base_url = f"https://api.github.com/repos/{owner}/{repo}"
    # Use provided token or read from file (compatible with GitHubApi).
    # This is the only assignment of self.token - an explicit token
    # (even an empty string) always wins over the token file.
    self.token = token if token is not None else self._read_token()

    # Use custom session or create new one
    self.session = session or requests.Session()
38+
39+
40+
41+
def _headers(self) -> Dict[str, str]:
42+
headers = {"Accept": "application/vnd.github.v3+json"}
43+
if self.token:
44+
headers["Authorization"] = f"token {self.token}"
45+
return headers
46+
47+
def _read_token(self) -> Optional[str]:
48+
"""
49+
Read GitHub token from ~/.github/access_token.json
50+
(compatible with GitHubApi token storage format).
51+
52+
Returns:
53+
GitHub token or None if not found
54+
"""
55+
token_path = Path.home() / ".github" / "access_token.json"
56+
if token_path.exists():
57+
try:
58+
with open(token_path, 'r') as f:
59+
data = json.load(f)
60+
return data.get("access_token")
61+
except (json.JSONDecodeError, OSError):
62+
pass
63+
return None
2764

28-
def get_contents(self, path: str = ""):
65+
66+
def get_contents(self, path: str = "") -> Union[List[Dict[str, Any]], Dict[str, Any]]:
2967
"""
3068
Get contents of a directory or file from the repository.
3169
@@ -36,11 +74,46 @@ def get_contents(self, path: str = ""):
3674
List of dictionaries for directories, or content for files
3775
"""
3876
url = f"{self.base_url}/contents/{path}"
39-
headers = {"Accept": "application/vnd.github.v3+json"}
77+
response = self.session.get(url, headers=self._headers(), timeout=30)
78+
response.raise_for_status()
79+
return response.json()
4080

41-
if self.token:
42-
headers["Authorization"] = f"token {self.token}"
81+
def list_files_recursive(self, path: str = "", suffix: Optional[str] = None) -> List[Dict[str, Any]]:
82+
"""
83+
Recursively list files under a given path. Optionally filter by file suffix.
84+
85+
Args:
86+
path: starting path within the repository
87+
suffix: optional filename suffix filter, e.g., ".ttl"
88+
89+
Returns:
90+
A flat list of GitHub content item dicts for files.
91+
"""
92+
items = self.get_contents(path)
93+
if isinstance(items, dict):
94+
items = [items]
95+
96+
files: List[Dict[str, Any]] = []
97+
for item in items:
98+
item_type = item.get("type")
99+
item_path = item.get("path", "")
100+
if item_type == "file":
101+
if suffix is None or item_path.endswith(suffix):
102+
files.append(item)
103+
elif item_type == "dir":
104+
files.extend(self.list_files_recursive(item_path, suffix=suffix))
105+
return files
106+
107+
def download(self, download_url: str) -> str:
108+
"""
109+
Download raw file content via a download_url.
43110
44-
response = requests.get(url, headers=headers)
111+
Args:
112+
download_url: The GitHub-provided raw download URL
113+
114+
Returns:
115+
The text content of the file
116+
"""
117+
response = self.session.get(download_url, headers=self._headers(), timeout=30)
45118
response.raise_for_status()
46-
return response.json()
119+
return response.text

snapquery/samples/endpoints.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,31 @@ endpoints:
7676
- blazegraph
7777
- commons
7878

79+
wikidata-qlever:
80+
description: QLever Freiburg Wikidata.
81+
lang: sparql
82+
method: POST
83+
database: qlever
84+
endpoint: https://qlever.dev/api/wikidata
85+
website: https://qlever.dev/wikidata
86+
calls_per_minute: 60
87+
prefix_sets:
88+
- rdf
89+
- wikidata
90+
- commons
91+
92+
wikidata-qlever-dbis:
93+
description: QLever RWTH Wikidata.
94+
lang: sparql
95+
method: POST
96+
database: qlever
97+
endpoint: https://qlever-api.wikidata.dbis.rwth-aachen.de/
98+
website: https://qlever.wikidata.dbis.rwth-aachen.de/wikidata
99+
calls_per_minute: 60
100+
prefix_sets:
101+
- rdf
102+
- wikidata
103+
79104
dblp:
80105
description: DBLP official (qlever - reliable).
81106
lang: sparql

snapquery/samples/endpoints_optional.yaml

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,30 +26,6 @@ endpoints:
2626
- rdf
2727
- wikidata
2828

29-
wikidata-qlever:
30-
description: QLever Freiburg Wikidata.
31-
lang: sparql
32-
method: POST
33-
database: qlever
34-
endpoint: https://qlever.cs.uni-freiburg.de/api/wikidata
35-
website: https://qlever.cs.uni-freiburg.de/wikidata
36-
calls_per_minute: 60
37-
prefix_sets:
38-
- rdf
39-
- wikidata
40-
- commons
41-
42-
wikidata-qlever-dbis:
43-
description: QLever RWTH Wikidata.
44-
lang: sparql
45-
method: POST
46-
database: qlever
47-
endpoint: https://qlever-api.wikidata.dbis.rwth-aachen.de/
48-
website: https://qlever.wikidata.dbis.rwth-aachen.de/wikidata
49-
calls_per_minute: 60
50-
prefix_sets:
51-
- rdf
52-
- wikidata
5329

5430
wikidata-scatter:
5531
description: Scatter experimental.

snapquery/snapquery_cmd.py

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
@author: wf
55
"""
6-
6+
from tqdm import tqdm
77
import logging
88
import sys
99
from argparse import ArgumentParser
@@ -87,6 +87,12 @@ def getArgParser(self, description: str, version_msg) -> ArgumentParser:
8787
action=StoreDictKeyPair,
8888
help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
8989
)
90+
parser.add_argument(
91+
"--progress",
92+
action="store_true",
93+
help="show progress bars when testing queries (--testQueries)",
94+
)
95+
9096
parser.add_argument(
9197
"--domain",
9298
type=str,
@@ -144,6 +150,56 @@ def cmd_parse(self, argv: Optional[list] = None):
144150
self.args.func(self.args)
145151
return self.args
146152

153+
def handle_test_queries(self):
    """
    Handle the --testQueries option by executing queries against endpoints.
    The endpoint is the outer loop, queries are the inner loop.

    Progress bars are shown for both loops when --progress is set.
    """
    args = self.args
    # Determine which endpoints to use
    if args.endpointName:
        endpoint_names = [args.endpointName]
    else:
        endpoint_names = list(self.nqm.endpoints.keys())

    # Get all queries to test
    queries = self.nqm.get_all_queries(domain=args.domain, namespace=args.namespace)
    total = len(queries)

    # Create execution instance
    execution = Execution(self.nqm, debug=args.debug)
    # the prefix merger is the same for every execution - resolve it once
    prefix_merger = QueryPrefixMerger.get_by_name(args.prefix_merger)

    # Outer loop: endpoints
    endpoint_iter = tqdm(endpoint_names, desc="Testing endpoints") if args.progress else endpoint_names
    for endpoint_name in endpoint_iter:
        # Inner loop: queries (leave=False removes the finished inner bar)
        query_iter = tqdm(queries, desc=f"Queries for {endpoint_name}", leave=False) if args.progress else queries
        for i, nq in enumerate(query_iter, start=1):
            execution.execute(
                nq,
                endpoint_name=endpoint_name,
                context=args.context,
                title=f"{endpoint_name}::query {i:3}/{total}",
                prefix_merger=prefix_merger,
            )
183+
184+
def handle_test_queries_no_progress_version(self):
    """
    Execute the selected queries against the selected endpoints.
    The query is the outer loop, endpoints are the inner loop.

    A single progress bar over the queries is shown when --progress is set.
    """
    args = self.args
    endpoint_names = [args.endpointName] if args.endpointName else list(self.nqm.endpoints.keys())
    queries = self.nqm.get_all_queries(domain=args.domain, namespace=args.namespace)
    execution = Execution(self.nqm, debug=args.debug)
    numbered_queries = enumerate(
        tqdm(queries, desc="Testing queries") if args.progress else queries,
        start=1,
    )
    for index, named_query in numbered_queries:
        for endpoint_name in endpoint_names:
            execution.execute(
                named_query,
                endpoint_name=endpoint_name,
                context=args.context,
                title=f"query {index:3}/{len(queries)}::{endpoint_name}",
                prefix_merger=QueryPrefixMerger.get_by_name(args.prefix_merger),
            )
201+
202+
147203
def handle_args(self, args) -> bool:
148204
"""
149205
handle the command line args
@@ -174,21 +230,8 @@ def handle_args(self, args) -> bool:
174230
print(f"{namespace}:{count}")
175231
handled = True
176232
elif self.args.testQueries:
177-
if self.args.endpointName:
178-
endpoint_names = [self.args.endpointName]
179-
else:
180-
endpoint_names = list(nqm.endpoints.keys())
181-
queries = self.nqm.get_all_queries(domain=self.args.domain, namespace=self.args.namespace)
182-
execution = Execution(self.nqm, debug=self.args.debug)
183-
for i, nq in enumerate(queries, start=1):
184-
for endpoint_name in endpoint_names:
185-
execution.execute(
186-
nq,
187-
endpoint_name=endpoint_name,
188-
context=self.args.context,
189-
title=f"query {i:3}/{len(queries)}::{endpoint_name}",
190-
prefix_merger=QueryPrefixMerger.get_by_name(self.args.prefix_merger),
191-
)
233+
self.handle_test_queries()
234+
handled = True
192235
elif self.args.queryName is not None or self.args.query_id is not None:
193236
if self.args.query_id is not None:
194237
query_name = QueryName.from_query_id(self.args.query_id)

snapquery/snapquery_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,7 @@ def from_samples(
728728
for source_class, pk in [
729729
(NamedQuery, "query_id"),
730730
(QueryStats, "stats_id"),
731-
(QueryDetails, "quer_id"),
731+
(QueryDetails, "query_id"),
732732
]:
733733
# Fetch sample records from the specified class
734734
sample_records = cls.get_sample_records(source_class=source_class)

tests/test_sib_examples.py

Lines changed: 60 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,74 @@
11
"""
2+
Test SIB SPARQL Examples fetching.
3+
Verifies integration between SIB fetcher and Snapquery Core.
24
3-
SIB Swiss Institute of Bioinformatics
4-
5-
sparql examples
6-
7-
@author wf
5+
Created on 2025-12-02
6+
@author: wf
87
"""
8+
import os
99
import unittest
10-
1110
from basemkit.basetest import Basetest
12-
from rdflib import Graph
13-
11+
from snapquery.snapquery_core import NamedQueryManager
12+
from snapquery.sib_sparql_examples import SibSparqlExamples
1413

1514
class TestSibExamples(Basetest):
    """
    Test retrieving SIB SPARQL examples using GitHub cache/api.
    """

    def setUp(self, debug=True, profile=True):
        Basetest.setUp(self, debug=debug, profile=profile)

    @unittest.skipIf(Basetest.inPublicCI(), "avoid github rate limit in CI")
    def test_sib_examples_fetch_and_store(self):
        """
        Test retrieving SIB examples, populating the DB, and exporting to YAML.
        """
        import tempfile

        # use the platform's temp directory instead of a hard-coded /tmp;
        # the files are kept after the run so they can be inspected
        tmp_dir = tempfile.gettempdir()
        db_path = os.path.join(tmp_dir, "sib_examples.db")
        yaml_path = os.path.join(tmp_dir, "sib_examples.yaml")

        # start from a clean slate - remove leftovers of a previous run
        for stale_path in (db_path, yaml_path):
            if os.path.exists(stale_path):
                os.remove(stale_path)

        nqm = NamedQueryManager.from_samples(db_path=db_path)
        sib_fetcher = SibSparqlExamples(nqm, debug=self.debug)

        # Limit for testing efficiency
        limit = 7
        if self.debug:
            print(f"Fetching SIB examples (limit={limit})...")

        loaded_queries = sib_fetcher.extract_queries(limit=limit, debug_print=self.debug)

        self.assertGreater(len(loaded_queries), 0, "Should have loaded at least one query")
        self.assertEqual(len(sib_fetcher.named_query_set), len(loaded_queries))

        # Verify SQL Database Storage
        records = nqm.sql_db.query(
            """
            SELECT count(*) as count
            FROM NamedQuery
            WHERE namespace=? AND domain=?
            """,
            (sib_fetcher.named_query_set.namespace, sib_fetcher.named_query_set.domain),
        )
        db_count = records[0]["count"]
        self.assertEqual(db_count, len(loaded_queries), "DB count should match loaded count")

        # Verify YAML Export
        if self.debug:
            print(f"Exporting to {yaml_path}...")
        sib_fetcher.save_to_yaml(yaml_path)

        self.assertTrue(os.path.exists(yaml_path))

        # Optional: Read back to verify YamlAble structure
        with open(yaml_path, "r", encoding="utf-8") as f:
            content = f.read()
        self.assertIn("sib-examples", content)
        self.assertIn("queries:", content)

        if self.debug:
            print(f"Successfully processed {db_count} queries.")

0 commit comments

Comments
 (0)