In [15]:
import json
import re

# Define 20 functions for 20 templates
def convert_template_1(sparql_query):
    pattern = r'ASK WHERE \{ wd:(Q\d+) wdt:(P\d+) wd:(Q\d+) \. wd:\1 wdt:\2 wd:(Q\d+)(?:.) \}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, predicate, obj1, obj2 = match.groups()
        return f"( ASK ( AND ( {entity} {predicate} {obj1} ) ( {entity} {predicate} {obj2} ) ) )"
    return None

def convert_template_2(sparql_query):
    pattern = r'select \?ent where \{ \?ent wdt:(P\d+) wd:(Q\d+) . \?ent wdt:(P\d+) \?obj \} ORDER BY DESC\(\?obj\)LIMIT 5'
    match = re.match(pattern, sparql_query)
    if match:
        rel1, type_entity, rel2 = match.groups()
        return f"( MAX ( JOIN ( R {rel2} ) ( JOIN {rel1} {type_entity} ) ) )"
    return None

def convert_template_3(sparql_query):
    pattern = r'SELECT \?answer WHERE \{ wd:(Q\d+) wdt:(P\d+) \?X . \?X wdt:(P\d+) \?answer\}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, rel1, rel2 = match.groups()
        return f"( JOIN ( R {rel2} ) ( JOIN ( R {rel1} ) {entity} ) )"
    return None

def convert_template_4(sparql_query):
    pattern = r' select distinct \?obj where \{ wd:(Q\d+) wdt:(P\d+) \?obj . \?obj wdt:(P\d+) wd:(Q\d+) \}'
    match = re.match(pattern, sparql_query)
    if match:
        en1, rel1, rel2, en2 = match.groups()
        return f"( AND ( JOIN ( R {rel1} ) {en1} ) ( JOIN {rel2} {en2} ) )"
    return None

def convert_template_5(sparql_query):
    pattern = r'select \?ent where \{ \?ent wdt:(P\d+) wd:(Q\d+) . \?ent wdt:(P\d+) \?obj . \?ent wdt:(P\d+) wd:(Q\d+)\.?\ ?\} ORDER BY ASC\(\?obj\)LIMIT 5'
    match = re.match(pattern, sparql_query)
    if match:
        rel1, obj1, rel, rel2, obj2 = match.groups()
        return f"( MIN ( JOIN ( R {rel} ) ( AND ( JOIN {rel1} {obj1} ) ( JOIN {rel2} {obj2} ) ) ) )"
    return None

def convert_template_6(sparql_query):
    pattern = r'select \?ent where \{ \?ent wdt:(P\d+) wd:(Q\d+) . \?ent wdt:(P\d+) \?obj . \?ent wdt:(P\d+) wd:(Q\d+)\ ?\} ORDER BY DESC\(\?obj\)LIMIT 5'
    match = re.match(pattern, sparql_query)
    if match:
        rel1, obj1, rel, rel2, obj2 = match.groups()
        return f"( MAX ( JOIN ( R {rel} ) ( AND ( JOIN {rel1} {obj1} ) ( JOIN {rel2} {obj2} ) ) ) )"
    return None

def convert_template_7(sparql_query):
    pattern = r'select distinct \?answer where \{ \?answer wdt:(P\d+) wd:(Q\d+)\}'
    match = re.match(pattern, sparql_query)
    if match:
        predicate, obj = match.groups()
        return f"( JOIN {predicate} {obj} )"
    return None

def convert_template_8(sparql_query):
    pattern = r'SELECT \?obj WHERE \{ wd:(Q\d+) p:(P\d+) \?s . \?s ps:\2 \?obj . \?s pq:(P\d+) wd:(Q\d+) \}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, pred, qualifier, qual_obj = match.groups()
        return f"( JOIN ( R {pred} ) ( AND ( JOIN ( R {pred} ) {entity} ) ( JOIN {qualifier} {qual_obj} ) ) )"
    return None

def convert_template_9(sparql_query):
    pattern = r'SELECT \?value WHERE \{ wd:(Q\d+) p:(P\d+) \?s . \?s ps:\2 wd:(Q\d+) . \?s pq:(P\d+) \?value\}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, pred, obj, qualifier = match.groups()
        return f"( JOIN ( R {qualifier} ) ( AND ( JOIN ( R {pred} ) {entity} ) ( JOIN {pred} {obj} ) ) )"
    return None

def convert_template_10(sparql_query):
    pattern = r' select distinct \?sbj where \{ \?sbj wdt:(P\d+) wd:(Q\d+) . \?sbj wdt:(P\d+) wd:(Q\d+) \}'
    match = re.match(pattern, sparql_query)
    if match:
        pred, obj,p2, type_obj = match.groups()
        return f"( AND ( JOIN {pred} {obj} ) ( JOIN {p2} {type_obj} ) )"
    return None



In [16]:
# Define functions for specific templates
def convert_template_11(sparql_query):
    pattern = r'SELECT \?answer WHERE \{ wd:(Q\d+) wdt:(P\d+) \?answer . \?answer wdt:(P\d+) wd:(Q\d+)\}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, rel1, rel2, obj = match.groups()
        return f"( AND ( JOIN ( R {rel1} ) {entity} ) ( JOIN {rel2} {obj} ) )"
    return None

def convert_template_12(sparql_query):
    pattern = r'ASK WHERE \{ wd:(Q\d+) wdt:(P\d+) \?obj filter\(\?obj\s*(=|>=|<=|>|<)\s*(\d+(\.\d+)?)\) \}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, predicate, operator, value, _ = match.groups()
        return f"( ASK ( {operator} ( JOIN ( R {predicate} ) {entity} ) {value} ) )"
    return None

def convert_template_13(sparql_query):
    pattern = r'SELECT \?ans_1 \?ans_2 WHERE \{ wd:(Q\d+) wdt:(P\d+) \?ans_1 . wd:\1 wdt:(P\d+) \?ans_2 \}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, pred1, pred2 = match.groups()
        return f"( JOIN ( R {pred1} ) {entity} ) ( JOIN ( R {pred2} ) {entity} )"
    return None

def convert_template_14(sparql_query):
    pattern = r'SELECT \(COUNT\(\?sub\) AS \?value \) \{ \?sub wdt:(P\d+) wd:(Q\d+) \}'
    match = re.match(pattern, sparql_query)
    if match:
        predicate, obj = match.groups()
        return f"( COUNT ( JOIN {predicate} {obj} ) )"
    return None

def convert_template_15(sparql_query):
    pattern = r'SELECT \(COUNT\(\?obj\) AS \?value \) \{ wd:(Q\d+) wdt:(P\d+) \?obj \}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, predicate = match.groups()
        return f"( COUNT ( JOIN ( R {predicate} ) {entity} ) )"
    return None


In [17]:
convert_template_11("SELECT ?answer WHERE { wd:Q113904 wdt:P26 ?answer . ?answer wdt:P40 wd:Q2841495}")

'( AND ( JOIN ( R P26 ) Q113904 ) ( JOIN P40 Q2841495 ) )'

In [18]:
convert_template_1("ASK WHERE { wd:Q174843 wdt:P106 wd:Q1804811 . wd:Q174843 wdt:P106 wd:Q33231 }")

'( ASK ( AND ( Q174843 P106 Q1804811 ) ( Q174843 P106 Q3323 ) ) )'

In [19]:
def convert_template_16(sparql_query):
    pattern = r'select distinct \?answer where \{ wd:(Q\d+) wdt:(P\d+) \?answer\}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, predicate = match.groups()
        return f"( JOIN ( R {predicate} ) {entity} )"
    return None

def convert_template_17(sparql_query):
    pattern = r"SELECT DISTINCT \?sbj \?sbj_label WHERE \{ \?sbj wdt:(P\d+) wd:(Q\d+) . \?sbj wdt:(P\d+) wd:(Q\d+) . .*? FILTER\(STRSTARTS\(lcase\(\?sbj_label\), '(.*?)'\)\)"
    match = re.match(pattern, sparql_query)
    if match:
        pred1, obj1, pred2, obj2, char = match.groups()
        return f"( CHAR ( AND ( JOIN {pred1} {obj1} ) ( JOIN {pred2} {obj2} ) ) '{char}' )"
    return None

def convert_template_18(sparql_query):
    pattern = r'ASK WHERE \{ wd:(Q\d+) wdt:(P\d+) wd:(Q\d+) \}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, predicate, obj = match.groups()
        return f"( ASK ( {entity} {predicate} {obj} ) )"
    return None

def convert_template_19(sparql_query):
    pattern = r"SELECT DISTINCT \?sbj \?sbj_label WHERE \{ \?sbj wdt:(P\d+) wd:(Q\d+) . .*? FILTER\(CONTAINS\(lcase\(\?sbj_label\), '(.*?)'\)\)"
    match = re.match(pattern, sparql_query)
    if match:
        predicate, obj, word = match.groups()
        return f"( WORD ( JOIN {predicate} {obj} ) '{word}' )"
    return None
def convert_template_20(sparql_query):
    pattern = r'SELECT \?answer WHERE \{ wd:(Q\d+) wdt:(P\d+) \?X . \?X wdt:(P\d+) \?answer\}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, rel1, rel2 = match.groups()
        return f"( JOIN ( R {rel2} ) ( JOIN ( R {rel1} ) {entity} ) )"
    return None


In [20]:
def convert_template_21(sparql_query):
    pattern = r'SELECT \?obj WHERE \{ wd:(Q\d+) p:(P\d+) \?s . \?s ps:(P\d+) \?obj . \?s pq:(P\d+) \?x filter\(contains\(\?x,\'(.*?)\'\)\)\ ?\}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, pred1, pred2, pred3, value = match.groups()
        return f"( JOIN ( R {pred2} ) ( FILTER ( JOIN ( R {pred3} ) ( JOIN ( R {pred1} ) {entity} ) ) ( '{value}' ) ) )"
    return None

def convert_template_22(sparql_query):
    pattern = r"SELECT \?answer WHERE \{ wd:(Q\d+) wdt:(P\d+) \?answer . \?answer wdt:(P\d+) \?x FILTER\(contains\(\?x,\'(.*?)'\)\)\}"
    match = re.match(pattern, sparql_query)
    if match:
        entity, rel1, rel2, value = match.groups()
        return f"( JOIN ( R {rel1} ) ( FILTER ( JOIN ( R {rel2} ) {entity} ) ( '{value}' ) ) )"
    return None

def convert_template_23(sparql_query):
    pattern = r'SELECT \?obj WHERE \{ wd:(Q\d+) p:(P\d+) \?s \. \?s ps:\2 \?obj \. \?s pq:(P\d+) \?x filter\(contains\(YEAR\(\?x\),[\'\"](.*?)[\'\"]\)\) \}'
    match = re.match(pattern, sparql_query)
    if match:
        entity, pred1, pred2, value = match.groups()
        return f"( JOIN ( R {pred1} ) ( FILTER ( JOIN ( R {pred2} ) ( JOIN ( R {pred1} ) {entity} ) ) ( '{value}' ) ) )"
    return None
def convert_template_24(sparql_query):
    pattern = r"SELECT \?answer WHERE \{ wd:(Q\d+) wdt:(P\d+) \?answer . \?answer wdt:(P\d+) \?x FILTER\(contains\(YEAR\(\?x\),\'(.*?)'\)\)\}"
    match = re.match(pattern, sparql_query)
    if match:
        entity, rel1, rel2, value = match.groups()
        return f"( FILTER ( JOIN ( R {rel2} ) ( JOIN ( R {rel1} ) {entity} ) ) ( YEAR '{value}' ) )"
    return None

def convert_template_25(sparql_query):
    pattern = r"SELECT DISTINCT \?sbj \?sbj_label WHERE \{ \?sbj wdt:(P\d+) wd:(Q\d+) . .*? FILTER\(STRSTARTS\(lcase\(\?sbj_label\), ['\"](.*?)['\"]\)\) .*? FILTER \(lang\(\?sbj_label\) = ['\"]en['\"]\) .*?\}"
    match = re.match(pattern, sparql_query)
    if match:
        predicate, obj, word = match.groups()
        return f"( WORD ( JOIN {predicate} {obj} ) '{word}' )"
    return None

In [21]:
def convert_sparql_to_s_expression(sparql_query):
    functions = [
        convert_template_1, convert_template_2, convert_template_3, convert_template_4, convert_template_5,
        convert_template_6, convert_template_7, convert_template_8, convert_template_9, convert_template_10,
        convert_template_11, convert_template_12, convert_template_13, convert_template_14, convert_template_15,
        convert_template_16, convert_template_17, convert_template_18, convert_template_19, convert_template_20,
        convert_template_21, convert_template_22, convert_template_23, convert_template_24, convert_template_25
    ]
    for func in functions:
        s_expression = func(sparql_query)
        if s_expression:
            return s_expression  # Trả về giá trị hợp lệ đầu tiên tìm thấy
    return "UNKNOWN"

def process_test_file(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    filtered_data = []
    
    for entry in data:
        sparql_query = entry.get("sparql_wikidata", "")
        s_expression = convert_sparql_to_s_expression(sparql_query)
        
        if s_expression != "UNKNOWN":
            entry["s_expression"] = s_expression
            filtered_data.append(entry)  # Giữ lại điểm dữ liệu hợp lệ

    # Ghi kết quả đã lọc vào tệp đầu ra
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(filtered_data, f, indent=4, ensure_ascii=False)

# Run conversion
input_file = "origin/test.json"
output_file = "s_expression/test_with_s_expression.json"
process_test_file(input_file, output_file)
print(f"Conversion completed. Output saved to {output_file}")

Conversion completed. Output saved to s_expression/test_with_s_expression.json


In [22]:
# Run conversion
input_file = "origin/train.json"
output_file = "s_expression/train_with_s_expression.json"
process_test_file(input_file, output_file)
print(f"Conversion completed. Output saved to {output_file}")

Conversion completed. Output saved to s_expression/train_with_s_expression.json
