# Compare expected and actual SPARQL results

In this notebook, we check how well the LLM did at generating SPARQL queries on the Uniprot dataset. 

In ```get_expected_results.ipynb```, we ran each of the ground-truth queries against either the public Uniprot SPARQL endpoint or your own Neptune database to obtain expected SPARQL results for those queries. The results are in the ```up``` folder. 

In ```run_gen_tests.ipynb```, we asked the LLM to generate SPARQL queries for each ground-truth example. We ran the query against the Uniprot endpoint or your Neptune database. The results are in the ```gen_results``` folder. 

In this notebook, we compare the two sets of results. Are the generated SPARQL queries similar to the ground-truth queries? Are the results they return similar?

See the included file ```comparison.html``` for the comparison produced from our test data. We also provide the full ```up``` and ```gen_results``` folders.


In [None]:
from pathlib import Path
import json, re
from itertools import count

import pandas as pd
import jinja2
from IPython.core.display import display, HTML

In [31]:
# Shared Jinja2 environment, created with default settings; blob_to_table_html
# uses it to compile its HTML template from a string.
jenv = jinja2.Environment()

In [32]:
def cleanup_line(sparql_line: str) -> str:
    """Left-align indented SELECT / LIMIT / WHERE lines for display.

    This is a crude cosmetic fix: a line that starts with whitespace
    followed by one of the (upper-case) keywords SELECT, LIMIT or WHERE is
    returned with its surrounding whitespace stripped. Every other line is
    returned unchanged.
    """
    starts_with_indented_keyword = (
        re.match(r"^\s+(?:SELECT|LIMIT|WHERE)", sparql_line) is not None
    )
    return sparql_line.strip() if starts_with_indented_keyword else sparql_line

def remove_prefixes(sparql: str) -> str:
    """Return a SPARQL query without its PREFIX declarations and blank lines.

    The query is split on newlines. Lines that start (after optional
    whitespace) with the PREFIX keyword are dropped, as are lines that are
    empty or whitespace-only. Each remaining line is passed through
    ``cleanup_line`` and the lines are re-joined with newlines.

    SPARQL keywords are case-insensitive, so ``prefix`` / ``Prefix``
    declarations are removed as well as upper-case ``PREFIX`` ones.
    """
    kept_lines = []
    for line in sparql.split("\n"):
        # Case-insensitive: generated queries do not always use upper case.
        if re.match(r"^\s*PREFIX", line, re.IGNORECASE):
            continue
        # Also remove empty lines.
        if re.match(r"^\s*$", line):
            continue
        kept_lines.append(cleanup_line(line))
    return "\n".join(kept_lines)


# Short prefix label -> namespace URI, used to abbreviate URIs in result tables.
PREFIXES = {
    "taxon": "http://purl.uniprot.org/taxonomy/",
    "uniprotkb": "http://purl.uniprot.org/uniprot/"
}

def shorten_common_URIs(uri_str: str) -> str:
    """Abbreviate a URI to ``label:local_part`` when it is in a known namespace.

    Surrounding whitespace is stripped first. The first entry of
    ``PREFIXES`` whose namespace URI is a prefix of the stripped string
    determines the abbreviation; if none matches, the stripped string is
    returned as-is.
    """
    stripped = uri_str.strip()
    abbreviations = (
        f"{label}:{stripped[len(namespace):]}"
        for label, namespace in PREFIXES.items()
        if stripped.startswith(namespace)
    )
    # Lazily evaluated, so only the first matching namespace is used.
    return next(abbreviations, stripped)


def process_value(json_blob):
    """Convert one SPARQL JSON result binding into a plain Python value.

    Args:
        json_blob: a binding dict with a ``"type"`` key and (normally) a
            ``"value"`` key.

    Returns:
        * ``"uri"`` bindings: the value abbreviated by ``shorten_common_URIs``.
        * ``"literal"`` bindings: an ``int`` if the value parses as one,
          otherwise a ``float`` if it parses as one, otherwise ``str(value)``.
        * any other type (e.g. ``"bnode"``): the raw value, so such bindings
          are shown instead of silently becoming ``None``.

    Raises:
        KeyError: if ``"type"`` is missing, or ``"value"`` is missing for a
            uri/literal binding.
    """
    kind = json_blob["type"]
    if kind == "uri":
        return shorten_common_URIs(json_blob["value"])
    if kind == "literal":
        value = json_blob["value"]
        # Try the narrowest numeric type first; catch only conversion
        # failures so that e.g. KeyboardInterrupt is not swallowed.
        for convert in (int, float):
            try:
                return convert(value)
            except (ValueError, TypeError):
                continue
        return str(value)
    return json_blob.get("value")

 
def results_to_data_frame(json_blob, problems) -> pd.DataFrame:
    """Turn a stored query-result blob into a DataFrame for display.

    Args:
        json_blob: dict with an ``"error_type"`` entry, an ``"error_msg"``
            entry and a ``"res"`` entry holding the SPARQL JSON response.
        problems: list that receives the blob's ``error_type`` when the
            blob records an error (mutated in place).

    Returns:
        * a two-row ``error_msg`` frame (error type, first 40 characters of
          the error message) when the blob records an error;
        * a one-cell ``boolean_result`` frame for ASK-style responses;
        * otherwise one column per result variable, one row per non-empty
          binding, with unbound variables shown as ``""``;
        * a one-row ``error_msg`` frame describing the exception when the
          blob cannot be interpreted. A DataFrame is always returned, never
          ``None``, so callers can render the result unconditionally.
    """
    try:
        # Check for a recorded error before touching "res": a failed query
        # may have no usable "res" at all.
        error_type = json_blob.get("error_type")
        if error_type:
            problems.append(error_type)
            error_msg = json_blob.get("error_msg") or ""
            return pd.DataFrame({"error_msg": [error_type, error_msg[0:40]]})

        res = json_blob["res"]
        if "boolean" in res:
            return pd.DataFrame({"boolean_result": [res["boolean"]]})

        columns = res["head"]["vars"]
        data = {col: [] for col in columns}
        unbound = {"type": "literal", "value": ""}
        for row in res["results"]["bindings"]:
            if not row:
                continue
            for col in columns:
                data[col].append(process_value(row.get(col, unbound)))
        return pd.DataFrame(data=data)
    except Exception as ex:
        # Best effort: report the failure and still return something renderable.
        print(f"Caught: {ex}")
        return pd.DataFrame({"error_msg": [f"{type(ex).__name__}: {ex}"[0:40]]})

In [None]:
# Quick check: a binding typed "literal" whose value merely looks like a URI.
# It is neither an int nor a float, so process_value returns it as a string.
process_value({
            "type": "literal",
            "value": "http://rdf.wwpdb.org/pdb/6DZI"
          })

In [34]:
import yaml
from pathlib import Path

# Ground-truth examples; each entry provides a 'question' and a 'SPARQL' query.
resources = Path.cwd() / "resources"
ground_truth = yaml.safe_load((resources / "ground-truth.yaml").read_text())

# Load the stored result blobs, keyed by the integer file stem.
# Files whose stem contains an underscore are skipped.
expected = {}
results = {}
for p in (Path.cwd() / "up").glob("*.json"):
    if "_" in p.stem:
        continue
    expected[int(p.stem)] = json.loads(p.read_text())
for p in (Path.cwd() / "gen_results").glob("*.json"):
    if "_" in p.stem:
        continue
    results[int(p.stem)] = json.loads(p.read_text())

# One comparison record per ground-truth example, in ground-truth order.
diff_results = []

for index, g in enumerate(ground_truth):
    up_problems = []
    res_problems = []

    # Every example must have both an expected and a generated result blob,
    # and both blobs must carry an 'error_type' entry.
    assert index in expected
    assert index in results
    assert 'error_type' in expected[index]
    assert 'error_type' in results[index]

    # The problem lists are filled by results_to_data_frame while the record
    # is being built and are stored in the same record.
    diff_results.append({
        'question': g['question'],
        'expected_sparql': remove_prefixes(g['SPARQL']),
        'expected_results': results_to_data_frame(expected[index], up_problems),
        'actual_sparql': remove_prefixes(results[index]['actual_sparql']),
        'actual_results': results_to_data_frame(results[index], res_problems),
        'up_problems': up_problems,
        'res_problems': res_problems,
    })
    


In [26]:
import html
def blob_to_table_html(iter) -> str:
    """Render one comparison record as an HTML table.

    The table shows the question (prefixed with its index) as a heading,
    the expected and generated SPARQL side by side, and below them the
    first 30 rows of the expected and generated result frames.

    Args:
        iter: index into ``diff_results``. (The name shadows the builtin
            but is kept so existing callers keep working.)

    Returns:
        The rendered HTML fragment.
    """
    dr = diff_results[iter]

    def frame_html(frame) -> str:
        # A result frame can be missing (None) when its result blob could
        # not be converted; show a placeholder instead of crashing.
        if frame is None:
            return "<em>no result</em>"
        return frame[0:30].to_html()

    template = jenv.from_string("""
    <table>
      <thead>
          <tr>
              <th colspan="2" style="text-align: center;">{{question}}</th>
          </tr>
          <tr>
              <th style="text-align: center;">Expected</th>
              <th style="text-align: center;">Generated</th>
          </tr>
      </thead>
      <tbody>
        <tr>
            <td><pre style="text-align: left;">
{{expected_sparql}}
</pre></td>
            <td><pre style="text-align: left;">
{{actual_sparql}}
</pre></td>
        </tr>
        <tr>
            <td>{{df_html_left}}</td>
            <td>{{df_html_right}}</td>
        </tr>
      </tbody>
    </table>
    """)
    # The values are escaped here by hand; the question is escaped like the
    # SPARQL text so that characters such as '<' or '&' cannot break the markup.
    return template.render(question=html.escape(f"{iter} {dr['question']}"),
                           expected_sparql=html.escape(dr["expected_sparql"]),
                           actual_sparql=html.escape(dr["actual_sparql"]),
                           df_html_left=frame_html(dr['expected_results']),
                           df_html_right=frame_html(dr['actual_results']))

In [None]:

# Indices of the examples to render; an empty list renders every example.
filter_set=[]
html_str = ""

for index, q in enumerate(ground_truth):
    # Skip examples that are not selected when a filter is active.
    if filter_set and index not in filter_set:
        continue
    print(index)
    html_str += "\n" + blob_to_table_html(index)

# Save the combined tables to a file.
with open("comparison.html", "w") as file1:
    file1.write(html_str)

# Show the same HTML inline in the notebook.
display(HTML(html_str))