# GleanerIO SHACL multisite processor

## About

Ted Habberman presented to the CDF on ISO metadata analysis.  I have been exploring some approaches to accessing the FAIR Digital Object server that is part of the standard setup supporting GeoCODES and other groups.  Leveraging Python Dask and S3 Boto it is easy to access these object stores in a highly performant manner.  

In addition to DASK there is S3SELECT as well as SPARQL calls on objects and triplestores.  

## Notes

Curious above local context file reading when doing lots of calls.  PyLD seems to know to do caching but not sure how it could in a Dask based pattern.

May need to explore approach to read local files for context like

## Imports


In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)  ## remove pandas future warning

import boto3
import pandas as pd
import json
import s3fs
import kglab
import seaborn as sns
from rdflib import Graph  #, plugin
import getpass
import plotly.express as px
from tqdm import tnrange
from matplotlib import pyplot as plt
import dask
from dask.distributed import Client
import dask.dataframe as dd
import dask.multiprocessing

## Set up Dask client

In [3]:
from dask.distributed import Client    #, progress
client = Client(threads_per_worker=5, n_workers=4)
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 45661 instead


0,1
Connection method: Cluster object,Cluster type: LocalCluster
Dashboard: http://127.0.0.1:45661/status,

0,1
Status: running,Using processes: True
Dashboard: http://127.0.0.1:45661/status,Workers: 4
Total threads:  20,Total memory:  31.17 GiB

0,1
Comm: tcp://127.0.0.1:46601,Workers: 4
Dashboard: http://127.0.0.1:45661/status,Total threads:  20
Started:  Just now,Total memory:  31.17 GiB

0,1
Comm: tcp://127.0.0.1:42593,Total threads: 5
Dashboard: http://127.0.0.1:45497/status,Memory: 7.79 GiB
Nanny: tcp://127.0.0.1:34837,
Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-8p0udqcc,Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-8p0udqcc
GPU: NVIDIA GeForce GTX 1050 Ti,GPU memory: 4.00 GiB

0,1
Comm: tcp://127.0.0.1:40649,Total threads: 5
Dashboard: http://127.0.0.1:45665/status,Memory: 7.79 GiB
Nanny: tcp://127.0.0.1:45711,
Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-dqoegkfq,Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-dqoegkfq
GPU: NVIDIA GeForce GTX 1050 Ti,GPU memory: 4.00 GiB

0,1
Comm: tcp://127.0.0.1:41659,Total threads: 5
Dashboard: http://127.0.0.1:36963/status,Memory: 7.79 GiB
Nanny: tcp://127.0.0.1:40037,
Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-b08r59bq,Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-b08r59bq
GPU: NVIDIA GeForce GTX 1050 Ti,GPU memory: 4.00 GiB

0,1
Comm: tcp://127.0.0.1:37919,Total threads: 5
Dashboard: http://127.0.0.1:41599/status,Memory: 7.79 GiB
Nanny: tcp://127.0.0.1:38825,
Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-np1zancp,Local directory: /home/fils/Containers/dvols/jupyter/work/Gleaner/notebooks/validation/dask-worker-space/worker-np1zancp
GPU: NVIDIA GeForce GTX 1050 Ti,GPU memory: 4.00 GiB


## File Access

In [9]:
# Set the shapegraph to use
sg = './shapes/geocodes_dcscan.ttl'

## Anonymous S3 File system
oss = s3fs.S3FileSystem(
    anon=True,
    client_kwargs = {"endpoint_url":"https://oss.geodex.org"}
)

## Access controlled s3
# session = boto3.Session(profile_name='default' ,   region_name="us-east-1")
# s3 = session.client('s3')  # needed later for listing objects
# s3r = session.resource('s3')
# oss = s3fs.S3FileSystem( profile="default")

## Manual code access
# ACCESS_CODE = getpass.getpass()
# SECRET_CODE = getpass.getpass()

# oss = s3fs.S3FileSystem(
#     anon=False,
#     key=ACCESS_CODE,
#     secret=SECRET_CODE,
#     client_kwargs = {"endpoint_url":"http://192.168.86.45:49159"}
# )

In [10]:
# [optional] List the directories we can work with later
sumlist = oss.ls('gleaner/summoned') # gleaner.oih/summoned/
print(sumlist)
print(len(sumlist))

# for testing, set to a smaller subset

# sumlist = ['gleaner/summoned/lipdverse', 'gleaner/summoned/magic', 'gleaner/summoned/hydroshare']

['gleaner/summoned/bcodmo', 'gleaner/summoned/cchdo', 'gleaner/summoned/earthchem', 'gleaner/summoned/getiedadataorg', 'gleaner/summoned/hydroshare', 'gleaner/summoned/ieda', 'gleaner/summoned/iris', 'gleaner/summoned/lipdverse', 'gleaner/summoned/magic', 'gleaner/summoned/ocd', 'gleaner/summoned/opentopo', 'gleaner/summoned/ssdb', 'gleaner/summoned/ucar', 'gleaner/summoned/unavco', 'gleaner/summoned/wikilinkedearth', 'gleaner/summoned/wwwbco-dmoorg', 'gleaner/summoned/wwwhydroshareorg']
17


### Definitions

In [11]:
# some color coding for pandas for later
def change_color_group(x):
    dfcg = x.copy()
    dfcg.loc[df['severity'] == "shacl:Violation", :] = 'background-color: #F89782'
    dfcg.loc[df['severity'] == "shacl:Warning", :] = 'background-color: #F0F480'
    dfcg.loc[df['severity'] == "shacl:Info", :] = 'background-color: #CBFBD2'
    return dfcg

In [12]:
## need a SPARQL for counts of the various validations


bc = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX sh: <http://www.w3.org/ns/shacl#>
SELECT  ?tid ?b
  WHERE {
    ?tid rdf:type sh:ValidationReport .
    ?tid sh:conforms ?b .
  }

"""

ccount = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX sh: <http://www.w3.org/ns/shacl#>
SELECT  ?b (COUNT(?tid) as ?tcount)
  WHERE {
    ?tid rdf:type sh:ValidationReport .
    ?tid sh:conforms ?b .
  }

group by (?b)
"""

scount = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX sh: <http://www.w3.org/ns/shacl#>
SELECT  ?severity (COUNT(?id) as ?tcount)
  WHERE {
    ?id rdf:type sh:ValidationResult .
    ?id sh:resultSeverity ?severity .
  }
group by (?severity)

"""





In [13]:
# SHACL function
@dask.delayed()
def read_and_validate(fn):
    # make a kg to hold our data graph
    namespaces = {
        "schema":  "https://schema.org/",
        "shacl":   "http://www.w3.org/ns/shacl#" ,
    }

    kg = kglab.KnowledgeGraph(
        name = "Schema.org based datagraph",
        base_uri = "https://example.org/id/",
        namespaces = namespaces,
    )

    ## this is a mess.   no need to load to a graph objet to validate.  also, 
    ## can we load up the graph first and then validate later for everythng?
    with oss.open(fn, 'rb') as f:
        jld = json.loads(f.read().decode("utf-8", "ignore").replace('\n',' '))
        jlds = json.dumps(jld)

        g = Graph().parse(data=jlds, format='json-ld')
        context = {
            "@vocab": "https://schema.org/",
        }

        ttl = g.serialize(format='ttl', context=context, indent=4)
        kg.load_rdf_text(ttl, format="ttl", base=None)

        conforms, report_graph, report_text = kg.validate(
            shacl_graph=sg,
            shacl_graph_format="ttl"
        )

        return report_graph.save_rdf_text()  # return graph object or string of graph

## Get files


### Multi Source Loop

In [14]:
# supress warning, use with caution
import warnings 
warnings.filterwarnings('ignore')

%%time
## Loop all items in summoned prefix

allresults = []

# hack to remove some problematic ones
try:
    sumlist.remove("gleaner/summoned/bcodmo")
except:
    print("bcodmo, note present..")
    
for item in sumlist:
    try:
        print(item)
        fns = oss.ls(item)
        o = []
        # o = [read_and_validate(f) for f in fns]  # need to skip error file ".jsonld"
        for f in fns:
                temp = read_and_validate(f)
                o.append(temp)
        results = dask.compute(*o)
        allresults.append(results)
    except:
        print("ERROR")
        print(item)

# fns = oss.ls(checkPrefix)
# o = [read_and_validate(f) for f in fns]
# results = dask.compute(*o)

gleaner/summoned/cchdo
gleaner/summoned/earthchem
gleaner/summoned/getiedadataorg




gleaner/summoned/hydroshare




gleaner/summoned/ieda


Function:  read_and_validate
args:      ('gleaner/summoned/ieda/.jsonld')
kwargs:    {}
Exception: JSONDecodeError('Expecting property name enclosed in double quotes: line 1 column 6694 (char 6693)')

distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/core.py", line 498, in handle_comm
    result = handler(comm, **msg)
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/scheduler.py", line 3949, in heartbeat_worker
    ws._executing = {
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/scheduler.py", line 3950, in <dictcomp>
    parent._tasks[key]: duration for key, duration in executing.items()
KeyError: 'read_and_validate-3da5131b-4fc9-40ea-a43a-455b2e91893f'
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/home/fils/.conda/envs/kglab/l

ERROR
gleaner/summoned/ieda
gleaner/summoned/iris


n
    return await retry(
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/utils_comm.py", line 370, in retry
    return await coro()
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/core.py", line 866, in send_recv_from_rpc
    result = await send_recv(comm=comm, op=key, **kwargs)
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/core.py", line 667, in send_recv
    raise exc.with_traceback(tb)
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/core.py", line 498, in handle_comm
    result = handler(comm, **msg)
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/scheduler.py", line 3949, in heartbeat_worker
    ws._executing = {
  File "/home/fils/.conda/envs/kglab/lib/python3.8/site-packages/distributed/scheduler.py", line 3950, in <dictcomp>
    parent._tasks[key]: duration for key, duration in executing.items()
KeyError: 'read_and_validate-3da5131b-

gleaner/summoned/lipdverse




gleaner/summoned/magic




gleaner/summoned/ocd




gleaner/summoned/opentopo




gleaner/summoned/ssdb


Function:  read_and_validate
args:      ('gleaner/summoned/ssdb/3765e18c5d85e9dc0ee1ef2d5cca0df10d118e74.jsonld')
kwargs:    {}
Exception: UnicodeDecodeError('utf-8', b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\xa4X[s\xdb6\x16~^\xff\n\x84/\x96wD\xd2r\xda\xedn,i6\xad\x9b8;\xdb$\xb3N\xd3\xc9d2; \t\x91\xb0I\x82\x01@\xc9\x9a\xa6\xff}\xbf\x03\x90\x14i\xbb\xc9\xcc\xda\x0f2\x88\xcb\xc1\xb9\x9f\xef\xe0h\xf9\xe4\xe2\xcdO\xef>\xbc\xfd\x99\x15\xb6*\xd7GK\xfa\xc7J^\xe7\xab@\xd4\x01&\x9e\x84!{)j\xa1\xb9\x15\x19\xdbhU\xb1KU\x89\xe8\xfa\x8c\x85\xe1\xfa\x88\xe1oY\x08\x9euC+m)\xd6Wi!*\x1e)\x9d\xb3\x90\x8d>\x96\xb1_\xf7\xc7*a9K\x0b\xae\x8d\xb0\xab\xa0\xb5\x9b\xf0\xef\x01\xeb\xe8\xb8\xb5\x9aWb\x15l\xa5\xd85J\xdb\x80\xa5\xaa\xb6\xa2\xc6\xde\x9d\xccl\xb1\xca\xc4V\xa6"t\x1fs&ki%/C\x93\xf2R\xac\x16\xe0\xdd\xf16"\x94\t\x93j\xd9X\xa9\xea\x11\xad\x11{\xd20\xce\xc0\x0cS\x1b&nq\x95\x91I)\x98q\xd2\x18f\x0bn\x99\xa89\xe6\x0c\xdb\x89\xa4\xe2\xc6\n\x8dy\xc5D\x95\x88\xcc]h\xacnS\xdbj(+\xe3\x90O\xd58\'\xa4\xa6\x03\xac\x

ERROR
gleaner/summoned/ssdb
gleaner/summoned/ucar


d0\x18q\xd1\x95\x13Ds\xb2~\x8d2\xb7\x8c\x93\xf53\xb6W-\xe3Z0\x023\xb2\xce\t\x130\xec\x14\xa5j(H\x9cy\xb7\x00\x13\xc8$\x84>\x86\xe4\xd6;\xe6\xd8\x1d\xbd\xcb\x93k\x92\x91#w\xf6J\x88\xc3!/b\xa1v;\xb1S\xfa\xa6\x93\xeeR\xed\x00B\x18\xcd\xd09\x07C*\x05\x9e2\xe00Y\x1a"\xd4I4\x08v\xc4<\x02\xbc\xf4UF\x00\x9f8\x08\xd8\x97\x0e\xa1\xfb\x1a\xe3\xa2\xb9O\xb0\x15\xea\xc7O\x1e\xac\r6]\x16\x8b\xf5o\xa2D\xd1\x14\x84\x98\xc6B`\xe5\xe8h\xf0\xf0e3\xcd\x08\x87\x9d\xcc\x01\xb4T\x95%\x82\x08\x80Tn\xc5\x1c\xa0\xb0\xaaZ`\xbf=\xe3\xc8][\x1a\xec\xa4-\x80\xe4*i\x9c>q[\xaa\x05\xf0\xeb|\xb8\x83\xb8%&\xa9\xd0\xcd\x1d\xecj\x80la\xad\x01\xec\x11H{\x18\xcbM\x88\xbc\x02"\xd5\xb5\xb0s\x02z\x03\xc4#4\nL\x08\xad\xb2J\x18C\xa8\xcf\xdf\x92\x88\xbd\xaa3o2\xa7\xb1\x18\xc2\x0e\x04\xbf"\xf9V\xa5<iK\xae\xf7,\xe55K\x04!\xc8\xccKZ\xf1z\xcf2\xb9\xd9\x08\rW\x02\x98LU\x06\x1f3Sq%\x8aVK\xf3\xec?\x17/\xf8\x9c\xfd"S\xad\x1cD%\xd4\xf9\xaf\xab7\xaf\xc3\x7f_D\xec]!\x00M\x87\xeb$Pi\xaa\xe0\x99\x03\x93\xc46\xae\x01\xd8&\xa1\x80E=H-dc\xc0\x96\x

ERROR
gleaner/summoned/ucar
gleaner/summoned/unavco


Function:  read_and_validate
args:      ('gleaner/summoned/unavco/886b1c0ca6b7da233cdc2052c52453aba12a4d61.jsonld')
kwargs:    {}
Exception: UnicodeDecodeError('utf-8', b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\xa4X[s\xdb6\x16~^\xff\n\x84/\x96wD\xd2r\xda\xedn,i6\xad\x9b8;\xdb$\xb3N\xd3\xc9d2; \t\x91\xb0I\x82\x01@\xc9\x9a\xa6\xff}\xbf\x03\x90\x14i\xbb\xc9\xcc\xda\x0f2\x88\xcb\xc1\xb9\x9f\xef\xe0h\xf9\xe4\xe2\xcdO\xef>\xbc\xfd\x99\x15\xb6*\xd7GK\xfa\xc7J^\xe7\xab@\xd4\x01&\x9e\x84!{)j\xa1\xb9\x15\x19\xdbhU\xb1KU\x89\xe8\xfa\x8c\x85\xe1\xfa\x88\xe1oY\x08\x9euC+m)\xd6Wi!*\x1e)\x9d\xb3\x90\x8d>\x96\xb1_\xf7\xc7*a9K\x0b\xae\x8d\xb0\xab\xa0\xb5\x9b\xf0\xef\x01\xeb\xe8\xb8\xb5\x9aWb\x15l\xa5\xd85J\xdb\x80\xa5\xaa\xb6\xa2\xc6\xde\x9d\xccl\xb1\xca\xc4V\xa6"t\x1fs&ki%/C\x93\xf2R\xac\x16\xe0\xdd\xf16"\x94\t\x93j\xd9X\xa9\xea\x11\xad\x11{\xd20\xce\xc0\x0cS\x1b&nq\x95\x91I)\x98q\xd2\x18f\x0bn\x99\xa89\xe6\x0c\xdb\x89\xa4\xe2\xc6\n\x8dy\xc5D\x95\x88\xcc]h\xacnS\xdbj(+\xe3\x90O\xd58\'\xa4\xa6\x03\xac

ERROR
gleaner/summoned/unavco
gleaner/summoned/wikilinkedearth


h\xacnS\xdbj(+\xe3\x90O\xd58\'\xa4\xa6\x03\xac\xe19\x8en\x94f\xad\x11,\xd9\xe32\xae\xd3\x02DsYc\x85\xd7\x19S\xd8\xae\x19o\x9aR\xa6\x9cx5Q\xc0\xe2N\x9aR\xd67L\x8br\x15\x98\x02*I[\xcb$\xb4\x120\xbbo\xa0*Y\xe1\x82\xb8\xa9\xf3\x80\x15ZlVA\x9c\xa9\xd4\xc4\x1b\x0eE\xa9:\xc2O\xf0\x00%\xbb\x87@\x85\x10\xd0\xb0\'c!|\x9c\x1a3\xa5\xe2\xd5\x00\x9bFn\xe9\xff\'\x04\xbb\xd5\xca\x8a\xc7\x92i\xb4\xb0Vn\xf6\x8f\xa5\xf3 ;\xdeS\x98\xd1\xe9*(\xacm\xcc\xb38\xe6\xd7\xfc6\xca\x95\xcaK\xc1\x1bi\xa2TUn..eb\xe2\xeb\xcf\xad\xd0\xfb\xf8i\xf4}\xb4\xe8>\xa2J\xd6\xd1\xb5\t\xd6\xcb\xd8\xd3\xeb\xac8&\xee-t\xd0\xed\xfd\xed\xce\xad\x8e\x96\xb1\x8f\xb2e\xa2\xb2}G\x87b\xf3\x12\xb1\x07\x7f1\x96k\xeb\xc3\xf3-|\xc0\xcf\xf6A\xea\x82\xf8\xca\xc2\x9bRv\xa1R\xf6\xaa6\x02\xbb\xfb\x8d.\x90\x8f\x96\x99\xdc2\x99\xad\x02\x8a2\x0ew\xd4.\x84\x86i\xba^\xe8\x9d\xe6M\x1fZ\xfd\x01\xf2jZ] \xaaJn\xcc*\xa8TR\xf3m\xb7\x0f\t\x82vvK\x9eL\x98\x94*\xbd\t\xdc}\xfd\xf1\xb0\x14\x1b;\x9c\xe9N\x11CF"\x14\x91\n\xceF\x8bX\xe6\xbd\x8f\x07\xa3|\xb3\x8c\xb9W

ERROR
gleaner/summoned/wikilinkedearth
gleaner/summoned/wwwbco-dmoorg


Function:  read_and_validate
args:      ('gleaner/summoned/wwwbco-dmoorg/37164a4241362dd4fe906065b31e4de84504eb12.jsonld')
kwargs:    {}
Exception: UnicodeDecodeError('utf-8', b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\xa4X[s\xdb6\x16~^\xff\n\x84/\x96wD\xd2r\xda\xedn,i6\xad\x9b8;\xdb$\xb3N\xd3\xc9d2; \t\x91\xb0I\x82\x01@\xc9\x9a\xa6\xff}\xbf\x03\x90\x14i\xbb\xc9\xcc\xda\x0f2\x88\xcb\xc1\xb9\x9f\xef\xe0h\xf9\xe4\xe2\xcdO\xef>\xbc\xfd\x99\x15\xb6*\xd7GK\xfa\xc7J^\xe7\xab@\xd4\x01&\x9e\x84!{)j\xa1\xb9\x15\x19\xdbhU\xb1KU\x89\xe8\xfa\x8c\x85\xe1\xfa\x88\xe1oY\x08\x9euC+m)\xd6Wi!*\x1e)\x9d\xb3\x90\x8d>\x96\xb1_\xf7\xc7*a9K\x0b\xae\x8d\xb0\xab\xa0\xb5\x9b\xf0\xef\x01\xeb\xe8\xb8\xb5\x9aWb\x15l\xa5\xd85J\xdb\x80\xa5\xaa\xb6\xa2\xc6\xde\x9d\xccl\xb1\xca\xc4V\xa6"t\x1fs&ki%/C\x93\xf2R\xac\x16\xe0\xdd\xf16"\x94\t\x93j\xd9X\xa9\xea\x11\xad\x11{\xd20\xce\xc0\x0cS\x1b&nq\x95\x91I)\x98q\xd2\x18f\x0bn\x99\xa89\xe6\x0c\xdb\x89\xa4\xe2\xc6\n\x8dy\xc5D\x95\x88\xcc]h\xacnS\xdbj(+\xe3\x90O\xd58\'\xa4\xa6\

ERROR
gleaner/summoned/wwwbco-dmoorg
gleaner/summoned/wwwhydroshareorg




CPU times: user 6min 13s, sys: 17.6 s, total: 6min 31s
Wall time: 22min 44s


### Single Source

In [None]:
%%time
## single item

checkPrefix = "gleaner/summoned/magic"
fns = oss.ls(checkPrefix)
o = [read_and_validate(f) for f in fns]
singleresults = dask.compute(*o)

In [None]:
print(singleresults[1])

### Build an array of graphs based on these.  (or a map?)

In [15]:
allgraphs = []

for ar in allresults:
    rnamespaces = {
        "schema":  "https://schema.org/",
        "shacl":   "http://www.w3.org/ns/shacl#" ,
    }

    kg = kglab.KnowledgeGraph(
        name = "Schema.org shacl eval datagraph",
        base_uri = "https://gleaner.io/id/genid/",
        namespaces = rnamespaces,
    )

    empty = True
    for r in ar:
        if r != None:
            kg.load_rdf_text(data=r, format="ttl")
            empty = False
            
    if not empty:        
        allgraphs.append(kg)

In [16]:
print(len(allgraphs))

10


In [17]:
sparql = """
SELECT ?path ?value ?constraint ?severity ?message ?id ?focus
  WHERE {
    ?id rdf:type shacl:ValidationResult .
    ?id shacl:focusNode ?focus .
    ?id shacl:resultMessage ?message .
    ?id shacl:resultSeverity ?severity .
    ?id shacl:sourceConstraintComponent ?constraint .
    OPTIONAL {
        ?id shacl:resultPath ?path .
    }
    OPTIONAL {
        ?id shacl:value ?value .
    }
  }
"""

for k in allgraphs:
    pdf = k.query_as_df(sparql)
    df = pdf.to_pandas()
    df.style.background_gradient(cmap='Blues')

    if len(df) > 0 :
        dfc = df.groupby('severity').count().reset_index().rename(columns={'path': 'Count'})
        ctst = pd.crosstab(df['message'], df['severity'], margins = True, margins_name = 'Subtotals')

        print("Checking {} objects in {}".format("len of original bucket","NAME" ))
        print(ctst)
        print("-----------------------------------------")

# fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
# fig.suptitle('test results')
i = 1
plt.subplots(figsize=(20, 70))

for k in allgraphs:
    # TODO put a try in here...  
    pdf = k.query_as_df(sparql)
    df = pdf.to_pandas()
    df.style.background_gradient(cmap='Blues')

    if len(df) > 0 :
        dfc = df.groupby('severity').count().reset_index().rename(columns={'path': 'Count'})
        ctst = pd.crosstab(df['message'], df['severity'], margins = True, margins_name = 'Subtotals')

        # sns.set(rc={'figure.figsize':(11.7,8.27)})
        plt.subplot(len(allgraphs),1,i)
        sns.heatmap(ctst, annot=True, fmt=".0f", cmap = sns.cm.crest)
        # axes[i].set_title(i)
        i+=1

plt.show()

AttributeError: 'DataFrame' object has no attribute 'to_pandas'