In [1]:
import datetime
import rdflib
import requests
import threading
# Linked Data Platform vocabulary; LDP.contains links a container to its members.
LDP = rdflib.Namespace("http://www.w3.org/ns/ldp#")
# Record (and show) the UTC time at which this notebook run started.
now = datetime.datetime.utcnow()
print(now)

2020-01-31 22:03:25.167410


In [2]:
def get_sinopia_production():
    """Harvest the Sinopia production groups into one RDF graph per group.

    Fetches the Trellis repository root, follows every ``ldp:contains`` link
    to a group container and loads that group through ``get_group_graph``.
    Groups whose URI ends in ``ld4p`` or ``yale`` are skipped.

    Returns:
        list: one graph per harvested group.

    Raises:
        requests.HTTPError: if the repository root returns an error status.
    """
    start = datetime.datetime.utcnow()
    print(f"Started extracting Sinopia at {start}")
    repository_request = requests.get('https://trellis.sinopia.io/repository/')
    # Fail fast: an error response must not be parsed as if it were the
    # repository listing (an empty body would silently yield zero groups).
    repository_request.raise_for_status()
    repo_graph = rdflib.ConjunctiveGraph()
    repo_graph.parse(data=repository_request.text, format='turtle')
    resources = []
    for group_uri in repo_graph.objects(predicate=LDP.contains):
        if group_uri.endswith(("ld4p", "yale")):
            continue
        group_graph = get_group_graph(group_uri)
        print(f"{group_uri} finished with {len(group_graph)} triples.")
        resources.append(group_graph)
    end = datetime.datetime.utcnow()
    # total_seconds() covers the whole interval; .seconds drops whole days
    # and sub-second precision.
    elapsed_minutes = (end - start).total_seconds() / 60.0
    print(f"Finished at {end}, total time {elapsed_minutes} minutes.")
    return resources

def get_group_graph(group_uri):
    """Load a group container and the resources it lists into a single graph.

    Args:
        group_uri: URI of the group container to fetch.

    Returns:
        rdflib.ConjunctiveGraph: the container's own triples plus the triples
        of every resource it references through ``ldp:contains``.

    Raises:
        requests.HTTPError: if the group container returns an error status.
    """
    group_result = requests.get(group_uri)
    group_result.raise_for_status()
    group_graph = rdflib.ConjunctiveGraph()
    group_graph.parse(data=group_result.text, format='turtle')
    # Snapshot the member URIs first: the loop below parses more triples into
    # this same graph, and mutating a store while iterating over it is unsafe.
    resource_uris = list(group_graph.objects(predicate=LDP.contains))
    for resource_uri in resource_uris:
        resource_result = requests.get(str(resource_uri))
        if not resource_result.ok:
            # Report and skip instead of parsing an error body as Turtle.
            print(f"Skipping {resource_uri}: HTTP {resource_result.status_code}")
            continue
        group_graph.parse(data=resource_result.text, format='turtle')
    return group_graph

def check_uri(yale_uri, group_graph):
    """Tell whether ``yale_uri`` is the object of any triple in ``group_graph``.

    Args:
        yale_uri: the URI to look for, as a string or ``rdflib.URIRef``.
        group_graph: the rdflib graph to search.

    Returns:
        bool: True if at least one triple has ``yale_uri`` as its object.
    """
    # A triple-pattern membership test replaces the string-built SPARQL query:
    # the URI is never spliced into query text, and the lookup stops at the
    # first match.
    return (None, None, rdflib.URIRef(yale_uri)) in group_graph

def check_yale_uri(sinopia_graphs):
    """Check each harvested graph for references to Yale resources.

    Fetches the Yale group container and, for every resource it lists through
    ``ldp:contains``, looks in each graph of ``sinopia_graphs`` for a triple
    that has that resource as its object. Every hit is printed together with
    the index of the graph it was found in.

    Args:
        sinopia_graphs: sequence of rdflib graphs to search.

    Returns:
        bool: True when no Yale resource URI was found in any graph.

    Raises:
        requests.HTTPError: if the Yale container returns an error status.
    """
    start = datetime.datetime.utcnow()
    print(f"Checking for Yale URIs used in non-Yale descriptions at {start}")
    home = requests.get('https://trellis.sinopia.io/repository/yale')
    # An error response with an empty body would parse to an empty graph and
    # make the check look clean, so stop here instead.
    home.raise_for_status()
    yale_graph = rdflib.ConjunctiveGraph()
    yale_graph.parse(data=home.text, format='turtle')
    clean = True
    for obj in yale_graph.objects(predicate=LDP.contains):
        for i, graph in enumerate(sinopia_graphs):
            if check_uri(str(obj), graph):
                print(f"{obj} is used as an object in {i}")
                clean = False
    end = datetime.datetime.utcnow()
    print(f"Finished Yale URI check at {end}, total time {(end-start)} ")
    # The flag was previously computed and discarded; return it to the caller.
    return clean
    
def check_yale_graph(sinopia_graphs):
    """Check the merged Sinopia graph for references to Yale resources.

    Merges every graph in ``sinopia_graphs`` into one graph, fetches the Yale
    group container and prints each listed Yale resource that occurs as the
    object of a triple in the merged graph.

    Args:
        sinopia_graphs: sequence of rdflib graphs to merge and search.

    Returns:
        bool: True when no Yale resource URI was found in the merged graph.

    Raises:
        requests.HTTPError: if the Yale container returns an error status.
    """
    start = datetime.datetime.utcnow()
    print(f"Checking for Yale URIs used in non-Yale descriptions at {start}")
    sinopia_graph = rdflib.ConjunctiveGraph()
    # A second, shorter "Combining ..." line used to be printed just before
    # this one; it carried no extra information and was dropped.
    print(f"Combining {len(sinopia_graphs)} into a single graph, inital size {len(sinopia_graph)}")
    for graph in sinopia_graphs:
        sinopia_graph += graph
    print(f"Finished sinopia graph has {len(sinopia_graph)} triples")
    home = requests.get('https://trellis.sinopia.io/repository/yale')
    # An error response with an empty body would parse to an empty graph and
    # make the check look clean, so stop here instead.
    home.raise_for_status()
    yale_graph = rdflib.ConjunctiveGraph()
    yale_graph.parse(data=home.text, format='turtle')
    clean = True
    for obj in yale_graph.objects(predicate=LDP.contains):
        if check_uri(obj, sinopia_graph):
            print(f"{obj} used as in another Sinopia description")
            clean = False

    end = datetime.datetime.utcnow()
    print(f"Finished Yale URI full graph check at {end}, total time {(end-start)} ")
    return clean

In [4]:
# Harvest one graph per production group. get_sinopia_production() issues an
# HTTP GET for every group and every resource a group contains.
sinopia_graphs = get_sinopia_production()

Started extracting Sinopia at 2020-01-31 22:03:43.094074
https://trellis.sinopia.io/repository/ucsd finished with 1 triples.
https://trellis.sinopia.io/repository/penn finished with 17 triples.
https://trellis.sinopia.io/repository/frick finished with 1 triples.
https://trellis.sinopia.io/repository/hrc finished with 1 triples.
https://trellis.sinopia.io/repository/stanford finished with 17 triples.
https://trellis.sinopia.io/repository/dlc finished with 14 triples.
https://trellis.sinopia.io/repository/chicago finished with 229 triples.
https://trellis.sinopia.io/repository/duke finished with 17 triples.
https://trellis.sinopia.io/repository/princeton finished with 280 triples.
https://trellis.sinopia.io/repository/harvard finished with 94 triples.
https://trellis.sinopia.io/repository/washington finished with 4084 triples.
https://trellis.sinopia.io/repository/alberta finished with 1813 triples.
https://trellis.sinopia.io/repository/minnesota finished with 504 triples.
https://trelli

In [7]:
# Per-graph check: is any Yale resource URI used as an object in a harvested graph?
check_yale_uri(sinopia_graphs)

Checking for Yale URIs used in non-Yale descriptions at 2020-01-31 22:25:01.360692
Finished Yale URI check at 2020-01-31 22:25:02.008417, total time 0:00:00.647725 


In [8]:
# Same question, asked of all harvested graphs merged into a single graph.
check_yale_graph(sinopia_graphs)

Checking for Yale URIs used in non-Yale descriptions at 2020-01-31 22:25:05.569016
Combining 21 into a single graph
Combining 21 into a single graph, inital size 0
Finished sinopia graph has 9357 triples
Finished Yale URI full graph check at 2020-01-31 22:25:06.199911, total time 0:00:00.630895 


## Delete all Yale resources in production

In [9]:
# Build the Authorization header used by the authenticated requests below.
# NOTE(review): the token path is an absolute path on one machine; point it at
# your own token file before running.
with open("/Users/jpnelson/2020/ld4p/sinopia_acl/.cognitoToken") as fo:
    # strip(): a trailing newline in the token file would otherwise become
    # part of the header value.
    cognitoToken = fo.read().strip()

headers = {
    "Authorization": f"Bearer {cognitoToken}"
}

In [10]:
# Fetch the Yale group container; its ldp:contains triples list the resources
# handled in the cells below.
yale_home = requests.get('https://trellis.sinopia.io/repository/yale')
# Stop on an HTTP error rather than parsing an error body as the listing.
yale_home.raise_for_status()
yale_graph = rdflib.Graph()
yale_graph.parse(data=yale_home.text, format='turtle')

<Graph identifier=Naf9ca2a5dab04bc7a911747b7d9413b7 (<class 'rdflib.graph.Graph'>)>

In [12]:
# DESTRUCTIVE: sends an HTTP DELETE for every resource listed in the Yale
# container.
for row in yale_graph.objects(predicate=LDP.contains):
    delete_result = requests.delete(str(row), headers=headers)
    print(delete_result.status_code)
    if not delete_result.ok:
        # Name the resource so a failed delete can be followed up.
        print(f"Failed to delete {row}: {delete_result.status_code} {delete_result.reason}")

204
204
204
204
204
204
204
204
204
204


In [14]:
# Print, as a bullet list, every resource URI recorded in the parsed Yale
# container graph.
yale_resource_uris = yale_graph.objects(predicate=LDP.contains)
for resource_uri in yale_resource_uris:
    print(f"*  {resource_uri}")

*  https://trellis.sinopia.io/repository/yale/6c25143b-1100-4776-b0e0-5762a26adb54
*  https://trellis.sinopia.io/repository/yale/f91816ee-75ab-4b65-9805-9ea7300eb602
*  https://trellis.sinopia.io/repository/yale/96a2c97d-a26f-4226-8306-e1b37918e623
*  https://trellis.sinopia.io/repository/yale/71d36fdf-0d78-4f01-adfc-2aab6b709d0d
*  https://trellis.sinopia.io/repository/yale/57244fa2-caa9-484f-b00d-6a8ebf89f872
*  https://trellis.sinopia.io/repository/yale/174fbdc2-baaf-480a-8a14-8bb57ec4919a
*  https://trellis.sinopia.io/repository/yale/23f1ae25-1b95-4f16-8437-bd87a11eb5d6
*  https://trellis.sinopia.io/repository/yale/202d10f1-3fc5-45a2-a2d0-28fa5e7b60d1
*  https://trellis.sinopia.io/repository/yale/de8545b9-63d1-4d89-b140-7f4340df096d
*  https://trellis.sinopia.io/repository/yale/2138cd11-fe01-44f8-aa9b-c489bc87dd60
