In [2]:
import gzip
import json

import pandas as pd
from rdflib import Namespace, Graph, URIRef, Literal, BNode
from rdflib.namespace import RDFS
from UtilityFunctions.dictionary_functions import flatten_dictionary
from UtilityFunctions.get_data_path import get_path
from UtilityFunctions.string_functions import split_words, turn_words_singular, split_words_inc_slash
from UtilityFunctions.wikidata_functions import wikidata_query, retrieve_wikidata_claims, category_query, min_qid, get_all_wikidata_claims, compare_qids, categories_dict_singular
from UtilityFunctions.schema_functions import get_schema_predicate, get_schema_type, get_class_mappings
from UtilityFunctions.get_uri import get_uri

In [12]:
# Read the Yelp-category -> schema type lookup table; the CSV location is
# resolved by the project's get_path helper.
class_mappings = pd.read_csv(filepath_or_buffer=get_path("class_mappings.csv"))

In [13]:
# Display the loaded mapping table (columns: YelpCategory, SchemaType).
class_mappings

Unnamed: 0,YelpCategory,SchemaType
0,Airport,Airport
1,Osteopath,Osteopathic
2,Repair,AutoRepair
3,RadioStation,RadioStation
4,HardwareStore,HardwareStore
...,...,...
213,Hostel,Hostel
214,EmploymentLaw,EmploymentAgency
215,Supplement,DietarySupplement
216,Trust,UKTrust


In [6]:
from shexer.shaper import Shaper
from shexer.consts import NT, SHEXC, SHACL_TURTLE

# Minimal sheXer example: infer ShEx shapes for two classes from a small
# N-Triples graph supplied in memory (no input file is involved).

# Classes whose instances are inspected to build one shape each.
target_classes = [
    "http://example.org/Person",
    "http://example.org/Gender"
]

# Namespace IRI -> prefix used when serialising the shapes.
# The empty prefix is bound to the namespace of the generated shapes.
namespaces_dict = {"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
                   "http://example.org/": "ex",
                   "http://weso.es/shapes/": "",
                   "http://www.w3.org/2001/XMLSchema#": "xsd"
                   }

# NOTE(review): sarah's ex:brother points at <http://example.org/Jim> (capital J)
# while the typed instance is <http://example.org/jim>. IRIs are case-sensitive,
# so that link does not reach the Person instance. The data is left unchanged
# because fixing it would alter the inferred shape - confirm whether intended.
raw_graph = """
<http://example.org/sarah> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Person> .
<http://example.org/sarah> <http://example.org/age> "30"^^<http://www.w3.org/2001/XMLSchema#int> .
<http://example.org/sarah> <http://example.org/name> "Sarah" .
<http://example.org/sarah> <http://example.org/gender> <http://example.org/Female> .
<http://example.org/sarah> <http://example.org/occupation> <http://example.org/Doctor> .
<http://example.org/sarah> <http://example.org/brother> <http://example.org/Jim> .

<http://example.org/jim> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Person> .
<http://example.org/jim> <http://example.org/age> "28"^^<http://www.w3.org/2001/XMLSchema#int> .
<http://example.org/jim> <http://example.org/name> "Jimbo".
<http://example.org/jim> <http://example.org/surname> "Mendes".
<http://example.org/jim> <http://example.org/gender> <http://example.org/Male> .

<http://example.org/Male> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Gender> .
<http://example.org/Male> <http://www.w3.org/2000/01/rdf-schema#label> "Male" .
<http://example.org/Female> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Gender> .
<http://example.org/Female> <http://www.w3.org/2000/01/rdf-schema#label> "Female" .
<http://example.org/Other> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Gender> .
<http://example.org/Other> <http://www.w3.org/2000/01/rdf-schema#label> "Other gender" .
"""

# The graph is passed as a string through `raw_graph`; the previously declared
# (and never used) `input_nt_file` variable was removed to avoid implying that
# a file is read here.
shaper = Shaper(target_classes=target_classes,
                raw_graph=raw_graph,
                input_format=NT,
                namespaces_dict=namespaces_dict,  # Default: no prefixes
                instantiation_property="http://www.w3.org/1999/02/22-rdf-syntax-ns#type")  # Default rdf:type

# The inferred shapes are written to this file in the working directory.
output_file = "shaper_example.shex"

# Per the sheXer docs, constraints satisfied by fewer than 10% of a shape's
# instances are discarded.
shaper.shex_graph(output_file=output_file,
                  acceptance_threshold=0.1)

print("Done!")


Done!


In [16]:
from shexer.shaper import Shaper
from shexer.consts import NT, SHEXC, SHACL_TURTLE

# Infer ShEx shapes for selected classes from the Yelp business N-Triples dump.

# Classes whose instances are inspected to build one shape each.
target_classes = [
    "https://example.org/SchemaClass",
    "https://example.org/YelpCategory",
    "https://example.org/ExampleClass",
    "https://schema.org/Restaurant"
]

# NOTE(review): absolute, machine-specific path - consider resolving it through
# a configurable data directory.
input_nt_file = "/home/ubuntu/vol1/virtuoso/import/yelp_business.nt"

# `raw_graph` expects the RDF content itself as a string; a path on disk must be
# passed through `graph_file_input`, otherwise the path text is parsed as if it
# were the graph.
shaper = Shaper(target_classes=target_classes,
                graph_file_input=input_nt_file,
                input_format=NT,
                instantiation_property="http://www.w3.org/1999/02/22-rdf-syntax-ns#type")  # Default rdf:type

# NOTE(review): this is the same output path used by the in-memory example cell,
# so running both cells overwrites that file - confirm this is intended.
output_file = "shaper_example.shex"

# Per the sheXer docs, constraints satisfied by fewer than 10% of a shape's
# instances are discarded.
shaper.shex_graph(output_file=output_file,
                  acceptance_threshold=0.1)

print("Done!")


KeyboardInterrupt: 

In [5]:
from Code.UtilityFunctions.run_query import run_query

# SPARQL: every distinct class that appears as the object of rdf:type (`a`).
query = """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
  
SELECT DISTINCT ?type
WHERE {
  ?s a ?type.
}"""

# The call is the cell's last expression, so its return value is displayed.
run_query(
    query=query,
    as_dataframe=True,
    do_print=False,
    include_types=False,
)

Unnamed: 0,type.value
0,http://www.openlinksw.com/schemas/virtrdf#QuadMapFormat
1,http://www.openlinksw.com/schemas/virtrdf#QuadStorage
2,http://www.openlinksw.com/schemas/virtrdf#array-of-QuadMapFormat
3,http://www.openlinksw.com/schemas/virtrdf#QuadMap
4,http://www.openlinksw.com/schemas/virtrdf#QuadMapValue
5,http://www.openlinksw.com/schemas/virtrdf#array-of-QuadMapColumn
6,http://www.openlinksw.com/schemas/virtrdf#QuadMapColumn
7,http://www.openlinksw.com/schemas/virtrdf#array-of-QuadMapATable
8,http://www.openlinksw.com/schemas/virtrdf#QuadMapATable
9,http://www.openlinksw.com/schemas/virtrdf#QuadMapFText


In [1]:
import torch
import torchtext

# The first run downloads glove.6B.zip (~862MB) into .vector_cache/
glove = torchtext.vocab.GloVe(name="6B",  # 6B-token vectors (Wikipedia 2014 + Gigaword 5)
                              dim=100)    # embedding size = 100

.vector_cache/glove.6B.zip: 862MB [02:49, 5.09MB/s]                               
100%|█████████▉| 399999/400000 [00:13<00:00, 29410.83it/s]


In [2]:
# Show the embedding vector stored for the token "cat".
glove["cat"]

tensor([ 0.2309,  0.2828,  0.6318, -0.5941, -0.5860,  0.6326,  0.2440, -0.1411,
         0.0608, -0.7898, -0.2910,  0.1429,  0.7227,  0.2043,  0.1407,  0.9876,
         0.5253,  0.0975,  0.8822,  0.5122,  0.4020,  0.2117, -0.0131, -0.7162,
         0.5539,  1.1452, -0.8804, -0.5022, -0.2281,  0.0239,  0.1072,  0.0837,
         0.5501,  0.5848,  0.7582,  0.4571, -0.2800,  0.2522,  0.6896, -0.6097,
         0.1958,  0.0442, -0.3114, -0.6883, -0.2272,  0.4618, -0.7716,  0.1021,
         0.5564,  0.0674, -0.5721,  0.2374,  0.4717,  0.8277, -0.2926, -1.3422,
        -0.0993,  0.2814,  0.4160,  0.1058,  0.6220,  0.8950, -0.2345,  0.5135,
         0.9938,  1.1846, -0.1636,  0.2065,  0.7385,  0.2406, -0.9647,  0.1348,
        -0.0072,  0.3302, -0.1236,  0.2719, -0.4095,  0.0219, -0.6069,  0.4076,
         0.1957, -0.4180,  0.1864, -0.0327, -0.7857, -0.1385,  0.0440, -0.0844,
         0.0491,  0.2410,  0.4527, -0.1868,  0.4618,  0.0891, -0.1819, -0.0152,
        -0.7368, -0.1453,  0.1510, -0.71

In [3]:
# Distance between the "cat" and "dog" embeddings: norm of their difference.
x = glove["cat"]
y = glove["dog"]
(y - x).norm()

tensor(2.6811)