In [1]:
import pandas as pd

from Code.UtilityFunctions.run_query import run_query

In [19]:
pd.set_option('display.max_rows', 120)
pd.set_option('display.max_colwidth', 500)

### Create category file

In [12]:
query = """
SELECT DISTINCT ?s
WHERE {
    ?s rdfs:Class yelpont:YelpCategory .
}
"""

categories = run_query(query, as_dataframe=True)
categories

Unnamed: 0,s.value
0,https://purl.archive.org/purl/yelp/business_categories#taxi
1,https://purl.archive.org/purl/yelp/business_categories#acai_bowl
2,https://purl.archive.org/purl/yelp/business_categories#accessory
3,https://purl.archive.org/purl/yelp/business_categories#accountant
4,https://purl.archive.org/purl/yelp/business_categories#acne_treatment
...,...
1299,https://purl.archive.org/purl/yelp/business_categories#natural_gas_supplier
1300,https://purl.archive.org/purl/yelp/business_categories#serbo_croatian
1301,https://purl.archive.org/purl/yelp/business_categories#bubble_soccer
1302,https://purl.archive.org/purl/yelp/business_categories#ceremonial_clothing


In [14]:
categories['s.value'] = categories['s.value'].str.replace('https://purl.archive.org/purl/yelp/business_categories#', '')

  categories['s.value'] = categories['s.value'].str.replace('https://purl.archive.org/purl/yelp/business_categories#', '')


In [15]:
categories

Unnamed: 0,s.value
0,taxi
1,acai_bowl
2,accessory
3,accountant
4,acne_treatment
...,...
1299,natural_gas_supplier
1300,serbo_croatian
1301,bubble_soccer
1302,ceremonial_clothing


In [16]:
# APPENDS to the .ttl file
with open(file="yelp_categories.ttl", mode="a") as file:
    file.write("@prefix yelpont: <https://purl.archive.org/purl/yelp/ontology#> \n@prefix yelpcat: <https://purl.archive.org/purl/yelp/business_categories#> \n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n")
    for category in categories['s.value']:
        file.write(f'\nyelpcat:{category} a yelpont:YelpCategory ; \n    rdfs:label "{category} is a category in Yelp used to describe a business." . \n')

### Template for Yelp ontology file

In [17]:
query = """
SELECT DISTINCT ?p
WHERE {
    ?s ?p ?o .
    FILTER regex(?p, "^https://purl")
}
"""

predicates = run_query(query, as_dataframe=True)

In [20]:
predicates

Unnamed: 0,p.value
0,https://purl.archive.org/purl/yelp/yelp_ontology#hasMusic
1,https://purl.archive.org/purl/yelp/yelp_ontology#AcceptsInsurance
2,https://purl.archive.org/purl/yelp/yelp_ontology#Alcohol
3,https://purl.archive.org/purl/yelp/yelp_ontology#BYOB
4,https://purl.archive.org/purl/yelp/yelp_ontology#BYOBCorkage
5,https://purl.archive.org/purl/yelp/yelp_ontology#BikeParking
6,https://purl.archive.org/purl/yelp/yelp_ontology#BusinessAcceptsBitcoin
7,https://purl.archive.org/purl/yelp/yelp_ontology#BusinessAcceptsCreditCards
8,https://purl.archive.org/purl/yelp/yelp_ontology#ByAppointmentOnly
9,https://purl.archive.org/purl/yelp/yelp_ontology#Caters


In [None]:
# APPENDS to the .ttl file
with open(file="yelp_ontology.ttl", mode="a") as file:
    for predicate in predicates['p.value']:
        file.write(f'\n <{predicate}> \n a owl:ToDO ; \n rdfs:comment "Specifies something ..."@en ; \n rdfs:domain yelpont:X ; \n rdfs:label "{predicate}"@en ; \n rdfs:range yelpont:X . \n')

### Number of predicates in Yelp ontology

In [36]:
query = """
SELECT DISTINCT(?p)
FROM <http://www.yelpkg.com/yelp_kg>
WHERE {
    ?s ?p ?o .
    FILTER regex(?p, "^https://purl.archive.org/purl/yelp/yelp_ontology")
}
"""

pred = run_query(query, as_dataframe=True)

In [45]:
set(predicates['p.value']) - set(pred['p.value'])

{'https://purl.archive.org/purl/yelp/ontology#average_stars',
 'https://purl.archive.org/purl/yelp/ontology#compliment_cool',
 'https://purl.archive.org/purl/yelp/ontology#compliment_count',
 'https://purl.archive.org/purl/yelp/ontology#compliment_cute',
 'https://purl.archive.org/purl/yelp/ontology#compliment_funny',
 'https://purl.archive.org/purl/yelp/ontology#compliment_hot',
 'https://purl.archive.org/purl/yelp/ontology#compliment_list',
 'https://purl.archive.org/purl/yelp/ontology#compliment_more',
 'https://purl.archive.org/purl/yelp/ontology#compliment_note',
 'https://purl.archive.org/purl/yelp/ontology#compliment_photos',
 'https://purl.archive.org/purl/yelp/ontology#compliment_plain',
 'https://purl.archive.org/purl/yelp/ontology#compliment_profile',
 'https://purl.archive.org/purl/yelp/ontology#compliment_writer',
 'https://purl.archive.org/purl/yelp/ontology#cool',
 'https://purl.archive.org/purl/yelp/ontology#elite',
 'https://purl.archive.org/purl/yelp/ontology#fans',
 

In [47]:
query = """
SELECT COUNT(DISTINCT(?o))
FROM <http://www.yelpkg.com/yelp_kg>
WHERE {
    {?s schema:checkinTime ?o .}
    union
    {?s schema:knows ?o .}
    union
    {?s schema:category ?o .}
}
"""

run_query(query, as_dataframe=True)

Unnamed: 0,callret-0.value
0,32225386
