In [2]:
import pandas as pd
from rdflib import Graph, URIRef, BNode, Literal, Namespace, plugins
from rdflib.namespace import RDF, RDFS, OWL, XSD
from rdflib.serializer import Serializer
import rdflib
import owlrl
from lookup import DBpediaLookup
from isub import isub



# Task RDF

## Prepare ontology

### Load ontology

In [237]:
graph = Graph()
graph.parse("pizza-restaurants-ontology.ttl", format="ttl")
print("The graph contains " + str(len(graph)) + " triples.")

The graph contains 963 triples.


### Prepare ontology settings

In [175]:
cw_url = "http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#"
cw_namespace = Namespace(cw_url)
graph.bind("cw", cw_namespace)

dbp_url = "http://dbpedia.org/resource/"
dbp_namespace = Namespace(dbp_url)
graph.bind("dbp", dbp_namespace)

graph.bind("rdf", RDF)
graph.bind("rdfs", RDFS)

## Prepare data

In [176]:
main_df = pd.read_csv("IN3067-INM713_coursework_data_pizza_500.csv")

In [177]:
main_df.shape

(501, 11)

After reading the data, we have a look how it looks like, what columns it contains.

In [178]:
main_df

Unnamed: 0,name,address,city,country,postcode,state,categories,menu item,item value,currency,item description
0,Little Pizza Paradise,Cascade Village Mall Across From Target,Bend,US,97701.0,OR,Pizza Place,Bianca Pizza,22.50,USD,
1,Little Pizza Paradise,Cascade Village Mall Across From Target,Bend,US,97701.0,OR,Pizza Place,Cheese Pizza,18.95,USD,
2,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Margherita",12.00,USD,
3,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Mushroom",13.00,USD,
4,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Puttenesca",13.00,USD,"Olives, onions, capers, tomatoes"
...,...,...,...,...,...,...,...,...,...,...,...
496,Brando's Pizza,1042 Union Rd,Buffalo,US,14224.0,NY,Pizza Place,White Pizza Sub,7.49,USD,Garlic roll baked with mozzarella onions and t...
497,Woolworth Tower Kitchen,233 Broadway,New York,US,10279.0,Nyc,"Restaurant,New York City",Grilled Pizza,10.00,USD,
498,Ralph's Grocery Deli,2035 4th Ave,Seattle,US,98121.0,WA,"Food Products,Grocery Stores,Supermarkets and ...",Pizza Special,7.29,USD,Whole
499,John's of 12th Street,302 E 12th St,New York,US,10003.0,Manhattan,"Restaurant,Italian Restaurant,Italian Restaura...",Marinara Pizza,10.95,USD,"Tomato, Basil, Garlic"


Then we check for missing values.

In [179]:
print(main_df.isna().sum())

name                  0
address               0
city                  0
country               0
postcode             10
state                 0
categories            0
menu item             0
item value           78
currency             75
item description    325
dtype: int64


There are 10 rows without a postcode, 325 without item description and 78 rows without an item value, something we cannot deduce from other fields.

#### **Subtask RDF.0**:
Before you start the transformation, extend cw_data with a few new entries (e.g., 2 new restaurants selling 2 new pizzas each, with some defined in-gredients). This task is mandatory, but it does not give points.

In [180]:
cw_data_extension_data = [
    ["Alex' German Pizzeria", "21 Little Germany Plaza", "London", "US", "95353", "OR", "Pizza Place",
     "Meat Lover Pizza", 12, "USD", "Cheese, Sausage"],
    ["Nigerian Fusion Pizzeria", "22 Firewood Square", "London", "US", "95353", "OR", "Pizza,Restaurants",
     "Marinara Pizza", 10, "USD", "Tomato, Basil, Garlic"]]

cw_data_extension = pd.DataFrame(data=cw_data_extension_data, columns=main_df.columns, index=[501, 502])
main_df = pd.concat([main_df, cw_data_extension], axis=0)

In [181]:
main_df.shape

(503, 11)

#### **Subtask RDF.1**:
URI generation (20%). Use a good naming convention for the URIs of the created individuals. Tip: Different restaurants in different cities may share the same name. Similarly for Pizzas, a pizza bianca is served in both “Ciao Bella” and “Little Pizza Paradise” restaurant

### Preprocessing

#### Text processing

After running into issues with non-ASCII characters, we decided to remove them for guaranteed compatibility.

In [182]:
# convert all columns to string to avoid issues
cols_to_convert = [col for col in main_df.columns if col != "item value"]
main_df[cols_to_convert] = main_df[cols_to_convert].astype(str)

We discovered the numbers in the postcode column contains decimals for some reason, so we remove it using a regular expression to make sure there is no information loss.

In [183]:
main_df["postcode"]

0      97701.0
1      97701.0
2      90049.0
3      90049.0
4      90049.0
        ...   
498    98121.0
499    10003.0
500    10003.0
501      95353
502      95353
Name: postcode, Length: 503, dtype: object

In [184]:
main_df["postcode"] = main_df["postcode"].replace(r'\.0$', '', regex=True)

In [185]:
def get_full_df(included_df: pd.DataFrame, excluded_df: pd.DataFrame):
    return pd.concat([included_df, excluded_df], axis=1)


def remove_non_ascii(s):
    return s.encode('ascii', 'ignore').decode('ascii').lower()


excluded_cols = ["item value", "postcode", "currency"]
excluded_df = main_df[excluded_cols]
included_cols = [col for col in main_df.columns if col not in excluded_cols]
main_df = get_full_df(main_df[included_cols].applymap(remove_non_ascii), excluded_df)
main_df["postcode"] = main_df["postcode"]

# making currency guaranteed uppercase
main_df["currency"] = main_df["currency"].apply(lambda row: row.upper())

# remove commas from menu item name
main_df["menu item"] = main_df["menu item"].str.replace(",", "")
main_df.head()

Unnamed: 0,name,address,city,country,state,categories,menu item,item description,item value,postcode,currency
0,little pizza paradise,cascade village mall across from target,bend,us,or,pizza place,bianca pizza,,22.5,97701,USD
1,little pizza paradise,cascade village mall across from target,bend,us,or,pizza place,cheese pizza,,18.95,97701,USD
2,the brentwood,148 s barrington ave,los angeles,us,brentwood,"american restaurant,bar,bakery",pizza margherita,,12.0,90049,USD
3,the brentwood,148 s barrington ave,los angeles,us,brentwood,"american restaurant,bar,bakery",pizza mushroom,,13.0,90049,USD
4,the brentwood,148 s barrington ave,los angeles,us,brentwood,"american restaurant,bar,bakery",pizza puttenesca,"olives, onions, capers, tomatoes",13.0,90049,USD


### Fix spelling

In [186]:
def look_up_spelling(column: pd.Series, spelling: str):
    for name in column.unique():
        if spelling in name:
            print(name)


# find out mistakes in spelling "margherita"
look_up_spelling(main_df["menu item"], "marg")

main_df["menu item"] = main_df["menu item"].str.replace("margarita", "margherita")

look_up_spelling(main_df["item description"], "bbq")
main_df["item description"] = main_df["item description"].str.replace("bbq", "barbecue")
main_df["menu item"] = main_df["menu item"].str.replace("bbq", "barbecue")
main_df["country"] = main_df["country"].str.replace("us", "united_states")

pizza margherita
margarita pizza
margherita pizza
margarita flat bread pizza
classic margherita artisan crust pizza
margherita pizza (on honey wheat dough)
thin crust margarita pizza
vegan margherita pizza
bbq sauce, mozzarella cheese, diced chicken breast, onions
bbq sauce, chicken, red onions
sweet bbq sauce topped with bbq chicken breast, bacon, red onions and mozzarella and gouda cheeses. garnished with cilantro and crispy onions.
bbq sauce, grilled chicken breast, cheddar and mozzarella.
created here in 1985. our legendary bbq sauce, smoked gouda, red onions and fresh cilantro transform this original to iconic
your choice of bbq chicken or sante fe
our legendary bbq sauce, smoked gouda, red onions and fresh cilantro transform this original to iconic.


In the next step, we take the lower case of all strings to enable upcoming URI format creation.

In [187]:
import string

# getting a dict with punctuation mapping
punctuation_dict = dict.fromkeys(string.punctuation)

# building custom dict
punctuation_dict["-"] = "_"
punctuation_dict[" "] = "_"
punctuation_dict[","] = ","
# translator initiation
translator = str.maketrans(punctuation_dict)
main_df["item name"] = main_df["menu item"]
main_df["restaurant name"] = main_df["name"]
included_cols = ["name", "city", "address", "state", "categories", "menu item"]
excluded_cols = [col for col in main_df.columns if col not in included_cols]

excluded_df = main_df[excluded_cols]
main_df = main_df[included_cols]
main_df = main_df.applymap(lambda x: x.translate(translator))
# removes multiple underscores
main_df["menu item"] = main_df["menu item"].apply(lambda x: x.replace('_', ' ').replace('  ', '_').replace(' ', '_'))
main_df = get_full_df(main_df, excluded_df)
main_df.head()

Unnamed: 0,name,city,address,state,categories,menu item,country,item description,item value,postcode,currency,item name,restaurant name
0,little_pizza_paradise,bend,cascade_village_mall_across_from_target,or,pizza_place,bianca_pizza,united_states,,22.5,97701,USD,bianca pizza,little pizza paradise
1,little_pizza_paradise,bend,cascade_village_mall_across_from_target,or,pizza_place,cheese_pizza,united_states,,18.95,97701,USD,cheese pizza,little pizza paradise
2,the_brentwood,los_angeles,148_s_barrington_ave,brentwood,"american_restaurant,bar,bakery",pizza_margherita,united_states,,12.0,90049,USD,pizza margherita,the brentwood
3,the_brentwood,los_angeles,148_s_barrington_ave,brentwood,"american_restaurant,bar,bakery",pizza_mushroom,united_states,,13.0,90049,USD,pizza mushroom,the brentwood
4,the_brentwood,los_angeles,148_s_barrington_ave,brentwood,"american_restaurant,bar,bakery",pizza_puttenesca,united_states,"olives, onions, capers, tomatoes",13.0,90049,USD,pizza puttenesca,the brentwood


In [188]:
# concatenating venue name and city to avoid ambiguous entries
main_df["uri_restaurant"] = main_df["name"] + "_" + main_df["city"]
# concatenating item name, venue name and city to avoid ambiguous entries
main_df["uri_menu_item"] = main_df["menu item"] + "_at_" + main_df["uri_restaurant"]
#
main_df["uri_address"] = "address_" + main_df["address"] + "_" + main_df["city"]

# concatenating price and currency to create unique names
main_df["uri_item_value"] = ""
for index, row in main_df[["item value", "currency"]].iterrows():
    item_value = str(row["item value"])
    currency = row["currency"]
    if item_value == "nan" or currency == "NAN":
        main_df.loc[index, "uri_item_value"] = ""
    else:
        main_df.loc[index, "uri_item_value"] = item_value + currency

In [189]:
look_up_spelling(main_df["item description"], "bbq")

## Preparing the food dataset

Things that need to be done for **pizza**:
- pizza
 - by name
 - by ingredient
 - by style


### Fix spelling

In [190]:
pizza_names = {"margherita": "MargheritaPizza", "hawaii": "HawaiianPizza", "supreme": "PizzaSupreme",
               "maui": "HawaiianPizza", "ernesto": "PizzaErnesto", "napolitana": "PizzaNapolitana",
               "marinara": "PizzaMarinara", "romana": "PizzaRomana", "californian": "CalifornianPizza",
               "fruit": "FruitPizza", "vegetarian": "VegetarianPizza", "vegan": "VeganPizza",
               "mushroom": "MushroomPizza"}

bianca_pizza_items = ["bianca", "blanca", "white"]
for item in bianca_pizza_items:
    pizza_names[item] = "PizzaBianca"

sweet_pizza_items = ["nutella", "dessert", "chocolate"]
for item in sweet_pizza_items:
    pizza_names[item] = "SweetPizza"

greek_pizza_items = ["greek", "gyro", "pita"]
for item in greek_pizza_items:
    pizza_names[item] = "GreekPizza"

american_pizza_items = ["american", "new york style", "chicago", "buffalo", "ranch", "deep dish"]
for item in american_pizza_items:
    pizza_names[item] = "AmericanPizza"

mexican_pizza_items = ["taco", "carne", "mexican"]
for item in mexican_pizza_items:
    pizza_names[item] = "MexicanPizza"

meat_pizza_items = ["pork", "meatball", "meat lover", "steak", "gyro", "meat eater", "meat pizza", "chicken",
                    "pepperoni", "carne"]
for item in meat_pizza_items:
    pizza_names[item] = "MeatPizza"

not_pizza_items = ["bagel", "burger", "bread", "bruschetta", "fries", "dough", "logs", "pretzel", "roll", "sub"]
for item in not_pizza_items:
    pizza_names[item] = "MenuItem"

#### **Subtask RDF.2** + **Subtask RDF.4**:
RDF generation (60%). This task will be evaluated by the execution of a number of (blind) queries, which are expected to return results (not necessarily complete), over the generated data.4 For example, (i) list American restaurants or (ii) list vegan pizzas.

Reuse URIs from state-of-the art knowledge graphs (10%). For the cells in the columns city, country and state; instead of creating new URIs (e.g., new individuals) for the information in the table cells, reuse an entity URI from DBPedia, Wikidata or Google’s Knowledge Graph (e.g., dbr:Chicago instead of cw:chicago).

In [191]:
def add_ingredients_to_graph(ingredients):
    def _get_ingredient_uri_refs(ingredients):
        ingredients_uri_refs = [URIRef(cw_namespace + ingredient.translate(translator)) for ingredient in
                                ingredients]
        print(f"Created {len(ingredients_uri_refs)} triples.")
        return ingredients_uri_refs

    ingredients_uri_refs = _get_ingredient_uri_refs(ingredients)
    previous_graph_length = len(graph)
    for uri_ref in ingredients_uri_refs:
        class_name = uri_ref.title().split("#")[1].replace("_", "")
        class_uri_ref = URIRef(cw_namespace + class_name)
        graph.add((uri_ref, RDF.type, class_uri_ref))

    print(f"Added {len(graph) - previous_graph_length} triples.")

In [192]:
ingredients = [
    "mortadella", "prosciutto", "ham", "chicken", "beef", "meatballs", "chorizo", "salami", "bacon",
    "sausage", "pepperoni", "salmon", "shrimp", "crab meat", "scallops", "tuna", "anchovies",
    "mushroom", "zucchini", "root vegetable", "carrot", "potato", "sweet potato", "artichokes", "pesto",
    "vegan cheese", "tofu", "olive oil", "vegetable", "tomato", "plum tomato", "cherry tomato", "onion",
    "pepper", "green pepper", "red pepper", "yellow pepper", "jalapeno pepper", "winter squash", "pumpkin",
    "hot sauce", "barbecue sauce", "tomato sauce", "marinara", "parmesan",
    "provolone", "cheddar", "gorgonzola", "goat cheese", "ricotta", "mozzarella", "blue cheese", "feta",
    "butternut squash", "fruit", "pineapple", "fig", "broccoli", "eggplant", "garlic", "herbs", "oregano",
    "rosemary", "basil", "capers", "spinach", "seeds", "beans", "olives", "black olives", "green olives",
]

add_ingredients_to_graph(ingredients)

Created 71 triples.
Added 71 triples.


In [193]:
ingredients_dict = {
    "mortadella": [], "prosciutto": [], "ham": [], "chicken": ["ChickenPizza"], "beef": [], "meatballs": [],
    "chorizo": [], "salami": [], "bacon": [], "sausage": [], "pepperoni": [], "salmon": [], "shrimp": [],
    "crab meat": [], "scallops": [], "tuna": [], "anchovies": [], "mushroom": ["MushroomPizza"],
    "zucchini": [], "root vegetable": [], "carrot": [], "potato": [], "sweet potato": [], "artichokes": [], "pesto": [],
    "vegan cheese": [], "tofu": [], "olive oil": [], "vegetable": [], "tomato": [], "plum tomato": [],
    "cherry tomato": [], "onion": [], "pepper": [], "green pepper": [],
    "red pepper": [], "yellow pepper": [], "jalapeno pepper": [], "winter squash": [],
    "pumpkin": [], "hot sauce": [], "barbecue sauce": ["BarbecuePizza"],
    "tomato sauce": [], "marinara": [], "parmesan": [], "provolone": [], "cheddar": [], "gorgonzola": [],
    "goat cheese": [], "ricotta": [], "mozzarella": [], "blue cheese": [], "feta": ["FetaPizza", "GreekPizza"],
    "butternut squash": [], "fruit": [], "pineapple": ["PineapplePizza"],
}

seafood_ingredients = ["salmon", "shrimp", "scallops", "tuna", "anchovies", "crab meat"]
fruit_ingredients = ["fig", "pineapple"]
meat_ingredients = ["mortadella", "prosciutto", "ham", "chicken", "beef", "meatballs", "chorizo", "salami", "bacon",
                    "sausage", "pepperoni"]


def append_pizza_type_to_dict(ingredients, type: str):
    valid_types = ["seafood", "meat", "fruit"]
    if type not in valid_types:
        raise ValueError(f"Please provide one of the following parameters for type: {valid_types}")

    class_name = ""
    if type == "seafood":
        class_name = "SeafoodPizza"
    elif type == "fruit":
        class_name = "FruitPizza"
    elif type == "meat":
        class_name = "MeatPizza"

    for ingredient in ingredients:
        if ingredient in ingredients_dict.keys():
            ingredients_dict[ingredient].append(class_name)
            print(f"Appended {class_name} for {ingredient}")


append_pizza_type_to_dict(seafood_ingredients, "seafood")
append_pizza_type_to_dict(fruit_ingredients, "fruit")
append_pizza_type_to_dict(meat_ingredients, "meat")


Appended SeafoodPizza for salmon
Appended SeafoodPizza for shrimp
Appended SeafoodPizza for scallops
Appended SeafoodPizza for tuna
Appended SeafoodPizza for anchovies
Appended SeafoodPizza for crab meat
Appended FruitPizza for pineapple
Appended MeatPizza for mortadella
Appended MeatPizza for prosciutto
Appended MeatPizza for ham
Appended MeatPizza for chicken
Appended MeatPizza for beef
Appended MeatPizza for meatballs
Appended MeatPizza for chorizo
Appended MeatPizza for salami
Appended MeatPizza for bacon
Appended MeatPizza for sausage
Appended MeatPizza for pepperoni


#### Adding currency to the graph

In [194]:
currencies = main_df["currency"].unique()

currency_class = URIRef(cw_namespace + "Currency")
previous_graph_length = len(graph)
for currency_name in currencies:
    if currency_name == "NAN":
        continue

    currency_instance = URIRef(cw_namespace + currency_name)
    graph.add((currency_instance, RDF.type, currency_class))

print(f"Added {len(graph) - previous_graph_length} triples.")

Added 1 triples.


In [195]:
restaurant_types = ["american_restaurant", "mexican_restaurant", "asian_restaurant", "chinese_restaurant",
                    "indian_restaurant", "japanese_restaurant", "sushi_restaurant", "bakery", "bar_and_grill", "bar",
                    "cocktail_bar", "karaoke_bar", "sports_bar", "beer_place", "club", "pub", "gastro_pub",
                    "burger_place", "coffee_shop", "dietary_restaurant", "gluten_free_restaurant", "vegan_restaurant",
                    "vegetarian_restaurant", "gourmet_restaurants", "mediterranean_restaurant", "french_restaurant",
                    "greek_restaurant", "italian_restaurant", "spanish_restaurant", "pizza_place", "pizzeria",
                    "seafood_restaurant"]

In [196]:
import re


def string_to_upper_camel_case(string):
    return re.sub(r"(^|_)([a-z])", lambda m: m.group(2).upper(), string)

In [197]:
def get_ask_query_result(triple):
    return [row for row in graph.query("""ASK {""" + triple + """ }""")][0]


def has_state_already(instance):
    triple = "cw:" + instance + " cw:locatedInState " + "?state"
    cw_res = get_ask_query_result(triple)
    triple = "dbp:" + instance.title() + " cw:locatedInState " + "?state"
    dbp_res = get_ask_query_result(triple)

    return cw_res or dbp_res


def is_city_already(instance):
    triple = "cw:" + instance + " rdf:type " + "cw:City"
    cw_res = get_ask_query_result(triple)
    triple = "dbp:" + instance.title() + " rdf:type " + "cw:City"
    dbp_res = get_ask_query_result(triple)

    return cw_res or dbp_res


# def get_instance_information(instance, relationship, object):


def is_state_already(instance):
    triple = "cw:" + instance + " rdf:type " + "cw:State"
    cw_res = get_ask_query_result(triple)
    triple = "dbp:" + instance.title() + " rdf:type " + "cw:State"
    dbp_res = get_ask_query_result(triple)

    return cw_res or dbp_res

In [198]:
is_state_already("chicago")

False

In [199]:
def check_misclassification(instance, instance_class):
    triple = "cw:" + instance + " rdf:type " + "cw:" + instance_class
    cw_res = get_ask_query_result(triple)
    triple = "dbp:" + instance.title() + " rdf:type " + "cw:" + instance_class
    dbp_res = get_ask_query_result(triple)

    return cw_res or dbp_res


In [200]:
# adapted code from lab
def getExternalKGURI(name, place_type):
    def _update_current_entity(name, entity, current_uri, current_sim):
        isub_score = isub(name, entity.label)
        if current_sim < isub_score:
            current_uri = entity.ident
            current_sim = isub_score
        return current_uri, current_sim

    valid_place_types = ["city", "state", "country"]
    if place_type not in valid_place_types:
        raise ValueError(f"Please provide one of the following parameters: {valid_place_types}")

    dbpedia = DBpediaLookup()

    # identifying the entities that are similar lexically
    entities = dbpedia.getKGEntities(name, 5)
    # if there are no results
    if not entities:
        outcome = ''
        return outcome

    else:
        current_sim = -1
        current_uri = ''
        for entity in entities:

            types = entity.types  #  isolating the set of "types" each resource belongs to

            if place_type == "city":

                if 'http://dbpedia.org/ontology/City' in types:
                    current_uri, current_sim = _update_current_entity(name, entity, current_uri, current_sim)

            elif place_type == "state":
                if 'http://dbpedia.org/ontology/AdministrativeRegion' in types:
                    current_uri, current_sim = _update_current_entity(name, entity, current_uri, current_sim)

            elif place_type == "country":
                if 'http://dbpedia.org/ontology/Country' in types:
                    current_uri, current_sim = _update_current_entity(name, entity, current_uri, current_sim)

        return current_uri


In [201]:
# creating necessary classes
pizza_item_class = URIRef(cw_namespace + "Pizza")
item_value_class = URIRef(cw_namespace + "ItemValue")
address_class = URIRef(cw_namespace + "Address")
city_class = URIRef(cw_namespace + "City")
country_class = URIRef(cw_namespace + "Country")
state_class = URIRef(cw_namespace + "State")
restaurant_class = URIRef(cw_namespace + "Restaurant")
named_pizza_class = URIRef(cw_namespace + "NamedPizza")

# creating object properties
has_ingredient_property = URIRef(cw_namespace + "hasIngredient")
is_ingredient_of_object_property = URIRef(cw_namespace + "isIngredientOf")
amount_currency_object_property = URIRef(cw_namespace + "amountCurrency")
has_value_object_property = URIRef(cw_namespace + "hasValue")
serves_menu_item_object_property = URIRef(cw_namespace + "servesMenuItem")
serves_object_property = URIRef(cw_namespace + "serves")
served_in_object_property = URIRef(cw_namespace + "servedIn")
served_in_restaurant_object_property = URIRef(cw_namespace + "servedInRestaurant")
located_in_address_object_property = URIRef(cw_namespace + "locatedInAddress")
located_in_city_object_property = URIRef(cw_namespace + "locatedInCity")
located_in_state_object_property = URIRef(cw_namespace + "locatedInState")
located_in_country_object_property = URIRef(cw_namespace + "locatedInCountry")

# data properties
amount_data_property = URIRef(cw_namespace + "amount")
first_line_address_data_property = URIRef(cw_namespace + "firstLineAddress")
name_data_property = URIRef(cw_namespace + "name")
item_name_data_property = URIRef(cw_namespace + "itemName")
restaurant_name_data_property = URIRef(cw_namespace + "restaurantName")
postcode_data_property = URIRef(cw_namespace + "postCode")

INVALID_KG_RESPONSE = ''
previos_city_name = ""
previous_state_name = ""
previous_country_name = ""

previous_graph_length = len(graph)
for index, row in main_df.iterrows():

    # create menu item
    menu_item = URIRef(cw_namespace + row["uri_menu_item"])

    # add menu item type
    found_pizza_type = False

    for named_pizza_type in pizza_names:
        for name in pizza_names.keys():
            if name in row["item name"]:
                found_pizza_type = True
                pizza_type_class = URIRef(cw_namespace + pizza_names[name])
                graph.add((menu_item, RDF.type, pizza_type_class))

    # add menu item name
    graph.add((menu_item, item_name_data_property, Literal(row["item name"], datatype=XSD.string)))

    # add item value
    if not (row["uri_item_value"] == ""):
        value_instance = URIRef(cw_namespace + row["uri_item_value"])
        graph.add((value_instance, RDF.type, item_value_class))
        # add currency information to item value
        value_currency = URIRef(cw_namespace + row["currency"])
        graph.add((value_instance, amount_currency_object_property, value_currency))
        # add amount to item value
        graph.add((value_instance, amount_data_property, Literal(row["item value"], datatype=XSD.double)))
        # add item value to menu item
        graph.add((menu_item, has_value_object_property, value_instance))

    # add ingredients to item
    for ingredient in ingredients_dict.keys():
        if ingredient in row["item name"] or ingredient in row["item description"] and row["item description"] != "nan":
            # add appropriate type to pizza
            # if array contains something
            if ingredients_dict[ingredient]:
                found_pizza_type = True
                for name in ingredients_dict[ingredient]:
                    pizza_type_class = URIRef(cw_namespace + name)
                    graph.add((menu_item, RDF.type, pizza_type_class))

                # format name to create valid uri
            ingredient = ingredient.translate(translator)
            ingredient_instance = URIRef(cw_namespace + ingredient)
            graph.add((menu_item, has_ingredient_property, ingredient_instance))

    if not found_pizza_type:
        graph.add((menu_item, RDF.type, pizza_item_class))
    # create restaurant
    restaurant_instance = URIRef(cw_namespace + row["uri_restaurant"])
    # add restaurant name
    graph.add(
        (restaurant_instance, restaurant_name_data_property, Literal(row["restaurant name"], datatype=XSD.string)))
    # add address information
    restaurant_address = URIRef(row["uri_address"])
    graph.add((restaurant_address, RDF.type, address_class))
    # add address first line
    graph.add((restaurant_address, first_line_address_data_property, Literal(row["address"], datatype=XSD.string)))
    #add address postcode
    if row["postcode"] != "nan":
        graph.add((restaurant_address, postcode_data_property, Literal(row["postcode"], datatype=XSD.string)))

    restaurant_city_name = row['city']
    restaurant_state_name = row['state']
    restaurant_country_name = row['country']
    # create locations
    try:
        if restaurant_city_name != previos_city_name:
            city_lookup_name = restaurant_city_name.title()
            city_dbp_uri = getExternalKGURI(city_lookup_name, place_type="city")
            if city_dbp_uri != INVALID_KG_RESPONSE:
                restaurant_city_instance = URIRef(city_dbp_uri)
            else:
                restaurant_city_instance = URIRef(cw_namespace + restaurant_city_name)
        previous_city_name = restaurant_city_name

        if restaurant_state_name != previous_state_name:
            state_lookup_name = restaurant_state_name.upper()
            state_dbp_uri = getExternalKGURI(restaurant_state_name, place_type="state")
            if state_dbp_uri != INVALID_KG_RESPONSE:
                restaurant_state_instance = URIRef(state_dbp_uri)
            else:
                restaurant_state_instance = URIRef(cw_namespace + restaurant_state_name)
        previous_state_name = restaurant_state_name

        if previous_country_name != restaurant_country_name:
            country_lookup_name = restaurant_country_name.upper()
            country_dbp_uri = getExternalKGURI(country_lookup_name, place_type="country")
            if country_dbp_uri != INVALID_KG_RESPONSE:
                restaurant_country_instance = URIRef(country_dbp_uri)
            else:
                restaurant_country_instance = URIRef(cw_namespace + restaurant_country_name)
        previous_country_name = restaurant_country_name


    except Exception as e:
        restaurant_city_instance = URIRef(cw_namespace + restaurant_city_name)
        restaurant_state_instance = URIRef(cw_namespace + restaurant_state_name)
        restaurant_country_instance = URIRef(cw_namespace + restaurant_country_name)
        print(e)

    if not is_state_already(restaurant_city_name) and not is_city_already(restaurant_city_name):
        graph.add((restaurant_city_instance, RDF.type, city_class))
        graph.add((restaurant_instance, located_in_city_object_property, restaurant_city_instance))
        graph.add((restaurant_address, located_in_city_object_property, restaurant_city_instance))
    if not is_city_already(restaurant_state_name) and not is_state_already(restaurant_state_name):
        graph.add((restaurant_state_instance, RDF.type, state_class))
        graph.add((restaurant_state_instance, located_in_country_object_property, restaurant_country_instance))

    graph.add((restaurant_city_instance, located_in_state_object_property, restaurant_state_instance))
    graph.add((restaurant_country_instance, RDF.type, country_class))
    graph.add((restaurant_instance, located_in_address_object_property, restaurant_address))

    # add categories to restaurant
    found_restaurant_type = False
    for restaurant_type in restaurant_types:
        if restaurant_type in row["categories"]:
            found_restaurant_type = True
            restaurant_type = string_to_upper_camel_case(restaurant_type)
            restaurant_type_class = URIRef(cw_namespace + restaurant_type)
            graph.add((restaurant_instance, RDF.type, restaurant_type_class))

    # add restaurant type
    if not found_restaurant_type:
        graph.add((restaurant_instance, RDF.type, restaurant_class))

    graph.add((restaurant_instance, serves_menu_item_object_property, menu_item))
    graph.add((menu_item, served_in_restaurant_object_property, restaurant_instance))

print(f"Added {len(graph) - previous_graph_length} triples.")

Added 5135 triples.


idea 1: add city not in type
idea 2: add dbo:AdministrativeRegion

In [202]:
FILENAME = "swtkg_final_cw"
graph.serialize(destination=FILENAME + ".ttl", format="ttl")

In [203]:
print("The graph contains {} triples.".format(len(graph)))


The graph contains 6170 triples.


#### Subtask RDF.3:
Perform reasoning with cw_onto and the generated RDF data. Save the extended graph in turtle format (.ttl)(10%).
Tip: When reasoning with data, using OWL 2 reasoning is expensive. Using an approximate reasoner is typically more suitable (e.g., for OWL 2 RL)

In [204]:
# function taken from lab 6 solution
def check_entailment(graph, triple):
    qres = graph.query(
        """ASK {""" + triple + """ }""")

    #Single row with one boolean vale
    for row in qres:
        print("Does '" + triple + "' hold? " + str(row))


graph_extended = Graph()
graph_extended.parse(FILENAME + ".ttl", format="ttl")

print("Loaded '" + str(len(graph_extended)) + "' triples.")

owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples=True, datatype_axioms=False).expand(graph_extended)

print("Rules after inference: '" + str(len(graph_extended)) + "' triples.")

print("Saving extended graph")
graph_extended.serialize(destination=FILENAME + '-extended.ttl', format='ttl')

Loaded '6170' triples.
Rules after inference: '35523' triples.
Saving extended graph


In [205]:
triple1 = "cw:margherita_flat_bread_pizza_at_art_cafe_bethlehem rdf:type cw:MenuItem ."
triple2 = "cw:pizza_marinara_at_euro_pizzeria_white_plains rdf:type cw:PizzaMarinara ."
triple3 = "cw:buffalo_chicken_pizza_at_franks_pizzeria_omaha cw:servedInRestaurant cw:franks_pizzeria_omaha ."

check_entailment(graph_extended, triple1)
check_entailment(graph_extended, triple2)
check_entailment(graph_extended, triple3)

Does 'cw:margherita_flat_bread_pizza_at_art_cafe_bethlehem rdf:type cw:MenuItem .' hold? True
Does 'cw:pizza_marinara_at_euro_pizzeria_white_plains rdf:type cw:PizzaMarinara .' hold? True
Does 'cw:buffalo_chicken_pizza_at_franks_pizzeria_omaha cw:servedInRestaurant cw:franks_pizzeria_omaha .' hold? True


#### Subtask RDF.5 (Optional)
Exploit an external Knowledge Graph to perform disambiguation (e.g., same city name in multiple states) and solve errors in the data (e.g., wrong state names). (extra 15%)

# SparQL and Reasoning

## Subtask SPARQL.1

Create a query with at least a triple pattern and a FILTER.

In [206]:
def save_sparql_results(query, task):
    query_results = graph_extended.query(query)
    [print(row) for row in query_results]
    results = pd.DataFrame(query_results)
    print(f"Sample results:\n {results.head()}")
    print(f"Result count: {results.shape[0]}")
    results.to_csv("results_" + task + ".csv", index=False)


query_1 = """PREFIX cw: <http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT DISTINCT ?menuitem
    WHERE
    {
    ?menuitem rdf:type cw:MenuItem .
    FILTER EXISTS {?menuitem cw:hasValue ?value}
    }

    """

save_sparql_results(query_1, "1")

(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#mediterranean_pizza_at_original_giorgios_cockeysville'),)
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#veggie_lovers_pizza_at_original_giorgios_cockeysville'),)
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#veggie_pizza_at_hungry_howies_pizza_chandler'),)
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#pizza_sub_at_blockbuster_gainesville'),)
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#captain_jean_pizza_at_pirates_cove_mount_vernon'),)
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#to_the_price_of_any_pizza_at_lake_bowl_inc_moses_lake'),)
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#pizza_toppings_at_keyme_kent'),)
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm

## Subtask SPARQL.2

Create a query that uses at least one triple pattern, a FILTER and AVG function.

In [207]:
# give the average price for cheap pizzas (<10$)
query_2 = """PREFIX cw: <http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT (AVG(?amount) AS ?avg_pizza_price)
    WHERE
    {
    ?menuitem rdf:type cw:MenuItem .
    ?menuitem cw:hasValue ?value .
    ?value cw:amount ?amount
    FILTER (?amount < 10)
    }

    """

save_sparql_results(query_2, "2")

(rdflib.term.Literal('6.079672131147544', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')),)
Sample results:
                    0
0  6.079672131147544
Result count: 1


## Subtask SPARQL.3

Create a query that groups results, uses aggregates, and filters the results.

In [208]:
# give the average price for cheap pizzas (<10$) that are named, grouped by type
query_3 = """PREFIX cw: <http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?type (AVG(?amount) AS ?avg_pizza_price)
    WHERE
    {
    ?menuitem rdf:type ?type .
    ?type rdfs:subClassOf cw:NamedPizza .
    ?menuitem cw:hasValue ?value .
    ?value cw:amount ?amount .
    FILTER (?amount < 10) .
    }
    GROUP BY ?type

    """

save_sparql_results(query_3, "3")

(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#MargheritaPizza'), rdflib.term.Literal('8.26', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#PizzaSupreme'), rdflib.term.Literal('7.495', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#HawaiianPizza'), rdflib.term.Literal('7.995', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.URIRef('http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#NamedPizza'), rdflib.term.Literal('7.965714285714285', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
Sample results:
                                                    0                  1
0  http://www.semanticweb.org/city/in3067-inm713/...               8.26
1  http://www.sema

# Subtask SPARQL.4
Create a query (different from SPARQL.3) that group results, uses aggregates, filters the results and orders the results according to two variables.

In [209]:
# give avg price for cheap pizzas (<10$) grouped by restaurants, ordered by avg price and name
query_4 = """PREFIX cw: <http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT ?name (AVG(?amount) AS ?avg_pizza_price)
    WHERE
    {
    ?menuitem rdf:type cw:Pizza .
    ?menuitem cw:hasValue ?value .
    ?value cw:amount ?amount .
    ?menuitem cw:servedInRestaurant ?restaurant .
    ?restaurant cw:restaurantName ?name .
    FILTER (?amount < 10) .
    }
    GROUP BY ?restaurant
    ORDER BY ?avg_pizza_price ?name

    """

save_sparql_results(query_4, "4")

(rdflib.term.Literal('keyme', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('1.75', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.Literal('rosa ii pizzeria & restaurant', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('1.75', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.Literal('taco pronto', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('1.99', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.Literal('ming garden cocktails chinese restaurant', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('2.5', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.Literal("abo's pizza", datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term

## Subtask SPARQL.5
Create a query that uses the Union graph pattern and negation.

In [210]:
# return all named pizzas and pizzas by style that don't have a price
query_5 = """PREFIX cw: <http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT ?name
    WHERE
    {
        {
            ?item rdf:type cw:NamedPizza .
            ?item cw:itemName ?name .
        }
        UNION
        {
            ?item rdf:type cw:PizzaByStyle .
            ?item cw:itemName ?name
        }
    FILTER NOT EXISTS {?item cw:hasValue ?value}
    }

    """

save_sparql_results(query_5, "5")

(rdflib.term.Literal('margherita pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('margherita pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('greek pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('philly cheese steak pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('chicago deep dish pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('chicken parmesan pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('barbecue chicken pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('taco pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('pork tenderloin pizzaiola', datatype=rdfl

# Ontology Alignment

In [246]:
import owlready2

def get_classes(onto):
    return onto.classes()


def get_data_properties(onto):
    return onto.data_properties()


def get_object_properties(onto):
    return onto.object_properties()


def get_ontology_classes(uri):
    onto = owlready2.get_ontology(uri).load()
    # number of classes
    print("Classes in {}: {}".format(str(uri), len(list(get_classes(onto)))))

    class_list = []
    for ontology_class in get_classes(onto):
        # class names
        print("\t" + ontology_class.name)
        class_list.append(ontology_class.name)

    return class_list


def get_ontology_data_property(uri):
    #Method from owlready
    onto = owlready2.get_ontology(uri).load()
    # number of data properties
    print("Data Properties in {}: {}".format(str(uri), len(list(get_data_properties(onto)))))

    data_properties_list = []
    for dp in get_data_properties(onto):
        print("\t" + dp.name)
        data_properties_list.append(dp.name)

    return data_properties_list


def get_ontology_object_property(uri):
    #Method from owlready
    onto = owlready2.get_ontology(uri).load()
    # number of object properties
    print("Object Properties in {}: {}".format(str(uri), len(list(get_object_properties(onto)))))

    object_properties_list = []
    for op in get_object_properties(onto):
        print("\t" + op.name)
        object_properties_list.append(op.name)

    return object_properties_list

In [247]:
from difflib import SequenceMatcher


def find_similar_elements(list_a, list_b, threshold):
    return list(set(a for a in list_a for b in list_b if is_similar(a, b, threshold)))


def is_similar(element_a, element_b, threshold):
    return SequenceMatcher(None, element_a, element_b).ratio() >= threshold

In [248]:
pizza_ontology = "pizza.owl"
cw_ontology = "pizza-restaurants-ontology.owl"

pizza_classes = get_ontology_classes(pizza_ontology)
cw_classes = get_ontology_classes(cw_ontology)

pizza_data_properties = get_ontology_data_property(pizza_ontology)
cw_data_properties = get_ontology_data_property(cw_ontology)

pizza_object_properties = get_ontology_object_property(pizza_ontology)
cw_object_properties = get_ontology_object_property(cw_ontology)

Classes in pizza.owl: 100
	Pizza
	PizzaBase
	Food
	Spiciness
	FoodTopping
	American
	NamedPizza
	MozzarellaTopping
	PeperoniSausageTopping
	TomatoTopping
	AmericanHot
	HotGreenPepperTopping
	JalapenoPepperTopping
	AnchoviesTopping
	FishTopping
	ArtichokeTopping
	VegetableTopping
	Mild
	AsparagusTopping
	Cajun
	OnionTopping
	PeperonataTopping
	PrawnsTopping
	TobascoPepperSauce
	CajunSpiceTopping
	HerbSpiceTopping
	Hot
	RosemaryTopping
	CaperTopping
	Capricciosa
	HamTopping
	OliveTopping
	Caprina
	GoatsCheeseTopping
	SundriedTomatoTopping
	CheeseTopping
	PizzaTopping
	CheeseyPizza
	CheeseyVegetableTopping
	ChickenTopping
	MeatTopping
	Country
	DomainConcept
	DeepPanBase
	ThinAndCrispyBase
	ValuePartition
	Fiorentina
	GarlicTopping
	ParmesanTopping
	SpinachTopping
	FourCheesesTopping
	FourSeasons
	MushroomTopping
	FruitTopping
	FruttiDiMare
	MixedSeafoodTopping
	Medium
	Giardiniera
	LeekTopping
	PetitPoisTopping
	SlicedTomatoTopping
	GorgonzolaTopping
	GreenPepperTopping
	PepperTopping
	H

In [249]:
similar_classes = find_similar_elements(pizza_classes, cw_classes, threshold=0.8)
print(similar_classes)
similar_data_properties = find_similar_elements(pizza_data_properties, cw_data_properties, threshold=0.8)
print(similar_data_properties)
similar_object_properties = find_similar_elements(pizza_object_properties, cw_object_properties, threshold=0.8)
print(similar_object_properties)

['Food', 'MeatyPizza', 'Country', 'JalapenoPepperTopping', 'NamedPizza', 'NonVegetarianPizza', 'VegetarianPizza', 'Mushroom', 'Pizza', 'Margherita']
[]
['hasIngredient', 'isIngredientOf']


In [257]:
graph_alignment = Graph()
pizza_uri = 'http://www.co-ode.org/ontologies/pizza#'
pizza_namespace = Namespace(pizza_uri)
graph_alignment.bind("pizza", pizza_namespace)
graph_alignment.bind("cw", cw_namespace)


def align_ontology(cw_elements, pizza_elements, type, threshold):
    valid_types = ["class", "property"]
    if type not in valid_types:
        raise ValueError(f"Please provide one of the following types: {valid_types}")

    relationship = OWL.equivalentClass if type == "class" else OWL.equivalentProperty
    for cw_element in cw_elements:
        for pizza_element in pizza_elements:
            if is_similar(cw_element, pizza_element, threshold):
                print(f"Adding {(cw_element, relationship, pizza_element)}")
                cw_uri = URIRef(cw_namespace + cw_element)
                pizza_uri = URIRef(pizza_namespace + pizza_element)
                graph_alignment.add((cw_uri, relationship, pizza_uri))


align_ontology(cw_classes, pizza_classes, type="class", threshold=0.95)
align_ontology(cw_data_properties, pizza_data_properties, type="property", threshold=0.9)
align_ontology(cw_object_properties, pizza_object_properties, type="property", threshold=0.9)

graph_alignment.serialize("swtkg_final_cw-alignment_01.ttl", format="ttl")

Adding ('Food', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass'), 'Food')
Adding ('Country', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass'), 'Country')
Adding ('Pizza', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass'), 'Pizza')
Adding ('NamedPizza', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass'), 'NamedPizza')
Adding ('Mushroom', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass'), 'Mushroom')
Adding ('VegetarianPizza', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass'), 'VegetarianPizza')
Adding ('hasIngredient', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentProperty'), 'hasIngredient')
Adding ('isIngredientOf', rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentProperty'), 'isIngredientOf')


In [3]:
graph_alignment = Graph()
graph_alignment.parse("pizza-restaurants-ontology.ttl", format="ttl")
print(f"Now holds {len(graph_alignment)} triples")
graph_alignment.parse("swtkg_final_cw.ttl", format="ttl")
print(f"Now holds {len(graph_alignment)} triples")
graph_alignment.parse("swtkg_final_cw-alignment_01.ttl", format="ttl")
print(f"Now holds {len(graph_alignment)} triples")
graph_alignment.parse("pizza.ttl", format="ttl")
print(f"Now holds {len(graph_alignment)} triples")

owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples=True, datatype_axioms=False).expand(graph_alignment)

print("Rules after inference: '" + str(len(graph_alignment)) + "' triples.")

print("Saving extended graph")
graph_alignment.serialize(destination='swtkg_final_cw-alignment_02.ttl', format='ttl')

Now holds 963 triples
Now holds 6592 triples
Now holds 6600 triples
Now holds 8543 triples
Rules after inference: '61059' triples.
Saving extended graph


In [4]:
alignment_query = """PREFIX cw: <http://www.semanticweb.org/city/in3067-inm713/2023/restaurants#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX pizza: <http://www.co-ode.org/ontologies/pizza#>

    SELECT ?name
    WHERE
    {
            ?item rdf:type pizza:NamedPizza .
            ?item cw:itemName ?name .
    }

    """

for row in graph_alignment.query(alignment_query):
    print(row)

(rdflib.term.Literal('napolitana pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('hawaiian pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('margherita pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('hawaiian pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('fire den supreme pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('margherita pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('vegan margherita pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('hawaiian pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('marinara pizza', datatype=rdflib.term.URIRef('http:/