In [None]:
# Enable auto-reload so edits to imported modules are picked up without restarting the kernel.
# Note: the magic name must follow '%' directly, with no space in between.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import food_KG_simplification.graph_utils as graph_utils
import nutrition_scores.score_util as score_util
import utils

In [None]:
# Data locations used by the cells below
graph_location = "../../data/food_kg/"  # GraphDB exports are read from here; results are written here
data_location = "../../data/hummus_data/"  # first path argument of utils.load_and_clean_data
additional_location = "../../data/hummus_data/"  # second path argument; currently the same folder

# 1 FoodKG Graph extraction
Construct the FoodKG as described in [1]. We use [GraphDB](https://graphdb.ontotext.com/) to store/visualize/query the KG.
Export the graph to a single RDF file; in our case, we download the graph from GraphDB.
First, open GraphDB, run each of the following queries, and download the results as CSV files with the names given below:

1. all_entities.csv

```sql
    SELECT DISTINCT ?subject
    WHERE {
        {?subject ?predicate ?object} UNION {?object ?inverse_predicate ?subject}
    }
```

2. all_relations.csv

```sql
    SELECT DISTINCT ?predicate
    WHERE {
        {?subject ?predicate ?object} UNION {?object ?predicate ?subject}
    }
```

3. kg_data.csv

```sql
    SELECT DISTINCT ?subject ?predicate ?object
    WHERE {
        ?subject ?predicate ?object
    }
```

Store all three files in the graph directory `data/food_kg/` (the `graph_location` defined above).

# 2 FoodKG simplification

In [None]:
# Read the exported FoodKG triples (kg_data.csv) from the graph folder and preview them
kg_data = pd.read_csv(f"{graph_location}kg_data.csv", sep=",")
print(f"KG #triples: {len(kg_data)}")
kg_data.head()

In [None]:
# Strip every node that is not directly related to recipes (e.g. provenance),
# as well as other unwanted ontology classes; report the size before and after
print(f"KG #triples before: {len(kg_data)}")
kg_data = graph_utils.reduce_graph(kg_data)
print(f"KG #triples after: {len(kg_data)}")

## Merge foodKG_ingredients with foodKG_mapped_ingredients

In order to simplify the graph, the following SPARQL query must be executed in GraphDB to get the required mapping:

```sql
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX recipe-kb: <http://idea.rpi.edu/heals/kb/>
SELECT DISTINCT ?ing_name ?recipe ?foodKG_ingredient ?foodKG_mapped_ingredient
WHERE {
    ?recipe recipe-kb:uses ?foodKG_ingredient.
    ?foodKG_ingredient recipe-kb:ing_name ?foodKG_mapped_ingredient.
    ?foodKG_mapped_ingredient rdfs:label ?ing_name.
}
```

Name the file `graph_ingredient_mapping.csv` and put it in the graph folder.

In [None]:
# Load the ingredient mapping exported from GraphDB and preview it
graph_ingredient_mapping = pd.read_csv(f"{graph_location}graph_ingredient_mapping.csv", sep=",")
print(f"Ingredient mapping size: {graph_ingredient_mapping.shape[0]}")
graph_ingredient_mapping.head()

In [None]:
# Merge the FoodKG ingredient nodes with their mapped ingredient nodes,
# reporting the number of triples before and after
print(f"KG #triples before: {len(kg_data)}")
kg_data = graph_utils.merge_ingredient_nodes(
    kg_data,
    graph_ingredient_mapping,
)
print(f"KG #triples after: {len(kg_data)}")

# 3 FoodKG extension

In [None]:
# Load the recipe, review and user tables plus the additional objects
# returned by utils.load_and_clean_data
(
    recipes_df,
    reviews_df,
    users_df,
    recipes_dict,
    user_dict,
    food_locator_dict,
    food_com_dict,
    data,
) = utils.load_and_clean_data(
    data_location,
    additional_location,
    add_review_columns=["review_id", "review_url", "member_url"],
    comment_relations=2.5,
    authorship_relations=6,
    recipe_tags=True,
    k_user=1,
    k_recipe=1,
    debug=True,
    keep_old_ids=True,
)

In [None]:
# Normalize the ingredients, then compute the requested nutrition scores
normalized_ingredients = utils.normalize_ingredients(recipes_df)
recipes_df = score_util.calculate_food_scores(
    recipes_df,
    normalized_ingredients,
    score_names=["who", "fsa", "nutri"],
)

In [None]:
# Extend the simplified KG with the recipe, review and user data.
# The triple count is reported before and after the step, as the
# simplification cells do, so the effect of add_data is visible.
print(f"KG #triples before: {kg_data.shape[0]}")
kg_data, user_data, recipe_data, review_data = graph_utils.add_data(
    recipes_df, reviews_df, users_df, food_locator_dict, kg_data
)
print(f"KG #triples after: {kg_data.shape[0]}")

# 4 Storing Results

In [None]:
# Persist the four tables as CSV files (without the index) in the graph folder
kg_data.to_csv(f"{graph_location}FoodKG_simplified.csv", sep=",", index=False)
user_data.to_csv(f"{graph_location}FoodKG_extension_users.csv", sep=",", index=False)
recipe_data.to_csv(f"{graph_location}FoodKG_extension_recipes.csv", sep=",", index=False)
review_data.to_csv(f"{graph_location}FoodKG_extension_reviews.csv", sep=",", index=False)

In [None]:
# Optional: reload the CSV files stored by the previous cell instead of recomputing
# the steps above (uncomment the lines below to use). File names match the ones written there.
#kg_data = pd.read_csv(graph_location + 'FoodKG_simplified.csv', sep=',')
#user_data = pd.read_csv(graph_location + 'FoodKG_extension_users.csv', sep=',')
#recipe_data = pd.read_csv(graph_location + 'FoodKG_extension_recipes.csv', sep=',')
#review_data = pd.read_csv(graph_location + 'FoodKG_extension_reviews.csv', sep=',')

In [None]:
# Convert the four tables into RDF triples; the Turtle namespaces are returned alongside them
(
    turtle_namespaces,
    kg_triples,
    user_triples,
    recipe_triples,
    review_triples,
) = graph_utils.convert_to_triples(kg_data, user_data, recipe_data, review_data)

In [None]:
# Write one graph file per triple set into the graph folder.
# Each call's return value is kept (presumably the triples that were skipped —
# confirm in graph_utils.write_triples).
skipped_kg = graph_utils.write_triples(
    turtle_namespaces, kg_triples, "FoodKG_simplified", graph_location
)
skipped_users = graph_utils.write_triples(
    turtle_namespaces, user_triples, "FoodKG_extension_users", graph_location
)
skipped_recipes = graph_utils.write_triples(
    turtle_namespaces, recipe_triples, "FoodKG_extension_recipes", graph_location
)
skipped_reviews = graph_utils.write_triples(
    turtle_namespaces, review_triples, "FoodKG_extension_reviews", graph_location
)

# References
[1] Haussmann, S., Seneviratne, O., Chen, Y., Ne’eman, Y., Codella, J., Chen, C. H., … Zaki, M. J. (2019). FoodKG: A Semantics-Driven Knowledge Graph for Food Recommendation. In Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) (Vol. 11779 LNCS, pp. 146–162). Springer. https://doi.org/10.1007/978-3-030-30796-7_10