**------------------------------------------------------------------------------------------------------------------------------------------------------**

**Input: Triples**

**Process Triples into Node and Edge Dataframes**

**Output: Node and Edge Dataframes**

**------------------------------------------------------------------------------------------------------------------------------------------------------**

# Libraries

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter("ignore")

# Get all Triples

In [2]:
# Load every statement line of the N-Quads dump into memory.
# The context manager closes the file handle as soon as the lines are read
# (the original left it open), and the encoding is pinned to UTF-8, which is
# the encoding N-Quads documents are defined to use, instead of relying on
# the platform default.
with open('../Input Data/statements.nq', 'r', encoding='utf-8') as f:
    text = f.readlines()

In [None]:
# Turn each statement line into a [subject, predicate, object] row.
#
# Only the first three whitespace-separated tokens of a line are kept; any
# further tokens (in N-Quads: the graph label and the closing '.') are dropped.
# NOTE: because the split is on whitespace, an object that contains spaces
# (e.g. a quoted literal) is reduced to its first token.

# Deletes every '<' and '>' in one pass (same result as the chained replaces).
_ANGLE_BRACKETS = str.maketrans('', '', '<>')

rows = []
for line in text:
    terms = line.split()
    # Skip blank/short lines (they used to raise IndexError) and comment lines.
    if len(terms) < 3 or terms[0].startswith('#'):
        continue
    s, p, o = (term.translate(_ANGLE_BRACKETS) for term in terms[:3])
    rows.append([s, p, o])

In [None]:
# One row per parsed statement, with the three terms as string columns.
triples = pd.DataFrame(
    data=rows,
    columns=['subject', 'predicate', 'object'],
)
# Preview the first five rows.
triples.head(5)

In [None]:
# Report how many statements were parsed (one DataFrame row per statement).
print(f'# Triples: {triples.shape[0]}')

# Split Triples by Edge Type

**Get all Foods (every triple whose subject is a Food)**

In [None]:
# Keep every triple whose subject IRI lies under the USDA food prefix,
# regardless of predicate or object.
all_foods = triples.loc[
    triples['subject'].str.startswith('http://idea.rpi.edu/heals/kb/usda#')
]
# The original row index of `triples` is written as the first CSV column.
all_foods.to_csv('../Input Data/data/all_foods.csv')

**(Food, containsNutrient, Nutrient)**

In [None]:
# Food -> nutrient edges: food-prefixed subject, exact `contains` predicate,
# and an object under the foodkg USDA prefix.
is_food_subject = triples['subject'].str.startswith('http://idea.rpi.edu/heals/kb/usda#')
is_contains_predicate = triples['predicate'].eq('http://www.w3id.org/foodkg/property#contains')
is_nutrient_object = triples['object'].str.startswith('http://www.w3id.org/foodkg/usda#')

df_food_nutrient = triples.loc[is_food_subject & is_contains_predicate & is_nutrient_object]
# The original row index of `triples` is written as the first CSV column.
df_food_nutrient.to_csv('../Input Data/data/df_food_nutrient.csv')

In [None]:
# Number of food -> nutrient edges (rows).
df_food_nutrient.shape[0]

In [None]:
# Number of distinct subjects (foods) among the food -> nutrient edges.
np.unique(df_food_nutrient['subject']).size

In [None]:
# Number of distinct objects (nutrients) among the food -> nutrient edges.
np.unique(df_food_nutrient['object']).size

**(Food, hasTag, Tag)**

In [None]:
# Food -> tag edges: food-prefixed subject, exact `hasQuality` predicate,
# and only quality objects whose IRI starts with the `high_` prefix.
is_food_subject = triples['subject'].str.startswith('http://idea.rpi.edu/heals/kb/usda#')
is_quality_predicate = triples['predicate'].eq('http://www.w3id.org/foodb/property/hasQuality')
is_high_quality_object = triples['object'].str.startswith('https://w3id.org/foodkg/quality/high_')

df_food_tag = triples.loc[is_food_subject & is_quality_predicate & is_high_quality_object]
# The original row index of `triples` is written as the first CSV column.
df_food_tag.to_csv('../Input Data/data/df_food_tag.csv')

In [None]:
# Number of food -> tag edges (rows).
df_food_tag.shape[0]

In [None]:
# Number of distinct subjects (foods) among the food -> tag edges.
np.unique(df_food_tag['subject']).size

In [None]:
# Number of distinct objects (tags) among the food -> tag edges.
np.unique(df_food_tag['object']).size

**(Food, isInCategory, Category)**

In [None]:
# Food -> category edges: exact `hasCategory` predicate. Subject and object
# are both matched against the same USDA prefix.
is_food_subject = triples['subject'].str.startswith('http://idea.rpi.edu/heals/kb/usda#')
is_category_predicate = triples['predicate'].eq('http://www.w3id.org/foodb/property/hasCategory')
is_category_object = triples['object'].str.startswith('http://idea.rpi.edu/heals/kb/usda#')

df_food_cat = triples.loc[is_food_subject & is_category_predicate & is_category_object]
# The original row index of `triples` is written as the first CSV column.
df_food_cat.to_csv('../Input Data/data/df_food_cat.csv')

**(Food, hasFlavor, Flavor)**

In [None]:
# Food -> flavor edges: food-prefixed subject, exact `hasFlavor` predicate,
# and an object under the foodkg flavor prefix.
is_food_subject = triples['subject'].str.startswith('http://idea.rpi.edu/heals/kb/usda#')
is_flavor_predicate = triples['predicate'].eq('http://www.w3id.org/foodb/property/hasFlavor')
is_flavor_object = triples['object'].str.startswith('https://w3id.org/foodkg/flavor/')

df_food_flavor = triples.loc[is_food_subject & is_flavor_predicate & is_flavor_object]
# The original row index of `triples` is written as the first CSV column.
df_food_flavor.to_csv('../Input Data/data/df_food_flavor.csv')

In [None]:
# Number of food -> flavor edges (rows).
df_food_flavor.shape[0]

In [None]:
# Number of distinct subjects (foods) among the food -> flavor edges.
np.unique(df_food_flavor['subject']).size

In [None]:
# Number of distinct objects (flavors) among the food -> flavor edges.
np.unique(df_food_flavor['object']).size

**(Product, containsIngredient, Ingredient)**

In [None]:
# Product -> ingredient edges: product-prefixed subject, exact BBC food
# ontology `ingredients` predicate, and an ingredient-prefixed object.
is_product_subject = triples['subject'].str.startswith('https://w3id.org/um/ken4256/product/')
is_ingredients_predicate = triples['predicate'].eq('https://www.bbc.co.uk/ontologies/fo/ingredients')
is_ingredient_object = triples['object'].str.startswith('https://w3id.org/um/ken4256/ingredient/')

df_product_ingredient = triples.loc[
    is_product_subject & is_ingredients_predicate & is_ingredient_object
]
# The original row index of `triples` is written as the first CSV column.
df_product_ingredient.to_csv('../Input Data/data/df_product_ingredient.csv')

In [None]:
# Number of product -> ingredient edges (rows).
df_product_ingredient.shape[0]

In [None]:
# Number of distinct subjects (products) among the product -> ingredient edges.
np.unique(df_product_ingredient['subject']).size

In [None]:
# Number of distinct objects (ingredients) among the product -> ingredient edges.
np.unique(df_product_ingredient['object']).size

**(Food, sameAs, Ingredient) - (Ingredient, sameAs, Food)**

In [None]:
# Food -> ingredient links: food-prefixed subject, exact schema.org
# `isSimilarTo` predicate, and an ingredient-prefixed object.
# Only this direction (food as subject) is selected here.
is_food_subject = triples['subject'].str.startswith('http://idea.rpi.edu/heals/kb/usda#')
is_similar_predicate = triples['predicate'].eq('https://schema.org/isSimilarTo')
is_ingredient_object = triples['object'].str.startswith('https://w3id.org/um/ken4256/ingredient/')

df_food_ingredient = triples.loc[is_food_subject & is_similar_predicate & is_ingredient_object]
# The original row index of `triples` is written as the first CSV column.
df_food_ingredient.to_csv('../Input Data/data/df_food_ingredient.csv')

**Get Ground Truth (food substitution pairs)**

In [None]:
# Load the semicolon-separated substitution table and append copies of its
# two id columns under the names `source_id` / `destination_id`.
df_food_subs = pd.read_csv('../Input Data/final_substitution.csv', sep=';').assign(
    source_id=lambda frame: frame['Food id'].tolist(),
    destination_id=lambda frame: frame['Substitution id'].tolist(),
)
# The row index is written as the first CSV column.
df_food_subs.to_csv('../Input Data/data/df_food_subs.csv')

In [None]:
# Number of rows in the substitution table.
df_food_subs.shape[0]

In [None]:
# Number of distinct values in the 'Food label' column.
np.unique(df_food_subs['Food label']).size

In [None]:
# Number of distinct values in the 'Substitution label' column.
np.unique(df_food_subs['Substitution label']).size