# Imports

In [74]:
import pandas as pd
import re
import json
import os
import nltk
from tfidf_ranker import get_most_relevant_sentence_index
import random

In [75]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\klouc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Constants

In [76]:
SUBSTITUTIONS_FILE_PATH = 'data/recipe_substitution_10Sept.xlsx'
RECIPES_FILE_PATH = 'data/recipe_collection/recipe_collection'
KB_PATH = 'data/KB.json'
OUTPUT_CSV_FILE_PATH = 'data/triplets.csv'
 
NEGATIVE_PER_PAIR = 10

# Load Dataset

In [77]:
# Load substitution dataset

df = pd.read_excel(SUBSTITUTIONS_FILE_PATH)
print("Number of rows before NaN removal:", df.shape[0])

# Drop rows with missing critical vals
df.dropna(subset=['recipe_id',
                  'ingredient_name',
                  'MasterDB match for subsitutable ingredient',
                  'substitution_text',
                  'DB Food #1'], inplace=True)
print("Number of rows:", df.shape[0])
df


Number of rows before NaN removal: 893
Number of rows: 892


Unnamed: 0,participant_id,recipe_id,ingredient_name,MasterDB match for subsitutable ingredient,substitution_text,DB Food #1,DB Food #2,DB Food #3,DB Food #4,DB Food #5,Unnamed: 10,Addition #1,Addition #2,Addition #3,Addition #4,Addition #5,Brand needed for substitute model?
1,1.256701e+10,400.0,pork sausages,Sausage,Quorn sausages,Quorn:Sausage,,,,,,,,,,,
2,1.256701e+10,40.0,chicken,Chicken:Breast,Quorn roast,Quorn:Mince,,,,,,,,,,,
3,1.256701e+10,38.0,minced beef,Beef:Mince,Quorn mince,Quorn:Mince,,,,,,,,,,,
4,1.256701e+10,39.0,chicken breasts,Chicken:Breast,Quorn pieces,Quorn:Pieces,,,,,,,,,,,
5,1.256701e+10,243.0,beef mince,Beef:Mince,Quorn mince,Quorn:Mince,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
888,1.271014e+10,551.0,chopped tomatoes,Tomato:Tinned,Passata,Tomato:Passata_SAME,,,,,,,,,,,
889,1.271014e+10,550.0,aubergine,Aubergine,"Leek, mushroom, red cabbage, chickpeas",Leek,Mushroom,Cabbage,Chickpea,,,,,,,,
890,1.271014e+10,550.0,tomato,Tomato:Fresh,"Tinned chopped tomatoes, red pepper",Tomato:Tinned_SAME,Pepper,,,,,,,,,,
891,1.271014e+10,550.0,penne pasta,Pasta,"Spaghetti, macaroni, couscous, rice, other pasta",Spaghetti,Pasta_SAME,Couscous,White rice,,,,,,,,


# Simplifying Dataset (by removing high-level elements for now)

In [78]:
def term_contains_banned_term(term):
    banned_terms = ['SAME', 'MORE', 'REMOVE', 'REFUSAL', 'COMPLEX']
    for banned_term in banned_terms:
        if(banned_term in term):
            return True
    return False

# remove those which suggest substituions with SAME, MORE, REMOVE, REFUSAL and COMPLEX subtitutions
# if more than on sub suggested, take first one that is not SAME, MORE, REMOVE, REFUSAL and COMPLEX
def simplify_dataset(row):
    # FIELD NAMES:
    # participant_id
    # recipe_id
    # ingredient_name
    # MasterDB match for subsitutable ingredient
    # substitution_text
    # DB Food #1
    # DB Food #2
    # DB Food #3
    # DB Food #4
    # DB Food #5
    # Unnamed: 10
    # Addition #1
    # Addition #2
    # Addition #3
    # Addition #4
    # Addition #5
    # Brand needed for substitute model?

    # We care only about participant_id, recipe_id, ingredient_name, MasterDB match for subsitutable ingredient,
    # and the ingredient we will choose to substitute (it's raw and matched form)
    raw_subs = row['substitution_text'].split(',')
    for idx, sub_suggestion_key in enumerate(['DB Food #1', 'DB Food #2', 'DB Food #3', 'DB Food #4', 'DB Food #5']):
        sub_suggestion = row[sub_suggestion_key]
        if(not (pd.isna(sub_suggestion) or term_contains_banned_term(sub_suggestion))):
            # Here we attempt to extract the raw sub text very crudely (sorry), since the raw sub text (usually)
            # is just a list separted by commas, so we choose the index of the DB food wrt the commas, if that doesn't work,
            # take the first one on the list
            row['chosen_sub_match'] = row[sub_suggestion_key]
            try:
                row['chosen_sub_raw'] = raw_subs[idx]
            except:
                row['chosen_sub_raw'] = raw_subs[0]
            return row


In [79]:
simple_df = df.apply(lambda row: simplify_dataset(row), axis=1)
simple_df

Unnamed: 0,participant_id,recipe_id,ingredient_name,MasterDB match for subsitutable ingredient,substitution_text,DB Food #1,DB Food #2,DB Food #3,DB Food #4,DB Food #5,Unnamed: 10,Addition #1,Addition #2,Addition #3,Addition #4,Addition #5,Brand needed for substitute model?,chosen_sub_match,chosen_sub_raw
1,1.256701e+10,400.0,pork sausages,Sausage,Quorn sausages,Quorn:Sausage,,,,,,,,,,,,Quorn:Sausage,Quorn sausages
2,1.256701e+10,40.0,chicken,Chicken:Breast,Quorn roast,Quorn:Mince,,,,,,,,,,,,Quorn:Mince,Quorn roast
3,1.256701e+10,38.0,minced beef,Beef:Mince,Quorn mince,Quorn:Mince,,,,,,,,,,,,Quorn:Mince,Quorn mince
4,1.256701e+10,39.0,chicken breasts,Chicken:Breast,Quorn pieces,Quorn:Pieces,,,,,,,,,,,,Quorn:Pieces,Quorn pieces
5,1.256701e+10,243.0,beef mince,Beef:Mince,Quorn mince,Quorn:Mince,,,,,,,,,,,,Quorn:Mince,Quorn mince
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
888,,,,,,,,,,,,,,,,,,,
889,1.271014e+10,550.0,aubergine,Aubergine,"Leek, mushroom, red cabbage, chickpeas",Leek,Mushroom,Cabbage,Chickpea,,,,,,,,,Leek,Leek
890,1.271014e+10,550.0,tomato,Tomato:Fresh,"Tinned chopped tomatoes, red pepper",Tomato:Tinned_SAME,Pepper,,,,,,,,,,,Pepper,red pepper
891,1.271014e+10,550.0,penne pasta,Pasta,"Spaghetti, macaroni, couscous, rice, other pasta",Spaghetti,Pasta_SAME,Couscous,White rice,,,,,,,,,Spaghetti,Spaghetti


In [80]:
simple_df = simple_df.drop(labels=['DB Food #1', 'DB Food #2', 'DB Food #3', 'DB Food #4', 'DB Food #5',
                'Unnamed: 10', 'Addition #1', 'Addition #2', 'Addition #3', 'Addition #4',
                'Addition #5', 'Brand needed for substitute model?'], axis=1)

In [81]:
simple_df = simple_df.dropna()
simple_df

Unnamed: 0,participant_id,recipe_id,ingredient_name,MasterDB match for subsitutable ingredient,substitution_text,chosen_sub_match,chosen_sub_raw
1,1.256701e+10,400.0,pork sausages,Sausage,Quorn sausages,Quorn:Sausage,Quorn sausages
2,1.256701e+10,40.0,chicken,Chicken:Breast,Quorn roast,Quorn:Mince,Quorn roast
3,1.256701e+10,38.0,minced beef,Beef:Mince,Quorn mince,Quorn:Mince,Quorn mince
4,1.256701e+10,39.0,chicken breasts,Chicken:Breast,Quorn pieces,Quorn:Pieces,Quorn pieces
5,1.256701e+10,243.0,beef mince,Beef:Mince,Quorn mince,Quorn:Mince,Quorn mince
...,...,...,...,...,...,...,...
887,1.271014e+10,551.0,paneer,Paneer,"Aubergine, tofu, mushroom",Aubergine,Aubergine
889,1.271014e+10,550.0,aubergine,Aubergine,"Leek, mushroom, red cabbage, chickpeas",Leek,Leek
890,1.271014e+10,550.0,tomato,Tomato:Fresh,"Tinned chopped tomatoes, red pepper",Pepper,red pepper
891,1.271014e+10,550.0,penne pasta,Pasta,"Spaghetti, macaroni, couscous, rice, other pasta",Spaghetti,Spaghetti


# Makeshift Knowledge Base

In [82]:
# Get all formal entries by removing SAME, MORE, REMOVE, REFUSAL and COMPLEX
def clean_ingred(ingred):
    if(pd.isna(ingred)):
        return ''
    
    # Almond_SAME => Almond
    # Almond_MORE => Almond
    # REMOVE, REFUSAL, COMPLEX => ''
    return re.sub(r'_SAME|_MORE|REMOVE|REFUSAL|COMPLEX', '', ingred)

def extract_ingred_set(collection):
    return set([clean_ingred(sub) for sub in collection.tolist() if clean_ingred(sub) != ''])


In [83]:
ingred_collections = ['MasterDB match for subsitutable ingredient',
                      'DB Food #1',
                      'DB Food #2',
                      'DB Food #3',
                      'DB Food #4',
                      'DB Food #5']

makeshift_kb = set()
for collection_key in ingred_collections:
    makeshift_kb.update(extract_ingred_set(df[collection_key]))
    print('After update, KB has ', len(makeshift_kb), ' elements.')

After update, KB has  133  elements.
After update, KB has  216  elements.
After update, KB has  242  elements.
After update, KB has  252  elements.
After update, KB has  256  elements.
After update, KB has  258  elements.


# Building Triplet Dataset

In [84]:
simple_df

Unnamed: 0,participant_id,recipe_id,ingredient_name,MasterDB match for subsitutable ingredient,substitution_text,chosen_sub_match,chosen_sub_raw
1,1.256701e+10,400.0,pork sausages,Sausage,Quorn sausages,Quorn:Sausage,Quorn sausages
2,1.256701e+10,40.0,chicken,Chicken:Breast,Quorn roast,Quorn:Mince,Quorn roast
3,1.256701e+10,38.0,minced beef,Beef:Mince,Quorn mince,Quorn:Mince,Quorn mince
4,1.256701e+10,39.0,chicken breasts,Chicken:Breast,Quorn pieces,Quorn:Pieces,Quorn pieces
5,1.256701e+10,243.0,beef mince,Beef:Mince,Quorn mince,Quorn:Mince,Quorn mince
...,...,...,...,...,...,...,...
887,1.271014e+10,551.0,paneer,Paneer,"Aubergine, tofu, mushroom",Aubergine,Aubergine
889,1.271014e+10,550.0,aubergine,Aubergine,"Leek, mushroom, red cabbage, chickpeas",Leek,Leek
890,1.271014e+10,550.0,tomato,Tomato:Fresh,"Tinned chopped tomatoes, red pepper",Pepper,red pepper
891,1.271014e+10,550.0,penne pasta,Pasta,"Spaghetti, macaroni, couscous, rice, other pasta",Spaghetti,Spaghetti


In [85]:
class Ingredient:
    def __init__(self, rawName=None, matched=None):
        self.rawRecipeText = None
        self.rawName = rawName
        self.matched = matched

    def __repr__(self):
        return f'{self.rawRecipeText}, {self.rawName}, {self.matched}'


class TripletPoint:
    def __init__(self, recipe_id, anchor, pos, neg=None):
        self.recipe_id = recipe_id
        self.anchor = anchor
        self.pos = pos
        self.neg = neg

    def __repr__(self):
        return f'id: {self.recipe_id}\nanchor: ({self.anchor})\npos: ({self.pos})\nneg: ({self.neg})'
    
    def getDeepCopy(self):
        copy = TripletPoint(recipe_id=self.recipe_id,
                            anchor=Ingredient(
                                    rawName=self.anchor.rawName,
                                    matched=self.anchor.matched),
                            pos=Ingredient(
                                    rawName=self.pos.rawName,
                                    matched=self.pos.matched))
        return copy


In [86]:
tripletPoints = set()

for index, row in simple_df.iterrows():
    dataPoint = TripletPoint(
                             recipe_id=int(row['recipe_id']),
                             anchor=Ingredient(
                                        rawName=row['ingredient_name'],
                                        matched=row['MasterDB match for subsitutable ingredient']),
                             pos=Ingredient(
                                        rawName=row['chosen_sub_raw'],
                                        matched=row['chosen_sub_match']))
    tripletPoints.add(dataPoint)

print('Constructed', len(tripletPoints), 'valid anchor-pos pairs.')

Constructed 745 valid anchor-pos pairs.


In [87]:
list(tripletPoints)[0]

id: 423
anchor: (None, mature cheddar, Cheddar)
pos: (None, Pecorino, Cheese)
neg: (None)

In [88]:
recipeJsons = dict()

# Go through recipe direcory
for recipe in os.listdir(RECIPES_FILE_PATH):
    recipeId = recipe.replace('.json', '')

    # If file empty, do not attempt to open
    if(os.stat(RECIPES_FILE_PATH + '/' + recipe).st_size == 0):
        recipeJsons[recipeId] = None
        continue

    with open(RECIPES_FILE_PATH + '/' + recipe, 'r', encoding='utf-8') as file:
        recipeData = json.load(file)
        recipeJsons[recipeId] = recipeData

print('Extracted', len(recipeJsons.items()), 'recipes,',
      len([recipe for recipe in recipeJsons.items() if recipe[1] != None]), 'out of which have content.')


Extracted 583 recipes, 546 out of which have content.


In [89]:
recipeJsons['132']

{'url': 'https://www.simplyrecipes.com/recipes/lasagna/',
 'title': 'Lasagna',
 'total_CO2': 97.40525113491401,
 'servings': 8,
 'CO2_per_portion': 12.175656391864251,
 'E.Mission coefficient': 4.31582763436642,
 'ingredients': [{'ingredient_number': 1,
   'original_string': 'A large skillet to brown the beef, bell pepper, and onion',
   'amount': '1.0',
   'unit': 'large',
   'ingredient_name': 'beef',
   'matched_ingredient': 'Beef',
   'kg_equivalent': 0.272,
   'carbon_per_kg': 59.57,
   'ingredient_carbon': 16.20304},
  {'ingredient_number': 2,
   'original_string': '1 28-ounce can good quality tomato sauce',
   'amount': '29.0',
   'unit': 'ounce',
   'ingredient_name': 'tomato sauce',
   'matched_ingredient': 'Tomato sauce',
   'kg_equivalent': 0.822136170625,
   'carbon_per_kg': 1.427,
   'ingredient_carbon': 1.173188315481875},
  {'ingredient_number': 3,
   'original_string': '1 14-ounce can crushed tomatoes',
   'amount': '15.0',
   'unit': 'ounce',
   'ingredient_name': 'cru

# Extract KB items

In [90]:
with open(KB_PATH, 'r', encoding='utf-8') as file:
    KB = json.load(file)

print(f'Read {len(KB)} items in KB.')

Read 664 items in KB.


In [91]:
KB

[{'ingredient': 'Apple',
  'foodon_ids': ['FOODON_00002473'],
  'alternate_names': ['apple (whole)', 'apple'],
  'foodon_foodex2_id': 'FOODON_03541473'},
 {'ingredient': 'Apple juice',
  'foodon_ids': ['FOODON_00001059'],
  'alternate_names': ['apple juice'],
  'foodon_foodex2_id': 'FOODON_03543171'},
 {'ingredient': 'Apple sauce',
  'foodon_ids': ['FOODON_03301126'],
  'alternate_names': ['applesauce'],
  'foodon_foodex2_id': 'FOODON_03544148'},
 {'ingredient': 'Cider',
  'foodon_ids': ['FOODON_00001276'],
  'alternate_names': ['apple cider (alcoholic)', 'cider'],
  'foodon_foodex2_id': 'FOODON_03543357'},
 {'ingredient': 'Pear',
  'foodon_ids': ['FOODON_03301724'],
  'alternate_names': ['pear (whole, raw)', 'pear'],
  'foodon_foodex2_id': 'FOODON_03541477'},
 {'ingredient': 'Capers',
  'foodon_ids': ['FOODON_00001681', 'FOODON_00003493'],
  'alternate_names': ['caper food product',
   'caper',
   'capers',
   'caper (whole)',
   'Flinders rose',
   'caper bush'],
  'foodon_foodex2_id

In [92]:
KB_ingreds = [entry['ingredient'].lower() for entry in KB]
KB_ingreds

['apple',
 'apple juice',
 'apple sauce',
 'cider',
 'pear',
 'capers',
 'laverbread',
 'nori',
 'salsify',
 'seaweed',
 'banana',
 'matoki',
 'plantain',
 'ale',
 'beer',
 'lager',
 'nutritional yeast',
 'stout',
 'yeast',
 'yeast extract',
 'barbeque sauce',
 'caster sugar',
 'chutney',
 'club soda',
 'cola',
 'golden syrup',
 'granulated sugar',
 'icing',
 'icing sugar',
 'marshmallow',
 'sugar',
 'amla',
 'blackberries',
 'blackcurrant',
 'blueberries',
 'cherry',
 'cranberries',
 'cranberry sauce',
 'currant',
 'damson',
 'date',
 'elderberries',
 'gooseberries',
 'grape',
 'grape leaves',
 'greengage',
 'jam',
 'loganberries',
 'mixed fruit',
 'mulberries',
 'physalis',
 'pimento',
 'raisin',
 'raspberries',
 'redcurrant jelly',
 'redcurrants',
 'strawberries',
 'sultanas',
 'whitecurrant',
 'beef',
 'dripping',
 'oxtail',
 'pastrami',
 'suet',
 'sweetbread',
 'tongue',
 'tripe',
 'veal',
 'beef extract',
 'beef stock',
 'beef stock cube',
 'amaranth',
 'arugula',
 'asparagus',
 

# Construct triplets

In [93]:
def get_negative_examples(triplet):
    anchor = triplet.anchor.matched
    pos = triplet.pos.matched
    orig = [anchor, pos]
    used_negs = []

    while True:
        neg = random.choice(KB_ingreds)
        if(neg not in orig and neg not in used_negs):
            used_negs.append(neg)
        if(len(used_negs) == NEGATIVE_PER_PAIR):
            break
    
    newTriplets = []
    for neg in used_negs:
        newTriplet = triplet.getDeepCopy()
        newTriplet.neg = Ingredient(
            matched=neg
        )
        newTriplets.append(newTriplet)
    return newTriplets
    

In [94]:
def replace_source_ingrd_with_target(text, source, target):

    # Separate recipe name
    name_index = text.find(':')
    recipe_name = text[0:name_index + 1]
    text_instruction_only = text[name_index + 1:]

    # Apply temp string
    for source_word in source.split(' '):
        text_instruction_only = text_instruction_only.replace(source_word, "TEMP_STRING")
    
    text = recipe_name + text_instruction_only
    
    # Remove white spaces between temp string
    text = re.sub(re.compile(r'TEMP_STRING+(?:\sTEMP_STRING+)*'), target, text)
    return text

# # Example usage
# text = "This is ground beef with a bit of lean beef"
# source = "lean ground beef"
# target = "raw chicken"

# new_text = replace_source_ingrd_with_target(text, source, target)
# print(new_text)

In [99]:
validTripletPoints = list()

for tripletPoint in list(tripletPoints):
    currRecipeId = tripletPoint.recipe_id
    currRecipe = recipeJsons[f'{currRecipeId}']
    rawAnchor = tripletPoint.anchor.rawName
    
    instructions = []
    try:
        instructions = currRecipe['instructions']
    except:
        print(f'Recipe with id {currRecipeId} has no instructions! Skipping...')
        continue
    
    if(len(instructions) == 0):
        print(f'Recipe with id {currRecipeId} has no instructions! Skipping...')
        continue

    currRecipeName = currRecipe['title']

    sentenceCandidates = []
    for instruction in currRecipe['instructions']:
        instruction = instruction['instruction']
        # An instruction may contain multiple 
        sentenceCandidates.extend(nltk.sent_tokenize(instruction))

    best_sent_index = get_most_relevant_sentence_index(rawAnchor, sentenceCandidates)

    # If can't choose best sentence, skip
    if(best_sent_index == -1):
        print(f'Can\'t choose best sentence in recipe with id {currRecipeId}! Skipping...')
        continue

    # Add recipe name
    sentenceCandidates = [f'{currRecipeName}: {sent}' for sent in sentenceCandidates]
    
    # Save anchor sentence
    anchorSent = sentenceCandidates[best_sent_index]
    tripletPoint.anchor.rawRecipeText = anchorSent

    # Construct pos sentence and save
    posSent = replace_source_ingrd_with_target(anchorSent, rawAnchor, tripletPoint.pos.rawName.lower())
    tripletPoint.pos.rawRecipeText = posSent
    
    # Add neg examples
    newTriplets = get_negative_examples(tripletPoint)
    for newTriplet in newTriplets:
        # Construct neg sentence and save
        negSent = replace_source_ingrd_with_target(anchorSent, rawAnchor, newTriplet.neg.matched.lower())
        
        newTriplet.anchor.rawRecipeText = anchorSent
        newTriplet.pos.rawRecipeText = posSent
        newTriplet.neg.rawRecipeText = negSent
        validTripletPoints.append(newTriplet)
    
    print(anchorSent, rawAnchor)
print(f'Constructed {len(validTripletPoints)} valid triplets.')

    

Can't choose best sentence in recipe with id 423! Skipping...
Can't choose best sentence in recipe with id 424! Skipping...
Can't choose best sentence in recipe with id 137! Skipping...
Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside coconut rice. fat coconut milk
Classic jacket potatoes: Cut a cross on top of each potato, squeeze the sides, add the soured cream and your favourite toppings. soured cream
Chinese Vegetable Stir-Fry: Add the broccoli and cook for 2-3 minutes, or until tender-crisp. broccoli
Homemade Vegetarian Chili: Add the diced tomatoes and their juices, the drained black beans and pinto beans, vegetable broth and bay leaf. black beans
Crispy Sesame Chicken with a Sticky Asian Sauce: Add the chicken back in and toss in the sauce to coat. chicken breast fillets
Kale, tofu, mushroom and quinoa protein bowl: Rinse the cooked kale with ice-cold water and drain well, set aside. cooked tri-colour quinoa
Creamy Garlic Herb Mushroom Spaghetti: Add the milk 

In [96]:
validTripletPoints

[id: 579
 anchor: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside coconut rice., fat coconut milk, Coconut milk)
 pos: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside  oat yoghurt rice.,  oat yoghurt, Plant-based yogurt)
 neg: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside agave rice., None, agave),
 id: 579
 anchor: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside coconut rice., fat coconut milk, Coconut milk)
 pos: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside  oat yoghurt rice.,  oat yoghurt, Plant-based yogurt)
 neg: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside lemon sole rice., None, lemon sole),
 id: 579
 anchor: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside coconut rice., fat coconut milk, Coconut milk)
 pos: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside  oat yoghurt rice.,  oat yoghurt, Plant-based yogur

# Save output

In [98]:
import csv

def write_triplets_to_csv(validTripletPoints, output_file):
    
    # Write data to CSV file
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['anchor', 'anchor_raw_ingred_name', 'anchor_matched_ingred_name',
                         'pos', 'pos_raw_ingred_name', 'pos_matched_ingred_name',
                         'neg', 'neg_raw_ingred_name'])
        for point in validTripletPoints:
            writer.writerow([
                point.anchor.rawRecipeText,
                point.anchor.rawName,
                point.anchor.matched,
                point.pos.rawRecipeText,
                point.pos.rawName,
                point.pos.matched,
                point.neg.rawRecipeText,
                point.neg.matched,
            ])

# Example usage:
write_triplets_to_csv(validTripletPoints, OUTPUT_CSV_FILE_PATH)

print(validTripletPoints[0])


id: 579
anchor: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside coconut rice., fat coconut milk, Coconut milk)
pos: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside  oat yoghurt rice.,  oat yoghurt, Plant-based yogurt)
neg: (Filling & Nourishing Red Lentil Curry with Spinach: Serve alongside agave rice., None, agave)
