In [1]:
# Standard library
import re

# Third-party packages
import pandas as pd
import spacy
from spacy import displacy

# Load the small English spaCy pipeline once; every later cell reuses `nlp`.
# If the model is not installed yet, run: python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

In [2]:
# Setting up notebook to display multiple outputs in one cell.
# This is a class-level setting on IPython's InteractiveShell, so it is set once here
# and applies to the cells run afterwards in this kernel session.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Sample recipe directions, hard-coded in the shape web_scraping.ipynb would produce them.
# Each entry is one direction with its trailing period already stripped.
# NOTE: entries 4 and 5 carry an unbalanced "(" / ")" pair; they are kept verbatim
# so that this fixture matches the scraped input exactly.
directions = [
    "Preheat the oven to 450 degrees F",
    "Pulse the carrots, celery and shallots in a food processor until coarsely chopped",
    "Heat 1 tablespoon of the olive oil in a large nonstick skillet over medium high",
    "Add the chopped vegetables and cook, stirring frequently, until light golden and soft, 8 to 10 minutes",
    "(Add a splash of water if the mixture begins to stick",
    ") Add the ground beef and 4 teaspoons of the tomato paste and cook, breaking the mixture up with a wooden spoon, until browned, about 4 minutes",
    "Add 1/2 cup water, the parsley, raisins, 1 tablespoon of the vinegar and 3/4 teaspoon salt",
    "Bring to a simmer and cook until most of the water is absorbed and the mixture gets saucy, about 1 minute",
    "Let cool slightly",
    "Meanwhile, toss the pepper halves with the remaining 1/2 tablespoon oil in a microwave-safe bowl",
    "Cover with plastic wrap and microwave until the peppers are pliable, 10 to 12 minutes",
    "Carefully uncover the bowl and pour out any liquid that has accumulated",
    "Stir the couscous into the beef mixture",
    "Whisk together the remaining 2 tablespoons tomato paste, 1 tablespoon vinegar and 3/4 cup water in the bottom of a large baking dish",
    "Carefully transfer the peppers to the baking dish cut-side up and fill each pepper with the couscous mixture",
    "Sprinkle with the cheese",
    "Cover with foil and bake until the peppers are tender and the stuffing is hot, 20 to 25 minutes",
    "Serve warm or at room temperature drizzled with the tomato cooking liquid",
]

In [4]:
# Build a token-level table of the dependency parse: one row per token of every direction.
dependency_columns = ['direction_index', 'word', 'pos_coarse', 'pos_fine', 'dependency', 'parent']

# Rows are collected in a plain list and turned into a DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it per token copied the
# whole frame on every iteration (quadratic in the number of tokens).
token_rows = []
for i, direction_text in enumerate(directions):
    # Creating a Doc object from the direction
    direction_doc = nlp(direction_text)
    # Optional: visualize the dependency relations for this direction
    #displacy.render(direction_doc, style = "dep")
    # Recording the parse information of every token in the direction
    for token in direction_doc:
        token_rows.append({
            'direction_index': i,
            'word': token.text,
            'pos_coarse': token.pos_,
            'pos_fine': token.tag_,
            'dependency': token.dep_,
            # The head is stored as returned by spaCy (not converted to a string)
            'parent': token.head,
        })

# Passing `columns` keeps the expected schema even when `directions` is empty
direction_dep_df = pd.DataFrame(token_rows, columns = dependency_columns)

print(direction_dep_df.to_string())

    direction_index         word pos_coarse pos_fine dependency       parent
0                 0      Preheat       VERB       VB       ROOT      Preheat
1                 0          the        DET       DT        det         oven
2                 0         oven      PROPN      NNP       dobj      Preheat
3                 0           to        ADP       IN       prep      Preheat
4                 0          450        NUM       CD     nummod      degrees
5                 0      degrees       NOUN      NNS       pobj           to
6                 0            F       NOUN       NN       dobj      Preheat
7                 1        Pulse      PROPN      NNP       ROOT        Pulse
8                 1          the        DET       DT        det      carrots
9                 1      carrots       NOUN      NNS      appos        Pulse
10                1            ,      PUNCT        ,      punct      carrots
11                1       celery       NOUN       NN       conj      carrots

In [6]:
# Canonical (singular, spelled-out) names of measurement units and package-style quantities.
units_list = [
    "ounce", "pinch", "dash", "cup", "gallon", "pint",
    "milliliter", "liter", "gram", "pound", "fluid ounce", "kilogram",
    "spoon", "quart", "container", "can", "box", "package", "packet",
    "tablespoon", "teaspoon",
]

# Lookup from unit abbreviation to its spelled-out name.
# Written as (name, abbreviations) pairs so each unit's spellings sit together;
# the resulting dict maps every abbreviation to its name.
unit_conversion = {
    abbreviation: unit_name
    for unit_name, abbreviations in (
        ("ounce", ("oz",)),
        ("cup", ("c",)),
        ("pint", ("pt",)),
        ("gallon", ("gal",)),
        ("milliliter", ("ml", "mL")),
        ("gram", ("g",)),
        ("pound", ("lb",)),
        ("kilogram", ("kg", "kilo")),
        ("quart", ("qt",)),
        ("liter", ("l", "L")),
        ("tablespoon", ("tb", "tbs", "tbsp")),
        ("teaspoon", ("tsp",)),
    )
    for abbreviation in abbreviations
}

# Names of kitchen equipment to look for in a direction.
equipment_list = [
    # cookware
    "pot", "pan", "sheet pan", "tray",
    "dish", "pressure cooker", "blender", "toaster",
    # appliances and heat sources
    "microwave", "stove", "oven", "range", "burner",
    "cooktop", "measuring cup", "cup", "bowl", "mixing bowl",
    # utensils
    "spoon", "fork", "knife", "ladle", "spatula", "tongs", "paddle", "strainer", "mixer", "whisk",
    # consumables and remaining equipment
    "toothpick", "foil", "parchment", "paper", "plastic wrap", "paper towel",
    "food processor", "skillet", "baking dish",
]

# Extract structured fields (action, temperature, equipment, ...) from the first direction
# and store them as a single-row DataFrame.
direction_columns = ['direction_index', 'raw_text', 'action', 'action_duration', 'action_temperature', 'action_details', 'ingredient_amount', 'ingredient_unit', 'ingredient_name', 'equipment']

# Creating a Doc object from the directions
direction_doc = nlp(directions[0])
# Pulling out the relevant information from the dependency parsed directions
direction_index = 0
raw_text = direction_doc.text
action = ""
# The fields below are not extracted yet and stay empty in the resulting row
action_duration = ""
action_details = ""
ingredient_amount = ""
ingredient_unit = ""
ingredient_name = ""
equipment = ""
# Temperature pieces are gathered in document order and joined once afterwards, so a
# missing piece never leaves a stray leading space and several numbers are not fused
temperature_parts = []
for token in direction_doc:
    # The root of the parse is taken as the action of the direction
    if token.dep_ == 'ROOT':
        action = token.text
    # A number that modifies the word "degrees" is the temperature value
    if token.dep_ == 'nummod' and token.head.text == 'degrees':
        temperature_parts.append(token.text)
    if token.text == 'degrees':
        temperature_parts.append(token.text)
    # Temperature scale, written either as "F" or as "Fahrenheit"/"fahrenheit"
    if token.text == 'F' or re.match('[Ff]ahrenheit', token.text):
        temperature_parts.append(token.text)
    # NOTE: matching is per token and case-sensitive, so multi-word entries of
    # equipment_list (e.g. "food processor") cannot match here; when several
    # tokens match, the last one wins
    if token.text in equipment_list:
        equipment = token.text
action_temperature = ' '.join(temperature_parts)

new_row = {'direction_index': direction_index, 'raw_text': raw_text, 'action': action, 'action_duration': action_duration, 'action_temperature': action_temperature, 'action_details': action_details, 'ingredient_amount': ingredient_amount, 'ingredient_unit': ingredient_unit, 'ingredient_name': ingredient_name, 'equipment': equipment}
# DataFrame.append was removed in pandas 2.0; build the frame directly from the row
direction_df = pd.DataFrame([new_row], columns = direction_columns)

direction_df

Unnamed: 0,direction_index,raw_text,action,action_duration,action_temperature,action_details,ingredient_amount,ingredient_unit,ingredient_name,equipment
0,0,Preheat the oven to 450 degrees F,Preheat,,450 degrees F,,,,,oven
