# HW4 - Annotate Your PDDL with WikiHow Mentions

In this part of the homework, we will collect data about how the elements in your PDDL domain map onto the text of the wikiHow articles.

## Install the PDDL Parser

In [25]:
# Install the PDDL parser
!git clone https://github.com/pucrs-automated-planning/pddl-parser
!cd pddl-parser

# Make the directory available to Python so you can import packages from it.
import sys
sys.path.append("pddl-parser")

fatal: destination path 'pddl-parser' already exists and is not an empty directory.


### Helper functions

In [26]:
import os
import json
import textwrap


## Create a file from the contents of a string. 
def write_to_file(string, path, filename):
  """Write `string` to the file `filename` inside the directory `path`.

  The directory is created first when `path` is non-empty and does not exist.
  An existing file of the same name is overwritten.
  """
  directory_missing = path != "" and not os.path.exists(path)
  if directory_missing:
    os.makedirs(path)

  destination = os.path.join(path, filename)
  with open(destination, "w") as out_file:
    out_file.write(string)


# Load a JSON file given a filename
# If the file doesn't exist, then return an empty dictionary instead
def load_json(filename):
    """Return the parsed contents of the JSON file `filename`.

    A missing file is not an error: an empty dictionary is returned instead.
    """
    try:
        json_file = open(filename, 'r')
    except FileNotFoundError:
        # Nothing has been saved yet, so start from an empty dictionary.
        return {}
    with json_file:
        return json.load(json_file)

# Save a JSON file given a full path filename and a dictionary
# If the path doesn't exist, create the directories before saving the file.
def save_json(filename, data):
    """Serialize `data` as indented JSON into the file `filename`.

    Any missing parent directories of `filename` are created first.

    Args:
        filename: Path of the file to write. It may be a bare filename, in
            which case the file is written to the current working directory.
        data: A JSON-serializable object (the notebook passes a dictionary).
    """
    directory = os.path.dirname(filename)
    # dirname() is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one to create.
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(filename, 'w') as f:
        json.dump(data, f, indent=4)


def wrap_text(text):
  """Re-flow `text` so that no line is longer than 90 characters.

  The text is stripped and split into lines; lines longer than 90 characters
  are wrapped at word boundaries (long words are never broken). Every
  resulting line is terminated with a newline.
  """
  wrapper = textwrap.TextWrapper(width=90, break_long_words=False)
  pieces = []
  for raw_line in text.strip().split("\n"):
    if len(raw_line) > 90:
      raw_line = '\n'.join(wrapper.wrap(raw_line))
    pieces.append(raw_line + "\n")
  return "".join(pieces)

## TODO: Create Annotations for your Domain

After you have finished creating your PDDL files, please use this notebook to annotate data and save a JSON file that links the phrases in the wikiHow article that you selected with the different elements of your PDDL domain.

If anyone is interested in doing a term project focused on automatically converting wikiHow to PDDL, then we'll share this JSON data with your classmates.


TODO: Update these variables to specify your domain and problem files, plus the path on your Google Drive where they are stored.

In [27]:
# Mount your Google Drive so that you can save your PDDL files.
from google.colab import drive
drive.mount('/content/drive')
# Folder on the mounted Drive that holds the PDDL files named below.
path = '/content/drive/My Drive/CIS 700/HW4'

# The PDDL domain file and the list of problem files; both are joined onto
# `path` when they are parsed.
domain_filename = "domain-survive_in_the_woods.pddl"
problem_filenames = [ "problem-collect_water.pddl" ]

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
from PDDL import PDDL_Parser

def get_pddl_elements(domain_filename, problem_filenames, path):
  """
  Parses the PDDL domain file and extracts the following information:
  * The name of the domain
  * A list of types used in the domain (each type listed once, "object" as a
    supertype is left out)
  * A list of action schemata names
  * A list of the domain's predicates
  Returns the tuple (domain_name, types, actions, predicates); the last three
  items are lists.

  The first problem file, when there is one, is parsed as well, so a broken
  problem file is reported here. An empty `problem_filenames` is accepted.
  """
  parser = PDDL_Parser()
  parser.parse_domain(os.path.join(path, domain_filename))
  # Indexing [0] on an empty list would raise IndexError, so guard it.
  if problem_filenames:
    parser.parse_problem(os.path.join(path, problem_filenames[0]))

  domain_name = parser.domain_name

  # parser.types is indexed by a type and yields that type's subtypes. A type
  # that is both a subtype and a supertype would otherwise be collected twice,
  # which makes the annotation step prompt for it twice.
  types = []
  seen = set()
  for supertype in parser.types:
    candidates = list(parser.types[supertype])
    if not supertype == "object":
      candidates.insert(0, supertype)
    for ty in candidates:
      if ty not in seen:
        seen.add(ty)
        types.append(ty)

  actions = [action.name for action in parser.actions]
  predicates = list(parser.predicates)

  return domain_name, types, actions, predicates

def get_problem_names(domain_filename, problem_filenames, path):
  """
  Parses each PDDL problem file and collects the name declared in it.
  Returns the list of problem names, in the order of `problem_filenames`.
  """
  def read_problem_name(problem_filename):
    # A fresh parser is used per problem; the domain is parsed before the problem.
    problem_parser = PDDL_Parser()
    problem_parser.parse_domain(os.path.join(path, domain_filename))
    problem_parser.parse_problem(os.path.join(path, problem_filename))
    return problem_parser.problem_name

  return [read_problem_name(problem_filename) for problem_filename in problem_filenames]


# Parse the PDDL files and keep the extracted names as notebook-level variables.
domain_name, types, actions, predicates = get_pddl_elements(domain_filename, problem_filenames, path)
problem_names = get_problem_names(domain_filename, problem_filenames, path)

In [None]:
def load_annotation_json(domain_filename, problem_filenames, path):
  """
  Create or refresh the annotation JSON file for a PDDL domain.

  The file is named after the domain ("<domain name>.json") and lives in `path`.
  Anything not yet recorded in it is requested interactively: the wikiHow
  article (URL and title), the sharing permissions, and, for every problem
  that has no entry yet, the part / method / step numbers and the pasted text
  of the step. The lists of types, actions and predicates are always
  overwritten with what the PDDL domain currently contains.

  Returns the full path of the JSON file that was saved.
  """
  domain, types, actions, predicates = get_pddl_elements(domain_filename, problem_filenames, path)
  problem_names = get_problem_names(domain_filename, problem_filenames, path)

  json_filename = os.path.join(path, domain + ".json")
  data = load_json(json_filename)

  # Make sure the skeleton for this domain exists.
  domain_entry = data.setdefault(domain, {})
  problem_entries = domain_entry.setdefault('problems', {})

  # Ask the user which wikiHow article they used.
  if 'wikihow_url' not in domain_entry:
    url_prompt = "What is URL of the wikiHow article you used to create the domain {domain}?\n".format(domain=domain.upper())
    domain_entry['wikihow_url'] = input(url_prompt)
    domain_entry['wikihow_title'] = input("What is the title of the wikiHow article?\n")

  # Sharing permissions are stored once, at the top level of the file.
  if 'permissions' not in data:
    print("Would you be willing to share your PDDL and Annotations for research purposes under a Creative Commons CC-BY license?\nhttps://creativecommons.org/about/cclicenses/")
    agreed = input()
    print("If so, please type your name if you'd like to be attributed, or 'Anonymous' if you want to be Anonymous.")
    attribution = input()
    data['permissions'] = {'agreed_to_CC-BY': agreed, 'name': attribution}

  # Update the elements for the domain.
  domain_entry['types'] = types
  domain_entry['actions'] = actions
  domain_entry['predicates'] = predicates

  for problem in problem_names:
    if problem in problem_entries:
      # This problem has already been described; nothing to ask.
      continue

    # Ask which part of the wikiHow article this problem was built from.
    print("For the WikiHow article '{title}'\n{url}\n".format(
        title=domain_entry['wikihow_title'], url=domain_entry['wikihow_url']))
    print("Tell us what part you used to create the problem {problem}".format(problem=problem.upper()))
    print("We'll ask for the part number, method number and step number from the article.\n")

    part_number = input("If the article has multiple PARTS, which PART did you use?\n(Enter 0 if there are not multiple parts.)\n")
    method_number = input("If the article has multiple METHODs, which METHOD did you use?\n(Enter 0 if there are not multiple methods.)\n")
    step_number = input("What STEP number did you use?\n")
    print("Please copy-and-paste the step's CONTENT.\nPress Ctrl-D (or Ctrl-Z on Windows) to save it.")

    # Collect pasted lines until end-of-input is signalled.
    pasted_lines = []
    while True:
      try:
        pasted_lines.append(input())
      except EOFError:
        break

    problem_entries[problem] = {
        'part_number': part_number,
        'method_number': method_number,
        'step_number': step_number,
        'step_contents': "\n".join(pasted_lines),
    }

  # Save the json file
  save_json(json_filename, data)
  return json_filename

# Build (or refresh) the annotation file and remember where it was saved.
json_filename = load_annotation_json(domain_filename, problem_filenames, path)
  

In [34]:
def annotate_mentions(json_filename):
  """
  Walk through the json file and have the user copy-and-paste mentions for the
  PDDL elements that are mentioned in the text of the wikihow article.  This
  is useful for later projects that focus on automatically deriving PDDL from
  text.

  For every problem of every domain in the file, the step text is shown and
  the user is prompted once per type, predicate and action. Each non-empty
  line entered is recorded as a mention; a blank line moves on to the next
  item. Mentions entered in an earlier session are shown and kept, and new
  ones are appended to them. The file is saved after every mention.

  Args:
    json_filename: Path of the annotation JSON file to read and update.
  """
  data = load_json(json_filename)
  annotated_any = False

  for domain, domain_entry in data.items():
    # The file also has top-level entries that are not domains (the
    # 'permissions' record); they have no 'problems' and must be skipped,
    # otherwise looking up their 'problems' raises KeyError.
    if not isinstance(domain_entry, dict) or 'problems' not in domain_entry:
      continue
    annotated_any = True

    for problem, problem_entry in domain_entry['problems'].items():

      print("Problem:", problem.upper(), '\n')
      wrapped_text = wrap_text(problem_entry['step_contents'])
      print(wrapped_text)
      print('----\n')

      print("Please copy-and-paste phrases from the step that mention types, actions, and predicates from the PDDL.")
      print("A mention can be indirect. If there is more than one mention describing the same thing, then put one per line.")
      print("\nEnter a blank line when you're done.\n")

      problem_mentions = problem_entry.setdefault('mentions', {})
      for pddl_element in ['types', 'predicates', 'actions']:
        element_mentions = problem_mentions.setdefault(pddl_element, {})
        for item in domain_entry[pddl_element]:
          # Existing mentions are kept; the user can add to them.
          mentions = element_mentions.setdefault(item, [])

          # pddl_element[:-1] drops the plural "s" ("types" -> "type").
          print("Mentions related to the", pddl_element[:-1], item.upper())
          if len(mentions) > 0:
            print("So far you have entered:")
            print("\n".join(mentions))

          while True:
            mention = input()
            if mention == "":
              break
            mentions.append(mention)
            # Save after every mention so an interrupted session keeps them.
            save_json(json_filename, data)

    # Also persist the (possibly empty) mention lists created for this domain.
    save_json(json_filename, data)

  # Report completion once, and only if there was something to save.
  if annotated_any:
    print("\nYou're done annotating!".upper())
    print("Your file has been saved as {filename}".format(filename=json_filename))

# Start the interactive annotation session on the file named by `json_filename`.
annotate_mentions(json_filename)

Problem: COLLECT_WATER 

Search for a source of fresh water.[1]  The first thing that you'll need in order to
survive in the woods is water that you can drink. Look for signs of fresh water nearby
like areas of green foliage that indicate water is nearby, low-lying areas where water
could be collected, and signs of wildlife like animal tracks. It could mean that a creek,
stream, or pond is nearby. While finding water is important for survival, be aware some
water sources will not be safe - if possible treat all drinking water before using it. [2]
If there are mountains nearby, look for water collected at the foot of the cliffs. The
presence of insects like mosquitoes and flies means that water is nearby. Water from
heavily oxygenated water (such as from a big waterfall or rapids) typically is safer than
that from a slow or still water source. Freshwater springs are typically safer water
sources, although these can be contaminated by mineral or bacteria as well. Remember that
all untrea