Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding flake8, pytest, tox, and github actions #59

Merged
merged 14 commits into from
Dec 18, 2023
Merged
32 changes: 32 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# CI workflow: run the test suite via tox on every push and pull request,
# across three OSes and Python 3.8-3.12.
name: Tests

on:
  - push
  - pull_request

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      # Keep running the other matrix cells even if one fails,
      # so a single flaky OS/Python combination doesn't mask results.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
      # Force LF line endings before checkout so Windows runners
      # compare files consistently with Linux/macOS.
      - name: Set git to use LF
        run: |
          git config --global core.autocrlf false
          git config --global core.eol lf

      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        # '|' (literal block) is required here: a bare `run:` would fold
        # both commands into one invalid shell line.
        run: |
          python -m pip install --upgrade pip
          pip install tox tox-gh-actions
      - name: Test with tox
        # tox-gh-actions maps the matrix python-version to the right tox env.
        run: tox
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
include LICENSE
include README.md
include requirements.txt
include requirements-dev.txt

include scienceworld/scienceworld-*.jar
include scienceworld/object_type_ids.tsv
37 changes: 18 additions & 19 deletions examples/human.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def userConsole(args):
simplificationStr = args['simplification_str']

# Initialize environment
env = ScienceWorldEnv("", args['jar_path'], envStepLimit = args['env_step_limit'])
env = ScienceWorldEnv("", args['jar_path'], envStepLimit=args['env_step_limit'])
taskNames = env.getTaskNames()
print("Task Names: " + str(taskNames))

Expand All @@ -22,50 +22,49 @@ def userConsole(args):
print("Starting Task " + str(taskIdx) + ": " + taskName)
time.sleep(2)


# Reset the environment
initialObs, initialDict = env.reset()



#
# Examples of how to access much of the environment information that the API exposes.
# (Many of these are similar to the Jericho API)
#
print("Task Names: " + str(taskNames))
print("Possible actions: " + str(env.getPossibleActions()) )
print("Possible objects: " + str(env.getPossibleObjects()) )
print("Possible actions: " + str(env.getPossibleActions()))
print("Possible objects: " + str(env.getPossibleObjects()))
templates, lut = env.getPossibleActionObjectCombinations()
print("Possible action/object combinations: " + str(templates))
#print("Object IDX to Object Referent LUT: " + str(lut))
print("Vocabulary: " + str(env.getVocabulary()) )
# print("Object IDX to Object Referent LUT: " + str(lut))
print("Vocabulary: " + str(env.getVocabulary()))
print("Possible actions (with IDs): " + str(env.getPossibleActionsWithIDs()))
print("Possible object types: " + str(env.getObjectTypes()))
print("Object IDX to Object Referent LUT: " + str(lut))
print("\n")
print("Possible object referents LUT: " + str(env.getPossibleObjectReferentLUT()))
print("\n")
print("Valid action-object combinations: " + str(env.getValidActionObjectCombinations()))
print("Valid action-object combinations: " +
str(env.getValidActionObjectCombinations()))
print("\n")
print("Object_ids to type_ids: " + str(env.getAllObjectTypesLUTJSON()))
print("\n")
print("All objects, their ids, types, and referents: " + str(env.getAllObjectIdsTypesReferentsLUTJSON() ))
print("All objects, their ids, types, and referents: " +
str(env.getAllObjectIdsTypesReferentsLUTJSON()))
print("\n")
print("Valid action-object combinations (with templates): " + str(env.getValidActionObjectCombinationsWithTemplates() ))
print("Valid action-object combinations (with templates): " +
str(env.getValidActionObjectCombinationsWithTemplates()))
print("\n")
print("Object Type LUT: " + str(env.getPossibleObjectReferentTypesLUT()))
print("Variations (train): " + str(env.getVariationsTrain() ))
print("Variations (train): " + str(env.getVariationsTrain()))

print("")
print("----------------------------------------------------------------------------------")
print("")


print("Gold Path:" + str(env.getGoldActionSequence()))

print("Task Name: " + taskName)
print("Variation: " + str(args['var_num']) + " / " + str(env.getMaxVariations(taskName)))
print("Task Description: " + str(env.getTaskDescription()) )
print("Task Description: " + str(env.getTaskDescription()))

#
# Main user input loop
Expand Down Expand Up @@ -97,9 +96,10 @@ def userConsole(args):
print("Reward: " + str(reward))
print("Score: " + str(score))
print("isCompleted: " + str(isCompleted))
#print("info: " + str(info))
# print("info: " + str(info))

print("'help' lists valid action templates, 'objects' lists valid objects, 'valid' lists valid action-object combinations (long!). ")
print("'help' lists valid action templates, 'objects' lists valid" +
" objects, 'valid' lists valid action-object combinations (long!). ")
print("'goals' lists progress on subgoals.")
print("type 'exit' to quit.")

Expand All @@ -108,7 +108,6 @@ def userConsole(args):
# Sanitize input
userInputStr = userInputStr.lower().strip()


# Display run history
runHistory = env.getRunHistory()
print("Run History:")
Expand Down Expand Up @@ -143,9 +142,9 @@ def build_simplification_str(args):

return args["simplifications_preset"] or ",".join(simplifications)

#
# Parse command line arguments
#


def parse_args():
desc = "Play through a game using the console."
parser = argparse.ArgumentParser(desc)
Expand Down
56 changes: 28 additions & 28 deletions examples/random_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ def randomModel(args):
""" Example random agent -- randomly picks an action at each step. """
exitCommands = ["quit", "exit"]


taskIdx = args['task_num']
simplificationStr = args['simplification_str']
numEpisodes = args['num_episodes']
Expand All @@ -18,14 +17,15 @@ def randomModel(args):
finalScores = []

# Initialize environment
env = ScienceWorldEnv("", args['jar_path'], envStepLimit = args['env_step_limit'])
env = ScienceWorldEnv("", args['jar_path'], envStepLimit=args['env_step_limit'])

taskNames = env.getTaskNames()
print("Task Names: " + str(taskNames))

# Choose task
taskName = taskNames[taskIdx] # Just get first task
env.load(taskName, 0, "") # Load the task, so we have access to some extra accessors e.g. getRandomVariationTrain() )
# Load the task, so we have access to some extra accessors e.g. get_random_variation_train()
env.load(taskName, 0, "")
maxVariations = env.getMaxVariations(taskName)
print("Starting Task " + str(taskIdx) + ": " + taskName)
time.sleep(2)
Expand All @@ -40,28 +40,27 @@ def randomModel(args):
initialObs, initialDict = env.reset()

# Example accessors
print("Possible actions: " + str(env.getPossibleActions()) )
print("Possible objects: " + str(env.getPossibleObjects()) )
print("Possible actions: " + str(env.getPossibleActions()))
print("Possible objects: " + str(env.getPossibleObjects()))
templates, lut = env.getPossibleActionObjectCombinations()
print("Possible action/object combinations: " + str(templates))
print("Object IDX to Object Referent LUT: " + str(lut))
print("Task Name: " + taskName)
print("Task Variation: " + str(randVariationIdx) + " / " + str(maxVariations))
print("Task Description: " + str(env.getTaskDescription()) )
print("look: " + str(env.look()) )
print("inventory: " + str(env.inventory()) )
print("taskdescription: " + str(env.taskdescription()) )

print("Task Description: " + str(env.getTaskDescription()))
print("look: " + str(env.look()))
print("inventory: " + str(env.inventory()))
print("taskdescription: " + str(env.taskdescription()))

score = 0.0
isCompleted = False
curIter = 0

# Run one episode until we reach a stopping condition (including exceeding the maximum steps)
userInputStr = "look around" # First action
while (userInputStr not in exitCommands) and (isCompleted == False):
while (userInputStr not in exitCommands) and (isCompleted is False):
print("----------------------------------------------------------------")
print ("Step: " + str(curIter))
print("Step: " + str(curIter))

# Send user input, get response
observation, reward, isCompleted, info = env.step(userInputStr)
Expand All @@ -72,27 +71,27 @@ def randomModel(args):
print("Score: " + str(score))
print("isCompleted: " + str(isCompleted))

# The environment will make isCompleted `True` when a stop condition has happened, or the maximum number of steps is reached.
# The environment will make isCompleted `True` when a stop condition
# has happened, or the maximum number of steps is reached.
if (isCompleted):
break

# Randomly select action

## Any action (valid or not)
#templates, lut = env.getPossibleActionObjectCombinations()
#print("Possible action/object combinations: " + str(templates))
#print("Object IDX to Object Referent LUT: " + str(lut))
#randomTemplate = random.choice( templates )
#print("Next random action: " + str(randomTemplate))
#userInputStr = randomTemplate["action"]
# Any action (valid or not)
# templates, lut = env.getPossibleActionObjectCombinations()
# print("Possible action/object combinations: " + str(templates))
# print("Object IDX to Object Referent LUT: " + str(lut))
# randomTemplate = random.choice( templates )
# print("Next random action: " + str(randomTemplate))
# userInputStr = randomTemplate["action"]

## Only valid actions
# Only valid actions
validActions = env.getValidActionObjectCombinationsWithTemplates()
randomAction = random.choice( validActions )
randomAction = random.choice(validActions)
print("Next random action: " + str(randomAction))
userInputStr = randomAction["action"]


print(list(lut.keys())[-1])

# Sanitize input
Expand All @@ -110,19 +109,20 @@ def randomModel(args):
finalScores.append(score)

# Report progress of model
print ("Final score: " + str(score))
print ("isCompleted: " + str(isCompleted))
print("Final score: " + str(score))
print("isCompleted: " + str(isCompleted))

# Save history -- and when we reach maxPerFile, export them to file
filenameOutPrefix = args['output_path_prefix'] + str(taskIdx)
env.storeRunHistory(episodeIdx, notes = {'text':'my notes here'} )
env.storeRunHistory(episodeIdx, notes={'text': 'my notes here'})
env.saveRunHistoriesBufferIfFull(filenameOutPrefix, maxPerFile=args['max_episode_per_file'])

# Episodes are finished -- manually save any last histories still in the buffer
env.saveRunHistoriesBufferIfFull(filenameOutPrefix, maxPerFile=args['max_episode_per_file'], forceSave=True)

# Show final episode scores to user:
avg = sum([x for x in finalScores if x >=0]) / len(finalScores) # Clip negative scores to 0 for average calculation
# Show final episode scores to user
# Clip negative scores to 0 for average calculation
avg = sum([x for x in finalScores if x >= 0]) / len(finalScores)
print("")
print("---------------------------------------------------------------------")
print(" Summary (Random Agent)")
Expand Down
Loading