workflow and performance tests for github actions (#246)
* workflow and performance tests for github actions

* make regression tests go immediately after the python build

* fix: each step should have at least one `uses` or `run`

* Address comments

Co-authored-by: Fernando Yordan <fernando@Fernandos-MacBook-Pro.local>
fyordan and Fernando Yordan committed Jan 17, 2023
1 parent 5892a87 commit 050384a
Showing 3 changed files with 129 additions and 5 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/perf-regression.yml
@@ -0,0 +1,53 @@

# This workflow will compare the current branch with the main branch and evaluate for performance differences.

name: Performance Regression Test

on:
  pull_request:
    branches: master

jobs:

  build-python-and-test-performance:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r dev-requirements.txt
          pip install -e catanatron_core
          pip install -e catanatron_gym
          pip install -e catanatron_server
          pip install -e catanatron_experimental
      - name: Checkout PR and Master Branch
        uses: actions/checkout@v2
        with:
          path: pr
      - uses: actions/checkout@v2
        with:
          ref: master
          path: master

      - name: Run Benchmark on Master (baseline)
        run: cd master && pytest tests/integration_tests/test_speed.py --benchmark-only --benchmark-json=benchmark.json
      - name: Run Benchmark on PR
        run: cd pr && pytest tests/integration_tests/test_speed.py --benchmark-only --benchmark-json=benchmark.json

      - name: Compare Results
        uses: openpgpjs/github-action-pull-request-benchmark@v1
        with:
          tool: 'pytest'
          pr-benchmark-file-path: pr/benchmark.json
          base-benchmark-file-path: master/benchmark.json
          alert-threshold: '130%'
          # Workflow will fail if `fail-threshold` is exceeded
          fail-on-alert: true
          fail-threshold: '150%'
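To make the comparison step concrete: the job checks out the PR into pr/ and master into master/, runs the same pytest-benchmark suite in both trees, and hands the two JSON reports to openpgpjs/github-action-pull-request-benchmark. The sketch below is an illustrative approximation of the threshold logic, not the action's actual source; the script name compare_benchmarks.py is hypothetical, the "benchmarks"/"stats"/"mean" field names come from pytest-benchmark's JSON export, and the ratio arithmetic mirrors the 130% alert / 150% fail settings above.

    # compare_benchmarks.py -- illustrative sketch, NOT the action's real implementation.
    # Compares mean runtimes from two pytest-benchmark JSON reports.
    import json

    ALERT_RATIO = 1.30  # corresponds to alert-threshold: '130%'
    FAIL_RATIO = 1.50   # corresponds to fail-threshold: '150%'

    def load_means(path):
        """Map benchmark name -> mean runtime in seconds (pytest-benchmark JSON schema)."""
        with open(path) as f:
            report = json.load(f)
        return {b["name"]: b["stats"]["mean"] for b in report["benchmarks"]}

    def compare(base_path, pr_path):
        base, pr = load_means(base_path), load_means(pr_path)
        failed = False
        for name in sorted(base.keys() & pr.keys()):
            ratio = pr[name] / base[name]
            if ratio >= FAIL_RATIO:
                print(f"FAIL  {name}: {ratio:.0%} of baseline")
                failed = True
            elif ratio >= ALERT_RATIO:
                print(f"ALERT {name}: {ratio:.0%} of baseline")
        return failed

    if __name__ == "__main__":
        # Paths match the workflow's checkout layout (master/ and pr/).
        if compare("master/benchmark.json", "pr/benchmark.json"):
            raise SystemExit(1)  # mirrors fail-on-alert: true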
2 changes: 2 additions & 0 deletions catanatron_experimental/catanatron_experimental/machine_learning/players/minimax.py
@@ -74,6 +74,8 @@ def decide(self, game: Game, playable_actions):
         # if game.state.num_turns > 10:
         # render_debug_tree(node)
         # breakpoint()
+        if result[0] is None:
+            return playable_actions[0]
         return result[0]
 
     def __repr__(self) -> str:
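The two added lines are a fallback: when the alpha-beta search surfaces no best action (result[0] is None), the player now returns the first playable action instead of raising, which presumably keeps the long benchmark games below from aborting mid-run.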
79 changes: 74 additions & 5 deletions tests/integration_tests/test_speed.py
@@ -1,9 +1,13 @@
 import json
 
-from catanatron.json import GameEncoder
 from catanatron.game import Game
-from catanatron.models.player import SimplePlayer, Color
+from catanatron.json import GameEncoder
+from catanatron.models.player import Color, SimplePlayer, RandomPlayer
+from catanatron.players.weighted_random import WeightedRandomPlayer
 from catanatron_gym.features import create_sample
+from catanatron_experimental.machine_learning.players.minimax import AlphaBetaPlayer, SameTurnAlphaBetaPlayer
 
+RANDOM_SEED = 0
+
 
 # Things to benchmark. create_sample(), game.play() (random game), .to_json(), .copy()
@@ -14,7 +18,7 @@ def test_to_json_speed(benchmark):
         SimplePlayer(Color.ORANGE),
         SimplePlayer(Color.WHITE),
     ]
-    game = Game(players)
+    game = Game(players, seed=RANDOM_SEED)
 
     result = benchmark(json.dumps, game, cls=GameEncoder)
     assert isinstance(result, str)
@@ -27,7 +31,7 @@ def test_copy_speed(benchmark):
         SimplePlayer(Color.ORANGE),
         SimplePlayer(Color.WHITE),
     ]
-    game = Game(players)
+    game = Game(players, seed=RANDOM_SEED)
 
     result = benchmark(game.copy)
     assert result.seed == game.seed
@@ -40,10 +44,75 @@ def test_create_sample_speed(benchmark):
         SimplePlayer(Color.WHITE),
         SimplePlayer(Color.ORANGE),
     ]
-    game = Game(players)
+    game = Game(players, seed=RANDOM_SEED)
     for _ in range(30):
         game.play_tick()
 
     sample = benchmark(create_sample, game, players[1].color)
     assert isinstance(sample, dict)
     assert len(sample) > 0
+
+
+# Benchmarking individual player speeds
+def test_simpleplayer_speed(benchmark):
+    players = [
+        SimplePlayer(Color.RED),
+        SimplePlayer(Color.BLUE),
+        SimplePlayer(Color.WHITE),
+        SimplePlayer(Color.ORANGE),
+    ]
+    game = Game(players, seed=RANDOM_SEED)
+    def _play_game(game):
+        for _ in range(100):
+            game.play_tick()
+        return game
+
+    result = benchmark(_play_game, game)
+
+
+def test_weightedrandom_speed(benchmark):
+    players = [
+        RandomPlayer(Color.RED),
+        RandomPlayer(Color.BLUE),
+        RandomPlayer(Color.WHITE),
+        WeightedRandomPlayer(Color.ORANGE),
+    ]
+    game = Game(players, seed=RANDOM_SEED)
+    def _play_game(game):
+        for _ in range(100):
+            game.play_tick()
+        return game
+
+    result = benchmark(_play_game, game)
+
+
+def test_alphabeta_speed(benchmark):
+    players = [
+        RandomPlayer(Color.RED),
+        RandomPlayer(Color.BLUE),
+        RandomPlayer(Color.WHITE),
+        AlphaBetaPlayer(Color.ORANGE),
+    ]
+    game = Game(players, seed=RANDOM_SEED)
+    def _play_game(game):
+        for _ in range(100):
+            game.play_tick()
+        return game
+
+    result = benchmark(_play_game, game)
+
+
+def test_same_turn_alphabeta_speed(benchmark):
+    players = [
+        RandomPlayer(Color.RED),
+        RandomPlayer(Color.BLUE),
+        RandomPlayer(Color.WHITE),
+        SameTurnAlphaBetaPlayer(Color.ORANGE),
+    ]
+    game = Game(players, seed=RANDOM_SEED)
+    def _play_game(game):
+        for _ in range(100):
+            game.play_tick()
+        return game
+
+    result = benchmark(_play_game, game)
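A note on reproducing these numbers locally (an observation, not part of the commit): the suite can be run on any branch with pytest tests/integration_tests/test_speed.py --benchmark-only --benchmark-json=benchmark.json, the same command the workflow uses. The shared RANDOM_SEED = 0 pins each Game's board and dice, which should make runs comparable across the master and PR checkouts rather than subject to dice luck. One caveat: pytest-benchmark typically invokes the timed callable for multiple rounds, and since _play_game mutates the shared game, later rounds resume from wherever the previous round left off (or hit an already-finished game), so the per-test numbers measure an average over successive 100-tick slices rather than repeated identical openings.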
