Merge pull request #11 from gradescope/ibrahim/stdin-stdout-diff

Stdin stdout diffing autograder
gradescope · Sep 4, 2018 · d04f2a5 · d04f2a5
2 parents 4a0246b + 3fa93c1
commit d04f2a5
Show file tree

Hide file tree

Showing 30 changed files with 270 additions and 0 deletions.
diff --git a/diff_general/.gitignore b/diff_general/.gitignore
@@ -0,0 +1,58 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
diff --git a/diff_general/README.md b/diff_general/README.md
@@ -0,0 +1,52 @@
+# Overview
+
+[View project source on GitHub](https://github.com/gradescope/autograder_samples/tree/master/diff_general)
+
+This is an example of using Python and the gradescope-utils library to
+implement diff-style autograding of a C assignment. The idea is that
+you can compile the student's code, and then execute it in a
+subprocess using Python. Then you can communicate with the subprocess
+by providing arguments via the command line, or via standard input,
+and read standard output to see what the program produced. The
+student's output is checked against a reference answer to decide
+whether the test case passed or failed.
+
+This type of testing helps with testing assignments that are not
+easily amenable to unit testing, such as assignments where students
+don't necessarily write specific functions.
+
+## Building and executing code
+
+- **compile.sh**: This script should do whatever is necessary to
+  compile the student's code. Leave this blank if nothing needs to be compiled.
+- **run.sh**: This script should run the student's program. This can
+  be overridden for a given test case.
+
+## Adding test cases
+
+This example is driven entirely by the files that are in the `test_data`
+directory, i.e. to add test cases you only have to add directories to the
+`test_data` directory. Each test case should have the following files:
+
+- **input**: This file will be fed to the program over standard input.
+- **output**: This file will serve as the reference output for the
+  test, and must be matched for the test to pass.
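+- **settings.yml**: Optionally, this file holds various settings, such as
+  the weight assigned to a test case (`weight`, default 1), the message
+  shown when the test fails (`msg`), and the time limit in seconds
+  (`timeout`, default 1).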
+- **err**: Optionally, this file can be used to compare any output
+  that is expected to be printed to standard error.
+- **run.sh**: Optionally, you can override the command used to execute
+  this test case. This can be used to provide different command line
+  arguments.
+
+## The example program
+
+The C program in question ([`fib.c`](https://github.com/gradescope/autograder_samples/blob/master/diff_general/fib.c)) computes the nth Fibonacci number
+(1-indexed), where n is given as the first command line argument
+(i.e. `argv[1]`). This is just a simple example to demonstrate how you
+might structure such an autograder.
+
+## Providing input to the program
+
+You can provide command line arguments in run.sh, or you can send
+input to standard input using the `input` file.
diff --git a/diff_general/compile.sh b/diff_general/compile.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+# Prepare submission
+cp /autograder/submission/fib.c /autograder/source/fib.c
+
+cd /autograder/source
+
+make fib
diff --git a/diff_general/fib.c b/diff_general/fib.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Return the nth Fibonacci number, 1-indexed (fib(1) == fib(2) == 1).
+ * Any n <= 2, including zero and negative values, yields 1.
+ *
+ * The value is computed iteratively in O(n) time. The naive double
+ * recursion takes exponential time, which matters because each test case
+ * runs under a short timeout.
+ *
+ * Note: the result no longer fits in a 32-bit int for n > 46.
+ */
+int fib(int n) {
+    int prev = 1;
+    int curr = 1;
+    for (int i = 3; i <= n; i++) {
+        int next = prev + curr;
+        prev = curr;
+        curr = next;
+    }
+    return curr;
+}
+
+/*
+ * Print the Fibonacci number selected by argv[1], followed by a newline.
+ *
+ * With no argument, an error message is written to stderr and the program
+ * exits with EXIT_FAILURE. (Returning -1 from main is not portable; on
+ * POSIX systems it is reported as status 255.)
+ */
+int main(int argc, char** argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Error: Insufficient arguments.\n");
+        return EXIT_FAILURE;
+    }
+    /* atoi() yields 0 for non-numeric input, which fib() maps to 1. */
+    int arg = atoi(argv[1]);
+    printf("%d\n", fib(arg));
+    return EXIT_SUCCESS;
+}
diff --git a/diff_general/index.md b/diff_general/index.md
@@ -0,0 +1 @@
+README.md
diff --git a/diff_general/requirements.txt b/diff_general/requirements.txt
@@ -0,0 +1,2 @@
+gradescope-utils>=0.2.6
+subprocess32
+# test_generator.py imports yaml to parse each test's settings.yml
+pyyaml
diff --git a/diff_general/run.sh b/diff_general/run.sh
@@ -0,0 +1 @@
+./fib
diff --git a/diff_general/run_autograder b/diff_general/run_autograder
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# Entry point: compile the submission, then run the generated tests and
+# write their JSON report to /autograder/results/results.json.
+
+# Change directory first. compile.sh and run_tests.py are referenced by
+# relative path, so they must be resolved against /autograder/source and
+# not against whatever directory this script happens to be started from.
+cd /autograder/source || exit 1
+
+# Invoke through bash so compile.sh does not need its executable bit set.
+bash ./compile.sh
+
+python run_tests.py > /autograder/results/results.json
diff --git a/diff_general/run_tests.py b/diff_general/run_tests.py
@@ -0,0 +1,12 @@
+import unittest
+from gradescope_utils.autograder_utils.json_test_runner import JSONTestRunner
+from test_generator import find_data_directories, build_test_class, TestMetaclass
+
+if __name__ == '__main__':
+    suite = unittest.TestSuite()
+
+    for name in find_data_directories():
+        klass = build_test_class(name)
+        suite.addTest(klass(TestMetaclass.test_name(name)))
+
+    JSONTestRunner(visibility='visible').run(suite)
diff --git a/diff_general/setup.sh b/diff_general/setup.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+apt-get install -y python python-pip python-dev
+
+pip install -r /autograder/source/requirements.txt
diff --git a/diff_general/test_data/1/input b/diff_general/test_data/1/input
diff --git a/diff_general/test_data/1/output b/diff_general/test_data/1/output
@@ -0,0 +1 @@
+1
diff --git a/diff_general/test_data/1/run.sh b/diff_general/test_data/1/run.sh
@@ -0,0 +1 @@
+./fib 1
diff --git a/diff_general/test_data/1/settings.yml b/diff_general/test_data/1/settings.yml
@@ -0,0 +1,2 @@
+weight: 2.0
+msg: Failed to correctly compute the first Fibonacci number.
diff --git a/diff_general/test_data/2/input b/diff_general/test_data/2/input
diff --git a/diff_general/test_data/2/output b/diff_general/test_data/2/output
@@ -0,0 +1 @@
+1
diff --git a/diff_general/test_data/2/run.sh b/diff_general/test_data/2/run.sh
@@ -0,0 +1 @@
+./fib 2
diff --git a/diff_general/test_data/3/input b/diff_general/test_data/3/input
diff --git a/diff_general/test_data/3/output b/diff_general/test_data/3/output
@@ -0,0 +1 @@
+2
diff --git a/diff_general/test_data/3/run.sh b/diff_general/test_data/3/run.sh
@@ -0,0 +1 @@
+./fib 3
diff --git a/diff_general/test_data/4/input b/diff_general/test_data/4/input
diff --git a/diff_general/test_data/4/output b/diff_general/test_data/4/output
@@ -0,0 +1 @@
+3
diff --git a/diff_general/test_data/4/run.sh b/diff_general/test_data/4/run.sh
@@ -0,0 +1 @@
+./fib 4
diff --git a/diff_general/test_data/5/input b/diff_general/test_data/5/input
diff --git a/diff_general/test_data/5/output b/diff_general/test_data/5/output
@@ -0,0 +1 @@
+5
diff --git a/diff_general/test_data/5/run.sh b/diff_general/test_data/5/run.sh
@@ -0,0 +1 @@
+./fib 5
diff --git a/diff_general/test_data/err/err b/diff_general/test_data/err/err
@@ -0,0 +1 @@
+Error: Insufficient arguments.
diff --git a/diff_general/test_data/err/input b/diff_general/test_data/err/input
diff --git a/diff_general/test_data/err/output b/diff_general/test_data/err/output
diff --git a/diff_general/test_generator.py b/diff_general/test_generator.py
@@ -0,0 +1,92 @@
+import unittest
+import os
+import os.path
+import subprocess32 as subprocess
+from subprocess32 import PIPE
+from gradescope_utils.autograder_utils.decorators import weight
+import yaml
+
+BASE_DIR = './test_data'
+
+
+class TestMetaclass(type):
+    """
+    Metaclass that allows generating tests based on a directory.
+    """
+    def __new__(cls, name, bases, attrs):
+        data_dir = attrs['data_dir']
+        attrs[cls.test_name(data_dir)] = cls.generate_test(data_dir)
+        return super(TestMetaclass, cls).__new__(cls, name, bases, attrs)
+
+    @classmethod
+    def generate_test(cls, dir_name):
+        """ Returns a testcase for the given directory """
+        command = cls.generate_command(dir_name)
+
+        def load_test_file(path):
+            full_path = os.path.join(BASE_DIR, dir_name, path)
+            if os.path.isfile(full_path):
+                with open(full_path) as f:
+                    return f.read()
+            return None
+
+        def load_settings():
+            settings_yml = load_test_file('settings.yml')
+
+            if settings_yml is not None:
+                return yaml.safe_load(settings_yml) or {}
+            else:
+                return {}
+
+        settings = load_settings()
+
+        @weight(settings.get('weight', 1))
+        def fn(self):
+            proc = subprocess.Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
+            stdin = load_test_file('input')
+
+            output, err = proc.communicate(stdin, settings.get('timeout', 1))
+
+            expected_output = load_test_file('output')
+            expected_err = load_test_file('err')
+
+            msg = settings.get('msg', "Output did not match expected")
+            self.assertEqual(expected_output, output, msg=msg)
+            if expected_err is not None:
+                self.assertEqual(expected_err, err, msg=msg)
+        fn.__doc__ = 'Test {0}'.format(dir_name)
+        return fn
+
+    @staticmethod
+    def generate_command(dir_name):
+        """Generates the command passed to Popen"""
+        test_specific_script = os.path.join(BASE_DIR, dir_name, 'run.sh')
+        if os.path.isfile(test_specific_script):
+            return ["bash", test_specific_script]
+        return ["bash", "./run.sh"]
+
+    @staticmethod
+    def klass_name(dir_name):
+        return 'Test{0}'.format(''.join([x.capitalize() for x in dir_name.split('_')]))
+
+    @staticmethod
+    def test_name(dir_name):
+        return 'test_{0}'.format(dir_name)
+
+
+def build_test_class(data_dir):
+    klass = TestMetaclass(
+        TestMetaclass.klass_name(data_dir),
+        (unittest.TestCase,),
+        {
+            'data_dir': data_dir
+        }
+    )
+    return klass
+
+
+def find_data_directories():
+    return filter(
+        lambda x: os.path.isdir(os.path.join(BASE_DIR, x)),
+        os.listdir(BASE_DIR)
+    )