Upgrade allennlp #12

Merged 18 commits on Jul 27, 2020
Changes from all commits
54 changes: 38 additions & 16 deletions README.md
@@ -15,6 +15,9 @@ If you use this repository for your research, please cite:
}
```

**NOTE**: This repository works with allennlp 1.0.0.


## Generating expected validation curves

If you are interested in plotting expected validation curves without using AllenTune, we've extracted the code for plotting here: https://github.com/dodgejesse/show_your_work
@@ -25,21 +28,13 @@ Run distributed, parallel hyperparameter search on GPUs or CPUs. See the [associated paper](https://arxiv.org/abs/1909.03004) for more details.

This library was inspired by [tuna](https://github.com/ChristophAlt/tuna), thanks to @ChristophAlt for the work!

To get started,

1. First install allentune with:

```bash
pip install git+git://github.com/allenai/allennlp@27ebcf6ba3e02afe341a5e62cb1a7d5c6906c0c9
```

Then, clone the `allentune` repository, cd into root folder, and run `pip install --editable .`
To get started, clone the `allentune` repository, cd into the root folder, and run `pip install --editable .`

Then, make sure all tests pass:

```bash
pytest -v .
```

Now you can test your installation by running `allentune -h`.

@@ -80,7 +75,7 @@ allentune search \
--cpus-per-trial 1 \
--gpus-per-trial 1 \
--search-space examples/search_space.json \
--num-samples 30 \
--num-samples 50 \
--base-config examples/classifier.jsonnet
```
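
The search space file maps each hyperparameter name to a sampling strategy. As a minimal sketch (the strategy names match those handled in `allentune/modules/ray_executor.py`; the bounds and choices below are illustrative, not the repository's actual `examples/search_space.json`):

```json
{
    "LEARNING_RATE": {"sampling strategy": "loguniform", "bounds": [1e-5, 1e-1]},
    "DROPOUT": {"sampling strategy": "uniform", "bounds": [0.0, 0.5]},
    "HIDDEN_SIZE": {"sampling strategy": "integer", "bounds": [64, 512]},
    "MAX_FILTER_SIZE": {"sampling strategy": "choice", "choices": [3, 4, 5]}
}
```

Each key is exposed to the base config as an environment variable, which `examples/classifier.jsonnet` reads via `std.extVar`.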

@@ -103,12 +98,39 @@ This command will generate a dataset of resulting hyperparameter assignments and training metrics.
allentune report \
--log-dir logs/classifier_search/ \
--performance-metric best_validation_accuracy \
--model cnn
--model "CNN Classifier"
```

This command will create a file `results.jsonl` in `logs/classifier_search`. Each line has the hyperparameter assignments and resulting training metrics from each experiment of your search.
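
A single line of `results.jsonl` might look roughly like the following sketch (the hyperparameter fields come from your search space; the numeric values here are illustrative, except the accuracy and directory path, which are taken from the sample report below):

```json
{"LEARNING_RATE": 0.00052, "DROPOUT": 0.31, "HIDDEN_SIZE": 128, "best_validation_accuracy": 0.8844, "training_duration": 412.7, "model": "CNN Classifier", "directory": "/home/suching/allentune/logs/classifier_search/run_18_2020-07-27_14-57-28lfw_dbkq/trial/"}
```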

`allentune report` will also tell you the currently best performing model, and the path to its serialization directory:

```
------------------------- ----------------------------------------------------------------------------------------
Model Name CNN Classifier
Performance Metric best_validation_accuracy
Total Experiments 44
Best Performance 0.8844
Min Performance 0.8505 +- 0.08600000000000008
Mean +- STD Performance 0.8088454545454546 +- 0.08974256581128731
Median +- IQR Performance 0.8505 +- 0.08600000000000008
Best Model Directory Path /home/suching/allentune/logs/classifier_search/run_18_2020-07-27_14-57-28lfw_dbkq/trial/
------------------------- ----------------------------------------------------------------------------------------
```



## Merge multiple reports

To merge the reports of multiple models, we've added a simple convenience command `merge`.

The following command will merge the results of multiple runs into a single file `merged_results.jsonl` for further analysis.

```bash
allentune merge \
--input-files logs/classifier_1_search/results.jsonl logs/classifier_2_search/results.jsonl \
--output-file merged_results.jsonl
```

## Plot expected performance

Binary file added allentune/.DS_Store
Binary file not shown.
48 changes: 48 additions & 0 deletions allentune/commands/merge.py
@@ -0,0 +1,48 @@
import argparse
import glob
import json
import logging
import os
import re
import sys
from collections import ChainMap
from typing import Optional

import pandas as pd

from allentune.commands.subcommand import Subcommand

logger = logging.getLogger(__name__)

class Merge(Subcommand):
    def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
        subparser = parser.add_parser(
            name,
            description="merge multiple search reports",
            help='Merge the results.jsonl files of multiple hyperparameter search experiments.')
        subparser.add_argument(
            "--input-files",
            nargs="+",
            type=str,
            required=True,
        )
        subparser.add_argument(
            '--output-file',
            type=str,
            required=True,
        )
        subparser.set_defaults(func=merge_reports)
        return subparser

def merge_reports(args: argparse.Namespace):
    # Read each per-experiment results file (one JSON object per line) into a DataFrame.
    dfs = []
    for file in args.input_files:
        dfs.append(pd.read_json(file, lines=True))
    master = pd.concat(dfs, axis=0)

    # Create the output directory if it doesn't exist yet; dirname is empty
    # when the output file is written to the current directory.
    output_dir = os.path.dirname(args.output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    master.to_json(args.output_file, lines=True, orient='records')

    logger.info(f"Merged files into {args.output_file}.")
28 changes: 21 additions & 7 deletions allentune/commands/report.py
@@ -9,6 +9,7 @@
from typing import Optional

import pandas as pd
from tabulate import tabulate

from allentune.commands.subcommand import Subcommand

@@ -63,7 +64,7 @@ def generate_report(args: argparse.Namespace):

master_dicts = [dict(ChainMap(*item)) for item in master]

df = pd.io.json.json_normalize(master_dicts)
df = pd.json_normalize(master_dicts)
try:
df['training_duration'] = pd.to_timedelta(df['training_duration']).dt.total_seconds()
except KeyError:
@@ -73,13 +74,26 @@ def generate_report(args: argparse.Namespace):
df['model'] = args.model
output_file = os.path.join(experiment_dir, "results.jsonl")
df.to_json(output_file, lines=True, orient='records')
print("results written to {}".format(output_file))
print(f"total experiments: {df.shape[0]}")

logger.info("results written to {}".format(output_file))
try:
best_experiment = df.iloc[df[args.performance_metric].idxmax()]
best_performance = df[args.performance_metric].max()
median_performance = df[args.performance_metric].median()
worst_performance = df[args.performance_metric].min()
mean_performance = df[args.performance_metric].mean()
std_performance = df[args.performance_metric].std()
iqr_performance = df[args.performance_metric].quantile(0.75) - df[args.performance_metric].quantile(0.25)
except KeyError:
logger.error(f"No performance metric {args.performance_metric} found in results of {args.log_dir}")
sys.exit(1)
print(f"best model performance: {best_experiment[args.performance_metric]}")
print(f"best model directory path: {best_experiment['directory']}")
results = [
["Model Name", args.model],
["Performance Metric", args.performance_metric],
['Total Experiments', f"{df.shape[0]}"],
["Best Performance", f"{best_performance}"],
["Min Performance", f"{median_performance} +- {iqr_performance}"],
["Mean +- STD Performance", f"{mean_performance} +- {std_performance}"],
["Median +- IQR Performance", f"{median_performance} +- {iqr_performance}"],
["Best Model Directory Path", f"{df.iloc[df[args.performance_metric].idxmax()]['directory']}"],
]

logger.info('\n' + tabulate(results))
8 changes: 4 additions & 4 deletions allentune/modules/ray_executor.py
@@ -28,15 +28,15 @@ def parse_search_config(self, search_config: Dict) -> Dict:
ray_sampler = val
elif val['sampling strategy'] == 'loguniform':
low, high = val['bounds'][0], val['bounds'][1]
ray_sampler = function(RandomSearch.random_loguniform(low, high))
ray_sampler = RandomSearch.random_loguniform(low, high)
elif val['sampling strategy'] == 'integer':
low, high = val['bounds'][0], val['bounds'][1]
ray_sampler = function(RandomSearch.random_integer(low, high))
ray_sampler = RandomSearch.random_integer(low, high)
elif val['sampling strategy'] == 'choice':
ray_sampler = function(RandomSearch.random_choice(val['choices']))
ray_sampler = RandomSearch.random_choice(val['choices'])
elif val['sampling strategy'] == 'uniform':
low, high = val['bounds'][0], val['bounds'][1]
ray_sampler = function(RandomSearch.random_uniform(low, high))
ray_sampler = RandomSearch.random_uniform(low, high)
else:
raise KeyError(f"sampling strategy {val['sampling strategy']} does not exist")
search_config[hyperparameter] = ray_sampler
Binary file modified allentune/util/__pycache__/random_search.cpython-37.pyc
Binary file not shown.
4 changes: 2 additions & 2 deletions allentune/util/random_search.py
@@ -75,8 +75,8 @@ def __init__(self, **kwargs):
self.search_space[key] = val

def parse(self, val: Any):
if isinstance(val, ray.tune.suggest.variant_generator.function):
val = val.func()
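# with ray 0.8.6, sampled values may arrive as the RandomSearch sampler functions themselves; call them to draw a concrete value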
if isinstance(val, type(lambda x: x)):
val = val()
if isinstance(val, (int, np.int)):
return int(val)
elif isinstance(val, (float, np.float)):
Expand Down
Binary file added classifier_performance.pdf
Binary file not shown.
29 changes: 12 additions & 17 deletions examples/classifier.jsonnet
@@ -59,7 +59,7 @@ local TOKEN_EMBEDDERS = GLOVE_FIELDS(GLOVE_TRAINABLE)['glove_embedder'];

local EMBEDDING_DIM = GLOVE_FIELDS(GLOVE_TRAINABLE)['embedding_dim'];

local ENCODER = CNN_FIELDS(std.parseInt(std.extVar("MAX_FILTER_SIZE")), EMBEDDING_DIM, std.parseInt(std.extVar("HIDDEN_SIZE")), std.extVar("NUM_FILTERS"));
local ENCODER = CNN_FIELDS(std.parseInt(std.extVar("MAX_FILTER_SIZE")), EMBEDDING_DIM, std.parseInt(std.extVar("HIDDEN_SIZE")), std.parseInt(std.extVar("NUM_FILTERS")));

local OUTPUT_LAYER_DIM = std.parseInt(std.extVar("HIDDEN_SIZE"));

@@ -71,10 +71,8 @@ local OUTPUT_LAYER_HIDDEN_DIM = if std.parseInt(std.extVar("NUM_OUTPUT_LAYERS"))
local BASE_READER(TOKEN_INDEXERS) = {
"lazy": false,
"type": "text_classification_json",
"tokenizer": {
"word_splitter": "just_spaces",
},
"token_indexers": TOKEN_INDEXERS,
"max_sequence_length": 400
};

{
@@ -94,27 +92,24 @@ local BASE_READER(TOKEN_INDEXERS) = {
"token_embedders": TOKEN_EMBEDDERS
},
"seq2vec_encoder": ENCODER,
"dropout": DROPOUT
"dropout": std.parseJson(DROPOUT)
},
"iterator": {
"batch_size": BATCH_SIZE,
"type": "basic"
},
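// allennlp 1.0.0 replaces the old 'iterator' config with a torch-style 'data_loader'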
"data_loader": {
"batch_sampler": {
"type": "basic",
"sampler": "sequential",
"batch_size": BATCH_SIZE,
"drop_last": false
}
},

"trainer": {
"cuda_device": CUDA_DEVICE,
"num_epochs": NUM_EPOCHS,
"optimizer": {
"lr": LEARNING_RATE,
"lr": std.parseJson(LEARNING_RATE),
"type": "adam"
},
"learning_rate_scheduler": {
"type": "reduce_on_plateau",
"factor": 0.5,
"patience": 2
},
"patience": 10,
"num_serialized_models_to_keep": 1,
"validation_metric": "+accuracy"
}
}
Binary file modified figs/classifier_performance.png
5 changes: 3 additions & 2 deletions requirements.txt
@@ -1,9 +1,10 @@
# External dependencies
ray==0.6.2
git+git://github.com/allenai/allennlp@27ebcf6ba3e02afe341a5e62cb1a7d5c6906c0c9
ray==0.8.6
seaborn
pandas
torch
allennlp==1.0.0


# Testing
pytest
4 changes: 3 additions & 1 deletion setup.py
@@ -34,7 +34,9 @@
license="Apache",
url="https://github.com/kernelmachine/allentune",
packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
install_requires=["ray==0.6.2",
install_requires=["ray==0.8.6",
"allennlp==1.0.0",
"tabulate",
"seaborn",
"pandas",
"pytest",
6 changes: 3 additions & 3 deletions tests/fixtures/classifier.jsonnet
@@ -4,9 +4,9 @@ local USE_LAZY_DATASET_READER = std.parseInt(std.extVar("LAZY_DATASET_READER"))
local CUDA_DEVICE = std.parseInt(std.extVar("CUDA_DEVICE"));

// Paths to data.
local TRAIN_PATH = "/Users/suching/Github/allentune/tests/fixtures/imdb/train.jsonl";
local DEV_PATH = "/Users/suching/Github/allentune/tests/fixtures/imdb/dev.jsonl";
local TEST_PATH = "/Users/suching/Github/allentune/tests/fixtures/imdb/test.jsonl";
local TRAIN_PATH = "tests/fixtures/imdb/train.jsonl";
local DEV_PATH = "tests/fixtures/imdb/dev.jsonl";
local TEST_PATH = "tests/fixtures/imdb/test.jsonl";

// learning rate of overall model.
local LEARNING_RATE = std.extVar("LEARNING_RATE");