From 23176d3e42ddb5a894a1214574780479b7b8ebe2 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 4 Oct 2019 12:26:28 +0200 Subject: [PATCH 01/11] Add method to run inference from a file --- docs/examples.rst | 2 +- examples/doc_classification.py | 2 +- examples/doc_classification_multilabel.py | 2 +- examples/doc_regression.py | 2 +- examples/ner.py | 2 +- examples/question_answering.py | 2 +- farm/data_handler/data_silo.py | 2 +- farm/data_handler/processor.py | 19 ++--- farm/infer.py | 97 ++++++++++++++++++----- test/test_doc_classification.py | 2 +- test/test_doc_regression.py | 2 +- test/test_ner.py | 2 +- test/test_question_answering.py | 2 +- 13 files changed, 97 insertions(+), 41 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index ef41dd644..76959b2c1 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -79,5 +79,5 @@ b) and a prediction head on top that is suited for our task => Text classificati {"text": "Martin Müller spielt Fussball"}, ] model = Inferencer(save_dir) - result = model.run_inference(dicts=basic_texts) + result = model.inference_from_dicts(dicts=basic_texts) print(result) diff --git a/examples/doc_classification.py b/examples/doc_classification.py index 8b84f8c1c..c991ae5a6 100644 --- a/examples/doc_classification.py +++ b/examples/doc_classification.py @@ -102,7 +102,7 @@ {"text": "Martin Müller spielt Handball in Berlin"}, ] model = Inferencer.load(save_dir) -result = model.run_inference(dicts=basic_texts) +result = model.inference_from_dicts(dicts=basic_texts) print(result) # fmt: on diff --git a/examples/doc_classification_multilabel.py b/examples/doc_classification_multilabel.py index 3962052b6..b35c73e9d 100644 --- a/examples/doc_classification_multilabel.py +++ b/examples/doc_classification_multilabel.py @@ -106,7 +106,7 @@ {"text": "What a lovely world"}, ] model = Inferencer.load(save_dir) -result = model.run_inference(dicts=basic_texts) +result = model.inference_from_dicts(dicts=basic_texts) print(result) diff --git a/examples/doc_regression.py b/examples/doc_regression.py index d2a3078c2..c2b8903ce 100644 --- a/examples/doc_regression.py +++ b/examples/doc_regression.py @@ -94,7 +94,7 @@ {"text": ""}, ] model = Inferencer.load(save_dir) -result = model.run_inference(dicts=basic_texts) +result = model.inference_from_dicts(dicts=basic_texts) print(result) diff --git a/examples/ner.py b/examples/ner.py index 1d02eb8fc..fe4c528dc 100644 --- a/examples/ner.py +++ b/examples/ner.py @@ -96,5 +96,5 @@ {"text": "Martin Müller spielt Handball in Berlin"}, ] model = Inferencer.load(save_dir) -result = model.run_inference(dicts=basic_texts) +result = model.inference_from_dicts(dicts=basic_texts) print(result) diff --git a/examples/question_answering.py b/examples/question_answering.py index ae61dcb2c..f6b12c0fe 100644 --- a/examples/question_answering.py +++ b/examples/question_answering.py @@ -104,7 +104,7 @@ }] model = Inferencer.load(save_dir) -result = model.run_inference(dicts=QA_input) +result = model.inference_from_dicts(dicts=QA_input) for x in result: pprint.pprint(x) diff --git a/farm/data_handler/data_silo.py b/farm/data_handler/data_silo.py index 256b91bc4..5b18d9b75 100644 --- a/farm/data_handler/data_silo.py +++ b/farm/data_handler/data_silo.py @@ -56,7 +56,7 @@ def _multiproc(cls, chunk, processor): return dataset def _get_dataset(self, filename): - dicts = self.processor._file_to_dicts(filename) + dicts = self.processor.file_to_dicts(filename) #shuffle list of dicts here if we later want to have a random dev set splitted from 
train set if filename == self.processor.train_filename: if not self.processor.dev_filename: diff --git a/farm/data_handler/processor.py b/farm/data_handler/processor.py index 642a600db..7cc301788 100644 --- a/farm/data_handler/processor.py +++ b/farm/data_handler/processor.py @@ -4,6 +4,7 @@ import random import logging import json +import time import inspect from inspect import signature import numpy as np @@ -41,7 +42,7 @@ class Processor(ABC): """ Is used to generate PyTorch Datasets from input data. An implementation of this abstract class should be created for each new data source. - Implement the abstract methods: _file_to_dicts(), _dict_to_samples(), _sample_to_features() + Implement the abstract methods: file_to_dicts(), _dict_to_samples(), _sample_to_features() to be compatible with your data format """ @@ -236,7 +237,7 @@ def add_task(self, name, metric, label_list, label_column_name=None, label_name } @abc.abstractmethod - def _file_to_dicts(self, file: str) -> [dict]: + def file_to_dicts(self, file: str) -> [dict]: raise NotImplementedError() @abc.abstractmethod @@ -248,7 +249,7 @@ def _sample_to_features(cls, sample: Sample) -> dict: raise NotImplementedError() def _init_baskets_from_file(self, file): - dicts = self._file_to_dicts(file) + dicts = self.file_to_dicts(file) dataset_name = os.path.splitext(os.path.basename(file))[0] baskets = [ SampleBasket(raw=tr, id=f"{dataset_name}-{i}") for i, tr in enumerate(dicts) @@ -406,7 +407,7 @@ def __init__( label_column_name=label_column_name, task_type=task_type) - def _file_to_dicts(self, file: str) -> [dict]: + def file_to_dicts(self, file: str) -> [dict]: column_mapping = {task["label_column_name"]: task["label_name"] for task in self.tasks.values()} dicts = read_tsv( filename=file, @@ -497,7 +498,7 @@ def load_from_dir(cls, load_dir): return processor - def _file_to_dicts(self, file: str) -> [dict]: + def file_to_dicts(self, file: str) -> [dict]: raise NotImplementedError def _dict_to_samples(self, dictionary: dict, **kwargs) -> [Sample]: @@ -554,7 +555,7 @@ def __init__( if metric and label_list: self.add_task("ner", metric, label_list) - def _file_to_dicts(self, file: str) -> [dict]: + def file_to_dicts(self, file: str) -> [dict]: dicts = read_ner_file(filename=file, sep=self.delimiter) return dicts @@ -616,7 +617,7 @@ def __init__( self.add_task("nextsentence", "acc", ["False", "True"]) - def _file_to_dicts(self, file: str) -> list: + def file_to_dicts(self, file: str) -> list: dicts = read_docs_from_txt(filename=file, delimiter=self.delimiter, max_docs=self.max_docs) return dicts @@ -750,7 +751,7 @@ def _convert_inference(self, infer_dict): ] return converted - def _file_to_dicts(self, file: str) -> [dict]: + def file_to_dicts(self, file: str) -> [dict]: dict = read_squad_file(filename=file) return dict @@ -822,7 +823,7 @@ def __init__( self.add_task(name="regression", metric="mse", label_list= [scaler_mean, scaler_scale], label_column_name=label_column_name, task_type="regression", label_name=label_name) - def _file_to_dicts(self, file: str) -> [dict]: + def file_to_dicts(self, file: str) -> [dict]: column_mapping = {task["label_column_name"]: task["label_name"] for task in self.tasks.values()} dicts = read_tsv( rename_columns=column_mapping, diff --git a/farm/infer.py b/farm/infer.py index b4a5b74ba..9e0b71feb 100644 --- a/farm/infer.py +++ b/farm/infer.py @@ -1,8 +1,14 @@ import os import torch import logging +import multiprocessing as mp +from contextlib import ExitStack +from functools import partial + +from 
torch.utils.data import ConcatDataset from torch.utils.data.sampler import SequentialSampler +from tqdm import tqdm from farm.data_handler.dataloader import NamedDataLoader from farm.modeling.adaptive_model import AdaptiveModel @@ -10,6 +16,7 @@ from farm.utils import initialize_device_settings from farm.data_handler.processor import Processor, InferenceProcessor from farm.utils import set_all_seeds +from farm.utils import log_ascii_workers logger = logging.getLogger(__name__) @@ -29,13 +36,14 @@ class Inferencer: {"text": "Martin Müller spielt Handball in Berlin"}, ] model = Inferencer.load(your_model_dir) - model.run_inference(dicts=basic_texts) + model.inference_from_dicts(dicts=basic_texts) # LM embeddings model.extract_vectors(dicts=basic_texts) """ - def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_class_probs=False): + def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_class_probs=False, + multiprocessing_chunk_size=100): """ Initializes inferencer from an AdaptiveModel and a Processor instance. @@ -75,6 +83,7 @@ def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_ # raise NotImplementedError("A model with multiple prediction heads is currently not supported by the Inferencer") self.name = name if name != None else f"anonymous-{self.prediction_type}" self.return_class_probs = return_class_probs + self.multiprocessing_chunk_size = multiprocessing_chunk_size model.connect_heads_with_processor(processor.tasks, require_labels=False) set_all_seeds(42, n_gpu) @@ -110,27 +119,48 @@ def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=False, return_cla name = os.path.basename(load_dir) return cls(model, processor, batch_size=batch_size, gpu=gpu, name=name, return_class_probs=return_class_probs) - def run_inference(self, dicts): - """ - Runs down-stream inference using the prediction head. + def inference_from_file(self, file): + dicts = self.processor.file_to_dicts(file) - :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample. - :type dicst: [dict] - :return: dict of predictions + dict_batches_to_process = int(len(dicts) / self.multiprocessing_chunk_size) + num_cpus = min(mp.cpu_count(), dict_batches_to_process) or 1 - """ - if self.prediction_type == "embedder": - raise TypeError( - "You have called run_inference for a model without any prediction head! " - "If you want to: " - "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)`" - f"b) ... run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head" + with ExitStack() as stack: + p = stack.enter_context(mp.Pool(processes=num_cpus)) + + logger.info( + f"Got ya {num_cpus} parallel workers to do inference on {len(dicts)}dicts (chunksize = {self.multiprocessing_chunk_size})..." 
) - dataset, tensor_names = self.processor.dataset_from_dicts(dicts, from_inference=True) + log_ascii_workers(num_cpus, logger) + + results = p.imap( + partial(self._multiproc_dict_to_samples, processor=self.processor), + dicts, + chunksize=1, + ) + + samples = [] + datasets = [] + for dataset, tensor_names, sample in tqdm(results, total=dict_batches_to_process): + datasets.append(dataset) + samples.extend(sample) + + concat_datasets = ConcatDataset(datasets) + + preds_all = self._run_inference(concat_datasets, tensor_names, samples) + return preds_all + + @classmethod + def _multiproc_dict_to_samples(cls, dicts, processor): + dicts_list = [dicts] + dataset, tensor_names = processor.dataset_from_dicts(dicts_list, from_inference=True) samples = [] - for dict in dicts: - samples.extend(self.processor._dict_to_samples(dict)) + for d in dicts_list: + samples.extend(processor._dict_to_samples(d)) + + return dataset, tensor_names, samples + def _run_inference(self, dataset, tensor_names, samples): data_loader = NamedDataLoader( dataset=dataset, sampler=SequentialSampler(dataset), @@ -141,7 +171,7 @@ def run_inference(self, dicts): preds_all = [] for i, batch in enumerate(data_loader): batch = {key: batch[key].to(self.device) for key in batch} - batch_samples = samples[i * self.batch_size : (i + 1) * self.batch_size] + batch_samples = samples[i * self.batch_size: (i + 1) * self.batch_size] with torch.no_grad(): logits = self.model.forward(**batch) preds = self.model.formatted_preds( @@ -155,6 +185,31 @@ def run_inference(self, dicts): return preds_all + def inference_from_dicts(self, dicts): + """ + Runs down-stream inference using the prediction head. + + :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample. + :type dicts: [dict] + :return: dict of predictions + + """ + if self.prediction_type == "embedder": + raise TypeError( + "You have called inference_from_dicts for a model without any prediction head! " + "If you want to: " + "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)`" + f"b) ... run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head" + ) + dataset, tensor_names = self.processor.dataset_from_dicts(dicts, from_inference=True) + samples = [] + for dict in dicts: + samples.extend(self.processor._dict_to_samples(dict)) + + preds_all = self._run_inference(dataset, tensor_names, samples) + + return preds_all + def extract_vectors( self, dicts, extraction_strategy="cls_token", extraction_layer=-1 ): @@ -163,8 +218,8 @@ def extract_vectors( :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample. :type dicts: [dict] - :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector), - 'reduce_mean' (sentence vector), reduce_max (sentence vector), 'per_token' (individual token vectors) + :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector), 'reduce_mean' + (sentence vector), reduce_max (sentence vector), 'per_token' (individual token vectors) :type extraction_strategy: str :param extraction_layer: number of layer from which the embeddings shall be extracted. Default: -1 (very last layer). 
:type: int diff --git a/test/test_doc_classification.py b/test/test_doc_classification.py index c51484b54..dd63fe3bd 100644 --- a/test/test_doc_classification.py +++ b/test/test_doc_classification.py @@ -79,7 +79,7 @@ def test_doc_classification(caplog): inf = Inferencer.load(save_dir,batch_size=2) - result = inf.run_inference(dicts=basic_texts) + result = inf.inference_from_dicts(dicts=basic_texts) assert isinstance(result[0]["predictions"][0]["probability"],np.float32) diff --git a/test/test_doc_regression.py b/test/test_doc_regression.py index 4fe37ffee..604e798c8 100644 --- a/test/test_doc_regression.py +++ b/test/test_doc_regression.py @@ -75,7 +75,7 @@ def test_doc_regression(caplog): ] model = Inferencer.load(save_dir) - result = model.run_inference(dicts=basic_texts) + result = model.inference_from_dicts(dicts=basic_texts) assert isinstance(result[0]["predictions"][0]["pred"], np.float32) if(__name__=="__main__"): diff --git a/test/test_ner.py b/test/test_ner.py index d2353321e..6f270698e 100644 --- a/test/test_ner.py +++ b/test/test_ner.py @@ -75,7 +75,7 @@ def test_ner(caplog): {"text": "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot sei"}, ] model = Inferencer.load(save_dir) - result = model.run_inference(dicts=basic_texts) + result = model.inference_from_dicts(dicts=basic_texts) assert result[0]["predictions"][0]["context"] == "sagte" assert isinstance(result[0]["predictions"][0]["probability"], np.float32) diff --git a/test/test_question_answering.py b/test/test_question_answering.py index 01fe2e387..60377e7cc 100644 --- a/test/test_question_answering.py +++ b/test/test_question_answering.py @@ -78,7 +78,7 @@ def test_qa(caplog): ] model = Inferencer.load(save_dir) - result = model.run_inference(dicts=QA_input) + result = model.inference_from_dicts(dicts=QA_input) assert isinstance(result[0]["predictions"][0]["end"],int) if(__name__=="__main__"): From 79eb0b17c2713323b6ddafb0537249c3af8ddb18 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 4 Oct 2019 18:11:05 +0200 Subject: [PATCH 02/11] Process preds chunkwise to avoid memory issues for large datasets --- farm/infer.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/farm/infer.py b/farm/infer.py index 9e0b71feb..43e6ee849 100644 --- a/farm/infer.py +++ b/farm/infer.py @@ -5,8 +5,6 @@ from contextlib import ExitStack from functools import partial - -from torch.utils.data import ConcatDataset from torch.utils.data.sampler import SequentialSampler from tqdm import tqdm @@ -136,18 +134,13 @@ def inference_from_file(self, file): results = p.imap( partial(self._multiproc_dict_to_samples, processor=self.processor), dicts, - chunksize=1, + chunksize=self.multiprocessing_chunk_size, ) - samples = [] - datasets = [] + preds_all = [] for dataset, tensor_names, sample in tqdm(results, total=dict_batches_to_process): - datasets.append(dataset) - samples.extend(sample) - - concat_datasets = ConcatDataset(datasets) + preds_all.append(self._run_inference(dataset, tensor_names, sample)) - preds_all = self._run_inference(concat_datasets, tensor_names, samples) return preds_all @classmethod From c98c983d57bc2eae94125332acb9f17d1efdf98f Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Thu, 10 Oct 2019 15:20:03 +0200 Subject: [PATCH 03/11] Rename params --- farm/data_handler/processor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/farm/data_handler/processor.py b/farm/data_handler/processor.py index 7cc301788..01a1721ce 100644 --- a/farm/data_handler/processor.py +++ 
b/farm/data_handler/processor.py @@ -717,10 +717,10 @@ def __init__( if metric and labels: self.add_task("question_answering", metric, labels) - def dataset_from_dicts(self, dicts, index=None, from_inference=False): - if(from_inference): - dicts = [self._convert_inference(x) for x in dicts] - if(from_inference): + def dataset_from_dicts(self, dicts, index=None, rest_api_schema=False): + if rest_api_schema: + dicts = [self._convert_rest_api_dict(x) for x in dicts] + if rest_api_schema: id_prefix = "infer" else: id_prefix = "train" @@ -735,7 +735,7 @@ def dataset_from_dicts(self, dicts, index=None, from_inference=False): dataset, tensor_names = self._create_dataset() return dataset, tensor_names - def _convert_inference(self, infer_dict): + def _convert_rest_api_dict(self, infer_dict): # convert input coming from inferencer to SQuAD format converted = {} converted["paragraphs"] = [ @@ -757,7 +757,7 @@ def file_to_dicts(self, file: str) -> [dict]: def _dict_to_samples(self, dictionary: dict, **kwargs) -> [Sample]: if "paragraphs" not in dictionary: # TODO change this inference mode hack - dictionary = self._convert_inference(infer_dict=dictionary) + dictionary = self._convert_rest_api_dict(infer_dict=dictionary) samples = create_samples_squad(entry=dictionary) for sample in samples: tokenized = tokenize_with_metadata( From bd0ebbed2c31b377087c5e4525f91e390a018dff Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Thu, 10 Oct 2019 15:20:53 +0200 Subject: [PATCH 04/11] Adjust progress bar range --- farm/infer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/farm/infer.py b/farm/infer.py index 43e6ee849..e600d1c74 100644 --- a/farm/infer.py +++ b/farm/infer.py @@ -138,15 +138,17 @@ def inference_from_file(self, file): ) preds_all = [] - for dataset, tensor_names, sample in tqdm(results, total=dict_batches_to_process): - preds_all.append(self._run_inference(dataset, tensor_names, sample)) + with tqdm(total=len(dicts), unit=' Dicts') as pbar: + for dataset, tensor_names, sample in results: + preds_all.append(self._run_inference(dataset, tensor_names, sample)) + pbar.update(self.multiprocessing_chunk_size) return preds_all @classmethod def _multiproc_dict_to_samples(cls, dicts, processor): dicts_list = [dicts] - dataset, tensor_names = processor.dataset_from_dicts(dicts_list, from_inference=True) + dataset, tensor_names = processor.dataset_from_dicts(dicts_list) samples = [] for d in dicts_list: samples.extend(processor._dict_to_samples(d)) From 17bb8b6b40279a1bf86ed768e9f89327ee0dd17b Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Thu, 10 Oct 2019 16:46:23 +0200 Subject: [PATCH 05/11] Rename param --- examples/question_answering.py | 164 ++++++++++++++++----------------- farm/data_handler/processor.py | 4 +- farm/infer.py | 4 +- 3 files changed, 86 insertions(+), 86 deletions(-) diff --git a/examples/question_answering.py b/examples/question_answering.py index f6b12c0fe..69017d0bf 100644 --- a/examples/question_answering.py +++ b/examples/question_answering.py @@ -22,89 +22,89 @@ ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/") ml_logger.init_experiment(experiment_name="Public_FARM", run_name="Run_question_answering") -########################## -########## Settings -########################## -set_all_seeds(seed=42) -device, n_gpu = initialize_device_settings(use_cuda=True) -batch_size = 24 -n_epochs = 2 -evaluate_every = 500 -base_LM_model = "bert-base-cased" -train_filename="train-v2.0.json" -dev_filename="dev-v2.0.json" - -# 1.Create a 
tokenizer -tokenizer = BertTokenizer.from_pretrained( - pretrained_model_name_or_path=base_LM_model, do_lower_case=False -) -# 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset -label_list = ["start_token", "end_token"] -metric = "squad" -processor = SquadProcessor( - tokenizer=tokenizer, - max_seq_len=256, - labels=label_list, - metric=metric, - train_filename=train_filename, - dev_filename=dev_filename, - test_filename=None, - data_dir="../data/squad20", -) - - -# 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets -data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False) - -# 4. Create an AdaptiveModel -# a) which consists of a pretrained language model as a basis -language_model = Bert.load(base_LM_model) -# b) and a prediction head on top that is suited for our task => Question Answering -prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)]) - -model = AdaptiveModel( - language_model=language_model, - prediction_heads=[prediction_head], - embeds_dropout_prob=0.1, - lm_output_types=["per_token"], - device=device, -) - -# 5. Create an optimizer -optimizer, warmup_linear = initialize_optimizer( - model=model, - learning_rate=1e-5, - warmup_proportion=0.2, - n_batches=len(data_silo.loaders["train"]), - n_epochs=n_epochs, -) -# 6. Feed everything to the Trainer, which keeps care of growing our model and evaluates it from time to time -trainer = Trainer( - optimizer=optimizer, - data_silo=data_silo, - epochs=n_epochs, - n_gpu=n_gpu, - warmup_linear=warmup_linear, - evaluate_every=evaluate_every, - device=device, -) -# 7. Let it grow! Watch the tracked metrics live on the public mlflow server: https://public-mlflow.deepset.ai -model = trainer.train(model) - -# 8. Hooray! You have a model. Store it: -save_dir = "../saved_models/bert-english-qa-tutorial" -model.save(save_dir) -processor.save(save_dir) - -# 9. Load it & harvest your fruits (Inference) -QA_input = [ - { - "questions": ["Who counted the game among the best ever made?"], - "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created." - }] - +# ########################## +# ########## Settings +# ########################## +# set_all_seeds(seed=42) +# device, n_gpu = initialize_device_settings(use_cuda=True) +# batch_size = 24 +# n_epochs = 2 +# evaluate_every = 500 +# base_LM_model = "bert-base-cased" +# train_filename="train-v2.0.json" +# dev_filename="dev-v2.0.json" +# +# # 1.Create a tokenizer +# tokenizer = BertTokenizer.from_pretrained( +# pretrained_model_name_or_path=base_LM_model, do_lower_case=False +# ) +# # 2. 
Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset +# label_list = ["start_token", "end_token"] +# metric = "squad" +# processor = SquadProcessor( +# tokenizer=tokenizer, +# max_seq_len=256, +# labels=label_list, +# metric=metric, +# train_filename=train_filename, +# dev_filename=dev_filename, +# test_filename=None, +# data_dir="../data/squad20", +# ) +# +# +# # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets +# data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False) +# +# # 4. Create an AdaptiveModel +# # a) which consists of a pretrained language model as a basis +# language_model = Bert.load(base_LM_model) +# # b) and a prediction head on top that is suited for our task => Question Answering +# prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)]) +# +# model = AdaptiveModel( +# language_model=language_model, +# prediction_heads=[prediction_head], +# embeds_dropout_prob=0.1, +# lm_output_types=["per_token"], +# device=device, +# ) +# +# # 5. Create an optimizer +# optimizer, warmup_linear = initialize_optimizer( +# model=model, +# learning_rate=1e-5, +# warmup_proportion=0.2, +# n_batches=len(data_silo.loaders["train"]), +# n_epochs=n_epochs, +# ) +# # 6. Feed everything to the Trainer, which keeps care of growing our model and evaluates it from time to time +# trainer = Trainer( +# optimizer=optimizer, +# data_silo=data_silo, +# epochs=n_epochs, +# n_gpu=n_gpu, +# warmup_linear=warmup_linear, +# evaluate_every=evaluate_every, +# device=device, +# ) +# # 7. Let it grow! Watch the tracked metrics live on the public mlflow server: https://public-mlflow.deepset.ai +# model = trainer.train(model) +# +# # 8. Hooray! You have a model. Store it: +# save_dir = "../saved_models/bert-english-qa-tutorial" +# model.save(save_dir) +# processor.save(save_dir) +# +# # 9. Load it & harvest your fruits (Inference) +# QA_input = [ +# { +# "questions": ["Who counted the game among the best ever made?"], +# "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created." 
+# }] +save_dir = "base_models/bert-base-cased-english-SQUAD20" model = Inferencer.load(save_dir) -result = model.inference_from_dicts(dicts=QA_input) +result = model.inference_from_file(file="/Users/tanay/data/squad20/dev-v2.0.json") for x in result: pprint.pprint(x) diff --git a/farm/data_handler/processor.py b/farm/data_handler/processor.py index 01a1721ce..0a6b2df2c 100644 --- a/farm/data_handler/processor.py +++ b/farm/data_handler/processor.py @@ -308,8 +308,8 @@ def _create_dataset(self, keep_baskets=False): # dataset, tensor_names = self._create_dataset() # return dataset, tensor_names - #TODO remove useless from_inference flag after refactoring squad processing - def dataset_from_dicts(self, dicts, index=None, from_inference=False): + #TODO remove useless rest_api_schema flag after refactoring squad processing + def dataset_from_dicts(self, dicts, index=None, rest_api_schema=False): """ Contains all the functionality to turn a list of dict objects into a PyTorch Dataset and a list of tensor names. This can be used for inference mode. diff --git a/farm/infer.py b/farm/infer.py index e600d1c74..e7728a68b 100644 --- a/farm/infer.py +++ b/farm/infer.py @@ -196,7 +196,7 @@ def inference_from_dicts(self, dicts): "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)`" f"b) ... run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head" ) - dataset, tensor_names = self.processor.dataset_from_dicts(dicts, from_inference=True) + dataset, tensor_names = self.processor.dataset_from_dicts(dicts, rest_api_schema=True) samples = [] for dict in dicts: samples.extend(self.processor._dict_to_samples(dict)) @@ -221,7 +221,7 @@ def extract_vectors( :return: dict of predictions """ - dataset, tensor_names = self.processor.dataset_from_dicts(dicts, from_inference=True) + dataset, tensor_names = self.processor.dataset_from_dicts(dicts, rest_api_schema=True) samples = [] for dict in dicts: samples.extend(self.processor._dict_to_samples(dict)) From f8a686685b295e814ab93f67b6920f0db8777f00 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 11 Oct 2019 15:54:55 +0200 Subject: [PATCH 06/11] Refactor inference methods --- farm/infer.py | 61 ++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/farm/infer.py b/farm/infer.py index e7728a68b..5f279c532 100644 --- a/farm/infer.py +++ b/farm/infer.py @@ -15,6 +15,7 @@ from farm.data_handler.processor import Processor, InferenceProcessor from farm.utils import set_all_seeds from farm.utils import log_ascii_workers +from farm.data_handler.utils import grouper logger = logging.getLogger(__name__) @@ -119,6 +120,27 @@ def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=False, return_cla def inference_from_file(self, file): dicts = self.processor.file_to_dicts(file) + preds_all = self.inference_from_dicts(dicts, rest_api_schema=False) + return preds_all + + def inference_from_dicts(self, dicts, rest_api_schema=True): + """ + Runs down-stream inference using the prediction head. + + :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample. + :type dicts: [dict] + :param rest_api_schema: whether conform to the schema used for dicts in the HTTP API for Inference. + :type rest_api_schema: bool + :return: dict of predictions + + """ + if self.prediction_type == "embedder": + raise TypeError( + "You have called inference_from_dicts for a model without any prediction head! 
" + "If you want to: " + "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)`" + f"b) ... run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head" + ) dict_batches_to_process = int(len(dicts) / self.multiprocessing_chunk_size) num_cpus = min(mp.cpu_count(), dict_batches_to_process) or 1 @@ -132,9 +154,9 @@ def inference_from_file(self, file): log_ascii_workers(num_cpus, logger) results = p.imap( - partial(self._multiproc_dict_to_samples, processor=self.processor), - dicts, - chunksize=self.multiprocessing_chunk_size, + partial(self._multiproc, processor=self.processor, rest_api_schema=rest_api_schema), + grouper(dicts, self.multiprocessing_chunk_size), + 1 ) preds_all = [] @@ -146,11 +168,11 @@ def inference_from_file(self, file): return preds_all @classmethod - def _multiproc_dict_to_samples(cls, dicts, processor): - dicts_list = [dicts] - dataset, tensor_names = processor.dataset_from_dicts(dicts_list) + def _multiproc(cls, chunk, processor, rest_api_schema): + dicts = [d[1] for d in chunk] + dataset, tensor_names = processor.dataset_from_dicts(dicts, rest_api_schema) samples = [] - for d in dicts_list: + for d in dicts: samples.extend(processor._dict_to_samples(d)) return dataset, tensor_names, samples @@ -180,31 +202,6 @@ def _run_inference(self, dataset, tensor_names, samples): return preds_all - def inference_from_dicts(self, dicts): - """ - Runs down-stream inference using the prediction head. - - :param dicts: Samples to run inference on provided as a list of dicts. One dict per sample. - :type dicts: [dict] - :return: dict of predictions - - """ - if self.prediction_type == "embedder": - raise TypeError( - "You have called inference_from_dicts for a model without any prediction head! " - "If you want to: " - "a) ... extract vectors from the language model: call `Inferencer.extract_vectors(...)`" - f"b) ... 
run inference on a downstream task: make sure your model path {self.name} contains a saved prediction head" - ) - dataset, tensor_names = self.processor.dataset_from_dicts(dicts, rest_api_schema=True) - samples = [] - for dict in dicts: - samples.extend(self.processor._dict_to_samples(dict)) - - preds_all = self._run_inference(dataset, tensor_names, samples) - - return preds_all - def extract_vectors( self, dicts, extraction_strategy="cls_token", extraction_layer=-1 ): From d903ae15477f95922659715a65ca53d80a028b52 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 11 Oct 2019 15:55:18 +0200 Subject: [PATCH 07/11] Update method name for Inference --- farm/inference_rest_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/farm/inference_rest_api.py b/farm/inference_rest_api.py index 6db07d596..f5854e434 100644 --- a/farm/inference_rest_api.py +++ b/farm/inference_rest_api.py @@ -79,7 +79,7 @@ def post(self, model_id): dicts = request.get_json().get("input", None) if not dicts: return {} - results = model.run_inference(dicts=dicts) + results = model.inference_from_dicts(dicts=dicts, rest_api_schema=True) return results[0] From 57ddc8c224cdedb9edb70d88c8ee269eef3a0c57 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 11 Oct 2019 16:13:52 +0200 Subject: [PATCH 08/11] Revert change --- examples/question_answering.py | 164 ++++++++++++++++----------------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/examples/question_answering.py b/examples/question_answering.py index 69017d0bf..f6b12c0fe 100644 --- a/examples/question_answering.py +++ b/examples/question_answering.py @@ -22,89 +22,89 @@ ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/") ml_logger.init_experiment(experiment_name="Public_FARM", run_name="Run_question_answering") -# ########################## -# ########## Settings -# ########################## -# set_all_seeds(seed=42) -# device, n_gpu = initialize_device_settings(use_cuda=True) -# batch_size = 24 -# n_epochs = 2 -# evaluate_every = 500 -# base_LM_model = "bert-base-cased" -# train_filename="train-v2.0.json" -# dev_filename="dev-v2.0.json" -# -# # 1.Create a tokenizer -# tokenizer = BertTokenizer.from_pretrained( -# pretrained_model_name_or_path=base_LM_model, do_lower_case=False -# ) -# # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset -# label_list = ["start_token", "end_token"] -# metric = "squad" -# processor = SquadProcessor( -# tokenizer=tokenizer, -# max_seq_len=256, -# labels=label_list, -# metric=metric, -# train_filename=train_filename, -# dev_filename=dev_filename, -# test_filename=None, -# data_dir="../data/squad20", -# ) -# -# -# # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets -# data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False) -# -# # 4. Create an AdaptiveModel -# # a) which consists of a pretrained language model as a basis -# language_model = Bert.load(base_LM_model) -# # b) and a prediction head on top that is suited for our task => Question Answering -# prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)]) -# -# model = AdaptiveModel( -# language_model=language_model, -# prediction_heads=[prediction_head], -# embeds_dropout_prob=0.1, -# lm_output_types=["per_token"], -# device=device, -# ) -# -# # 5. 
Create an optimizer -# optimizer, warmup_linear = initialize_optimizer( -# model=model, -# learning_rate=1e-5, -# warmup_proportion=0.2, -# n_batches=len(data_silo.loaders["train"]), -# n_epochs=n_epochs, -# ) -# # 6. Feed everything to the Trainer, which keeps care of growing our model and evaluates it from time to time -# trainer = Trainer( -# optimizer=optimizer, -# data_silo=data_silo, -# epochs=n_epochs, -# n_gpu=n_gpu, -# warmup_linear=warmup_linear, -# evaluate_every=evaluate_every, -# device=device, -# ) -# # 7. Let it grow! Watch the tracked metrics live on the public mlflow server: https://public-mlflow.deepset.ai -# model = trainer.train(model) -# -# # 8. Hooray! You have a model. Store it: -# save_dir = "../saved_models/bert-english-qa-tutorial" -# model.save(save_dir) -# processor.save(save_dir) -# -# # 9. Load it & harvest your fruits (Inference) -# QA_input = [ -# { -# "questions": ["Who counted the game among the best ever made?"], -# "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created." -# }] -save_dir = "base_models/bert-base-cased-english-SQUAD20" +########################## +########## Settings +########################## +set_all_seeds(seed=42) +device, n_gpu = initialize_device_settings(use_cuda=True) +batch_size = 24 +n_epochs = 2 +evaluate_every = 500 +base_LM_model = "bert-base-cased" +train_filename="train-v2.0.json" +dev_filename="dev-v2.0.json" + +# 1.Create a tokenizer +tokenizer = BertTokenizer.from_pretrained( + pretrained_model_name_or_path=base_LM_model, do_lower_case=False +) +# 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset +label_list = ["start_token", "end_token"] +metric = "squad" +processor = SquadProcessor( + tokenizer=tokenizer, + max_seq_len=256, + labels=label_list, + metric=metric, + train_filename=train_filename, + dev_filename=dev_filename, + test_filename=None, + data_dir="../data/squad20", +) + + +# 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets +data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False) + +# 4. Create an AdaptiveModel +# a) which consists of a pretrained language model as a basis +language_model = Bert.load(base_LM_model) +# b) and a prediction head on top that is suited for our task => Question Answering +prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)]) + +model = AdaptiveModel( + language_model=language_model, + prediction_heads=[prediction_head], + embeds_dropout_prob=0.1, + lm_output_types=["per_token"], + device=device, +) + +# 5. Create an optimizer +optimizer, warmup_linear = initialize_optimizer( + model=model, + learning_rate=1e-5, + warmup_proportion=0.2, + n_batches=len(data_silo.loaders["train"]), + n_epochs=n_epochs, +) +# 6. 
Feed everything to the Trainer, which keeps care of growing our model and evaluates it from time to time +trainer = Trainer( + optimizer=optimizer, + data_silo=data_silo, + epochs=n_epochs, + n_gpu=n_gpu, + warmup_linear=warmup_linear, + evaluate_every=evaluate_every, + device=device, +) +# 7. Let it grow! Watch the tracked metrics live on the public mlflow server: https://public-mlflow.deepset.ai +model = trainer.train(model) + +# 8. Hooray! You have a model. Store it: +save_dir = "../saved_models/bert-english-qa-tutorial" +model.save(save_dir) +processor.save(save_dir) + +# 9. Load it & harvest your fruits (Inference) +QA_input = [ + { + "questions": ["Who counted the game among the best ever made?"], + "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created." + }] + model = Inferencer.load(save_dir) -result = model.inference_from_file(file="/Users/tanay/data/squad20/dev-v2.0.json") +result = model.inference_from_dicts(dicts=QA_input) for x in result: pprint.pprint(x) From 279cbf17f4ec79fc51e0b973cb123cbb78f8e3bb Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 11 Oct 2019 16:20:15 +0200 Subject: [PATCH 09/11] Code formatting --- farm/infer.py | 77 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/farm/infer.py b/farm/infer.py index 5f279c532..79df12800 100644 --- a/farm/infer.py +++ b/farm/infer.py @@ -41,10 +41,18 @@ class Inferencer: """ - def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_class_probs=False, - multiprocessing_chunk_size=100): + def __init__( + self, + model, + processor, + batch_size=4, + gpu=False, + name=None, + return_class_probs=False, + multiprocessing_chunk_size=100, + ): """ - Initializes inferencer from an AdaptiveModel and a Processor instance. + Initializes Inferencer from an AdaptiveModel and a Processor instance. 
:param model: AdaptiveModel to run in inference mode :type model: AdaptiveModel @@ -54,7 +62,7 @@ def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_ :type batch_size: int :param gpu: If GPU shall be used :type gpu: bool - :param name: Name for the current inferencer model, displayed in the REST API + :param name: Name for the current Inferencer model, displayed in the REST API :type name: string :param return_class_probs: either return probability distribution over all labels or the prob of the associated label :type return_class_probs: bool @@ -62,9 +70,7 @@ def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_ """ # Init device and distributed settings - device, n_gpu = initialize_device_settings( - use_cuda=gpu, local_rank=-1, fp16=False - ) + device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, fp16=False) self.processor = processor self.model = model @@ -75,7 +81,7 @@ def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_ # TODO adjust for multiple prediction heads if len(self.model.prediction_heads) == 1: self.prediction_type = self.model.prediction_heads[0].model_type - #self.label_map = self.processor.label_maps[0] + # self.label_map = self.processor.label_maps[0] elif len(self.model.prediction_heads) == 0: self.prediction_type = "embedder" # else: @@ -88,9 +94,17 @@ def __init__(self, model, processor, batch_size=4, gpu=False, name=None, return_ set_all_seeds(42, n_gpu) @classmethod - def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=False, return_class_probs=False): + def load( + cls, + load_dir, + batch_size=4, + gpu=False, + embedder_only=False, + return_class_probs=False, + multiprocessing_chunk_size=100, + ): """ - Initializes inferencer from directory with saved model. + Initializes Inferencer from directory with saved model. :param load_dir: Directory where the saved model is located. :type load_dir: str @@ -101,12 +115,12 @@ def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=False, return_cla :param embedder_only: If true, a faster processor (InferenceProcessor) is loaded. This should only be used for extracting embeddings (no downstream predictions). :type embedder_only: bool + :param multiprocessing_chunk_size: chunksize param for Python Multiprocessing imap(). + :type multiprocessing_chunk_size: int :return: An instance of the Inferencer. """ - device, n_gpu = initialize_device_settings( - use_cuda=gpu, local_rank=-1, fp16=False - ) + device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, fp16=False) model = AdaptiveModel.load(load_dir, device) if embedder_only: @@ -116,14 +130,22 @@ def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=False, return_cla processor = Processor.load_from_dir(load_dir) name = os.path.basename(load_dir) - return cls(model, processor, batch_size=batch_size, gpu=gpu, name=name, return_class_probs=return_class_probs) + return cls( + model, + processor, + batch_size=batch_size, + gpu=gpu, + name=name, + return_class_probs=return_class_probs, + multiprocessing_chunk_size=multiprocessing_chunk_size, + ) def inference_from_file(self, file): dicts = self.processor.file_to_dicts(file) preds_all = self.inference_from_dicts(dicts, rest_api_schema=False) return preds_all - def inference_from_dicts(self, dicts, rest_api_schema=True): + def inference_from_dicts(self, dicts, rest_api_schema=False): """ Runs down-stream inference using the prediction head. 
@@ -156,11 +178,11 @@ def inference_from_dicts(self, dicts, rest_api_schema=True): results = p.imap( partial(self._multiproc, processor=self.processor, rest_api_schema=rest_api_schema), grouper(dicts, self.multiprocessing_chunk_size), - 1 + 1, ) preds_all = [] - with tqdm(total=len(dicts), unit=' Dicts') as pbar: + with tqdm(total=len(dicts), unit=" Dicts") as pbar: for dataset, tensor_names, sample in results: preds_all.append(self._run_inference(dataset, tensor_names, sample)) pbar.update(self.multiprocessing_chunk_size) @@ -174,26 +196,23 @@ def _multiproc(cls, chunk, processor, rest_api_schema): samples = [] for d in dicts: samples.extend(processor._dict_to_samples(d)) - + return dataset, tensor_names, samples def _run_inference(self, dataset, tensor_names, samples): data_loader = NamedDataLoader( - dataset=dataset, - sampler=SequentialSampler(dataset), - batch_size=self.batch_size, - tensor_names=tensor_names, + dataset=dataset, sampler=SequentialSampler(dataset), batch_size=self.batch_size, tensor_names=tensor_names ) preds_all = [] for i, batch in enumerate(data_loader): batch = {key: batch[key].to(self.device) for key in batch} - batch_samples = samples[i * self.batch_size: (i + 1) * self.batch_size] + batch_samples = samples[i * self.batch_size : (i + 1) * self.batch_size] with torch.no_grad(): logits = self.model.forward(**batch) preds = self.model.formatted_preds( logits=logits, - samples=batch_samples, + samples=batch_samples, # TODO batch_samples and logits are not aligned tokenizer=self.processor.tokenizer, return_class_probs=self.return_class_probs, **batch, @@ -202,9 +221,7 @@ def _run_inference(self, dataset, tensor_names, samples): return preds_all - def extract_vectors( - self, dicts, extraction_strategy="cls_token", extraction_layer=-1 - ): + def extract_vectors(self, dicts, extraction_strategy="cls_token", extraction_layer=-1): """ Converts a text into vector(s) using the language model only (no prediction head involved). 
@@ -224,10 +241,7 @@ def extract_vectors( samples.extend(self.processor._dict_to_samples(dict)) data_loader = NamedDataLoader( - dataset=dataset, - sampler=SequentialSampler(dataset), - batch_size=self.batch_size, - tensor_names=tensor_names, + dataset=dataset, sampler=SequentialSampler(dataset), batch_size=self.batch_size, tensor_names=tensor_names ) preds_all = [] @@ -256,6 +270,7 @@ def __init__(self, model, name=None): @classmethod def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=True): import fasttext + if os.path.isfile(load_dir): return cls(model=fasttext.load_model(load_dir)) else: From 951752a518a4c3ca62d6bde805eda1694e727c18 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 11 Oct 2019 16:33:06 +0200 Subject: [PATCH 10/11] Fix list concatenation for preds_all --- farm/infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/farm/infer.py b/farm/infer.py index 79df12800..c845e3ef7 100644 --- a/farm/infer.py +++ b/farm/infer.py @@ -184,7 +184,7 @@ def inference_from_dicts(self, dicts, rest_api_schema=False): preds_all = [] with tqdm(total=len(dicts), unit=" Dicts") as pbar: for dataset, tensor_names, sample in results: - preds_all.append(self._run_inference(dataset, tensor_names, sample)) + preds_all.extend(self._run_inference(dataset, tensor_names, sample)) pbar.update(self.multiprocessing_chunk_size) return preds_all From 3fed46c389cb0e2af685f196febbed7fa200987d Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Fri, 11 Oct 2019 16:41:46 +0200 Subject: [PATCH 11/11] Update method name in test --- test/test_processor_saving_loading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_processor_saving_loading.py b/test/test_processor_saving_loading.py index 7c9c39eda..4168a0f51 100644 --- a/test/test_processor_saving_loading.py +++ b/test/test_processor_saving_loading.py @@ -24,14 +24,14 @@ def test_processor_saving_loading(caplog): label_list=["OTHER", "OFFENSE"], metrics=["f1_macro"] ) - dicts = processor._file_to_dicts(file="samples/doc_class/train-sample.tsv") + dicts = processor.file_to_dicts(file="samples/doc_class/train-sample.tsv") data, tensor_names = processor.dataset_from_dicts(dicts) save_dir = "testsave/processor" processor.save(save_dir) processor = processor.load_from_dir(save_dir) - dicts = processor._file_to_dicts(file="samples/doc_class/train-sample.tsv") + dicts = processor.file_to_dicts(file="samples/doc_class/train-sample.tsv") data_loaded, tensor_names_loaded = processor.dataset_from_dicts(dicts) assert tensor_names == tensor_names_loaded