diff --git a/dragnet/data_processing.py b/dragnet/data_processing.py
index a43c4e9..95acf46 100644
--- a/dragnet/data_processing.py
+++ b/dragnet/data_processing.py
@@ -401,352 +401,3 @@ def prepare_all_data(data_dir, block_pct_tokens_thresh=0.1):
     return [prepare_data(data_dir, fileroot, block_pct_tokens_thresh)
             for fileroot in gs_blocks_fileroots]
 
-
-# class DragnetModelData(object):
-#     """
-#     the data needed to train a model
-#     includes the html, the gold standard tokens
-#
-#     a datadir with the training data directory structure
-#     each training data document has a number of files with a common
-#     "fileroot" and a set of additional files in subdirectories
-#         HTML / fileroot.html
-#         Corrected / fileroot.html.corrected.txt = cut and paste content
-#             from the HTML
-#         block_corrected / fileroot.block_corrected.txt
-#     source = one of 'all', 'domain_list', 'technoratti', 'reader'
-#     """
-#     def __init__(self, datadir, block_percent_threshold=0.1, source='all'):
-#         # set the re_source = a regex that can be used on fileroot
-#         # to eliminate files based on source
-#         if source == 'technoratti':
-#             re_keep = '^T[0-9]+'
-#         elif source == 'domain_list':
-#             re_keep = '(^[0-9])|(^[a-zA-Z]{2})'
-#         elif source == 'reader':
-#             re_keep = '^R[0-9]+'
-#         elif source == 'all':
-#             re_keep = ''  # match anything
-#         else:
-#             raise ValueError("Invalid source")
-#         self._re_source = re.compile(re_keep)
-#         self._source = source
-#
-#         # now read in all the data
-#         self._read_all_data(datadir, block_percent_threshold, source)
-#
-#     def _read_all_data(self, datadir, block_percent_threshold, source):
-#         """
-#         block_percent_threshold = the cut-off percent of all tokens in a block
-#             that are in the gold standard, above which the block is
-#             classified as content
-#         stores attributes .training_data, .test_data where each is a list
-#             of tuples:
-#             (raw_html_string,
-#                 content_gold_standard, comments_gold_standard, encoding)
-#             where content/comments gold_standard =
-#                 (list of block 0/1 flag, list of # tokens, all tokens as a list)
-#             encoding is the encoding from <text> tag for cleaneval,
-#                 otherwise it is None
-#         stores attributes .training_files, .test_files where each is a list
-#             of the file names
-#         """
-#         self.training_data = []
-#         self.test_data = []
-#         self.training_files = []
-#         self.test_files = []
-#
-#         training_fileroot = set(open(datadir + '/training.txt', 'r').read().strip().split())
-#         print("Reading the training and test data...")
-#         for file, fileroot in get_list_all_corrected_files(datadir):
-#             if self._re_source.match(fileroot):
-#                 html, encoding = read_HTML_file(datadir, fileroot)
-#                 block_corrected_file = open(
-#                     '%s/block_corrected/%s.block_corrected.txt' %
-#                     (datadir, fileroot), 'r')
-#                 blocks = block_corrected_file.read()[:-1].split('\n')
-#
-#                 content = []
-#                 comments = []
-#                 for block in blocks:
-#                     block_split = block.split('\t')
-#                     # will store the weights as the total number of tokens in the document
-#                     content.append((float(block_split[0]), len(block_split[2].strip().split()), block_split[3].strip().split()))
-#                     comments.append((float(block_split[1]), len(block_split[2].strip().split()), block_split[4].strip().split()))
-#
-#                 ret = []
-#                 for content_comments in [content, comments]:
-#                     extracted_flag = (np.array([ele[0] for ele in content_comments]) > block_percent_threshold).astype(np.int)
-#                     extracted_flag[np.array([ele[0] for ele in content_comments]) == -1] = -1
-#                     counts = np.array([ele[1] for ele in content_comments])
-#                     tokens = []
-#                     for this_block_tokens in [ele[2] for ele in content_comments if ele[1] > 0]:
-#                         tokens.extend(this_block_tokens)
-#                     ret.append((extracted_flag, counts, tokens))
-#
-#                 if fileroot in training_fileroot:
-#                     self.training_data.append((html, ret[0], ret[1], encoding))
-#                     self.training_files.append(fileroot)
-#                 else:
-#                     self.test_data.append((html, ret[0], ret[1], encoding))
-#                     self.test_files.append(fileroot)
-#
-#         print("..done!")
-#         print("Got %s training, %s test documents" % (len(self.training_data), len(self.test_data)))
-#
-#     @staticmethod
-#     def diagnose_css(datadir, plotdir):
-#         data = DragnetModelData(datadir, source='all')
-#
-#         # get a list of all the css tokens extracted as content and not content
-#         # ONLY USE TRAINING DATA
-#         content_css = []
-#         no_content_css = []
-#         for datum in data.training_data:
-#             blocks = Blockifier.blockify(datum[0], encoding=datum[3])
-#             extracted = np.logical_or(datum[1][0], datum[2][0])
-#             assert len(blocks) == len(extracted)
-#             content_css.extend([blocks[k].css for k in range_(len(blocks)) if extracted[k]])
-#             no_content_css.extend([blocks[k].css for k in range_(len(blocks)) if not extracted[k]])
-#
-#         # make a list of the most popular tokens
-#         from collections import defaultdict
-#         popular_tokens = {}
-#         for c, d in [('content', content_css), ('no_content', no_content_css)]:
-#             popular_tokens[c] = {}
-#             for tag in ['id', 'class']:
-#                 popular_tokens[c][tag] = defaultdict(lambda: 0)
-#             for block in d:
-#                 for tag in ['id', 'class']:
-#                     for token in re.split('\W+|_', block[tag]):
-#                         popular_tokens[c][tag][token] += 1
-#
-#         # sort tokens by most popular
-#         popular_tokens_sorted = {}
-#         for c in ['content', 'no_content']:
-#             popular_tokens_sorted[c] = {}
-#             for tag in ['id', 'class']:
-#                 popular_tokens_sorted[c][tag] = [(v, k) for k, v in popular_tokens[c][tag].iteritems()]
-#                 popular_tokens_sorted[c][tag].sort(reverse=True)
-#
-#         # write to a file with percent of total
-#         for c in ['content', 'no_content']:
-#             for tag in ['id', 'class']:
-#                 total_tokens = np.sum([ele[0] for ele in popular_tokens_sorted[c][tag]])
-#                 with open(plotdir + '/css_token_count_%s_%s.tsv' % (c, tag), 'w') as f:
-#                     f.write("Token\tCount\tPercent Total\tCum Total\n")
-#                     cumcount = 0
-#                     for count, token in popular_tokens_sorted[c][tag]:
-#                         cumcount += count
-#                         f.write("%s\t%s\t%s\t%s\n" % (count,
-#                                                       token,
-#                                                       float(count) / total_tokens,
-#                                                       float(cumcount) / total_tokens))
-#
-#         # take the ratio of token count in content vs no content
-#         # for the tokens in the specified list
-#         css_tokens = open("dragnet_css_tokens.txt", 'r').read().strip().split('\n')
-#         content_no_content_ratio = {}
-#         no_content_block_count = len(no_content_css)
-#         content_block_count = len(content_css)
-#         for tag in ['id', 'class']:
-#             content_no_content_ratio[tag] = []
-#             for token in css_tokens:
-#                 content_count_percent = np.sum([re.search(token, block[tag].lower()) is not None for block in content_css]) / float(content_block_count)
-#                 no_content_count_percent = np.sum([re.search(token, block[tag].lower()) is not None for block in no_content_css]) / float(no_content_block_count)
-#
-#                 if no_content_count_percent > 0:
-#                     ratio = content_count_percent / no_content_count_percent
-#                 else:
-#                     ratio = np.inf
-#
-#                 content_no_content_ratio[tag].append((ratio, token, content_count_percent, no_content_count_percent))
-#
-#             content_no_content_ratio[tag].sort()
-#
-#         # dump ratios to a file
-#         with open(plotdir + '/css_popular_token_ratio.txt', 'w') as f:
-#             f.write("Ratio of appearence frequency in content vs non-content blocks\n")
-#             f.write("Ratio------token-----percent of content blocks present-----percent of non-content blocks present\n")
-#             for tag in ['id', 'class']:
-#                 f.write("\n%s\n" % tag)
-#                 for t in content_no_content_ratio[tag]:
-#                     f.write("%s\t%s\t%s\t%s\n" % t)
-#
-#     @staticmethod
-#     def diagnose_data(datadir, plotdir, training_or_test='both'):
-#         """Do some diagnosis if the data set
-#
-#         Plotdir = output plots to this directory"""
-#         import pylab as plt
-#
-#         # we will accumulate the percent extracted for some histograms
-#         percent_extracted = []
-#         for s, t in [('all', 'All data'),
-#                      ('technoratti', 'Technoratti'),
-#                      ('domain_list', "Domain list"),
-#                      ('reader', "Popular RSS on Google Reader")]:
-#
-#             data = DragnetModelData(datadir, source=s)
-#             data._diagnose_data_one_source(plotdir, t, training_or_test='both')
-#
-#             percent_extracted.append((t, data._get_percent_tokens_extracted_in_block(datadir)))
-#
-#         # plot percent extracted
-#         fig = plt.figure(3)
-#         fig.clf()
-#         k = 0
-#         for ti, d in percent_extracted:
-#             plt.subplot(221 + k)
-#             plt.hist(d, 30)
-#             plt.title(ti)
-#             k += 1
-#         fig.show()
-#         fig.savefig(plotdir + '/percent_tokens_extracted.png')
-#
-#     def _get_percent_tokens_extracted_in_block(self, datadir):
-#         ret = []
-#         for file, fileroot in get_list_all_corrected_files(datadir):
-#             if self._re_source.match(fileroot):
-#                 # a histogram of block frequency
-#                 with open(os.path.join(datadir,
-#                         'block_corrected/%s.block_corrected.txt' % fileroot),
-#                         'r') as block_corrected_file:
-#                     blocks = block_corrected_file.read()[:-1].split('\n')
-#
-#                 for block in blocks:
-#                     block_split = block.split('\t')
-#                     ret.append(float(block_split[0]))
-#
-#         return np.asarray(ret)
-#
-#     def _diagnose_data_one_source(self, plotdir, ti, training_or_test='both'):
-#         """Make some plots and do some exploratory analyis on training data
-#         training_or_test is one of "training", "test", "both"
-#         """
-#         import pylab as plt
-#         from mozsci.histogram import Histogram1DFast
-#
-#         if training_or_test == 'training':
-#             plot_data = self.training_data
-#             files = self.training_files
-#         elif training_or_test == 'test':
-#             plot_data = self.test_data
-#             files = self.test_files
-#         elif training_or_test == 'both':
-#             plot_data = self.training_data + self.test_data
-#             files = self.training_files + self.test_files
-#         else:
-#             raise ValueError("Invalid training_or_test")
-#
-#         # block_level_aggreate = holds block count of # extracted as
-#         #                        content, comments and total
-#         block_level_aggregate = {'content': [], 'comments': [], 'total': []}
-#         for datum in plot_data:
-#             k = 1
-#             block_level_aggregate['total'].append(len(datum[1][1]))
-#             for c in ['content', 'comments']:
-#                 extracted_flag, overall_token_count, tokens = datum[k]
-#                 block_level_aggregate[c].append(np.sum(extracted_flag))
-#                 k += 1
-#
-#         # plot
-#         block_level_aggregate['total'] = np.array(block_level_aggregate['total']).astype(np.float)
-#         fig = plt.figure(1)
-#         fig.clf()
-#
-#         plt.subplot(221)
-#         plt.hist(block_level_aggregate['total'], 30)
-#         plt.title("Block count across files")
-#
-#         plt.subplot(222)
-#         plt.hist(block_level_aggregate['content'] / block_level_aggregate['total'], 30)
-#         plt.title("Percent of blocks that are content across files")
-#
-#         plt.subplot(223)
-#         plt.hist(block_level_aggregate['comments'] / block_level_aggregate['total'], 30)
-#         plt.title("Percent of blocks that are comments across files")
-#
-#         txt = "Total blocks: %s " % int(np.sum(block_level_aggregate['total']))
-#         for s in ['content', 'comments']:
-#             txt += "\nTotal %s %s (%s %%)" % (s, int(np.sum(block_level_aggregate[s])), np.sum(block_level_aggregate[s]) / np.sum(block_level_aggregate['total']) * 100)
-#         plt.figtext(0.6, 0.4, txt)
-#
-#         add_plot_title(ti + '\nBlock level, training + test')
-#
-#         fig.show()
-#         fig.savefig(plotdir + '/' + self._source + '_block_level.png')
-#
-#         # percent extracted as content vs block number
-#         bins = 20
-#         content_percent_vs_block_percent = {
-#             'content': np.zeros((len(plot_data), bins)),
-#             'comments': np.zeros((len(plot_data), bins))}
-#
-#         # number of tokens in block vs block number
-#         block_length_vs_block_percent = np.zeros((len(plot_data), bins))
-#
-#         for datum_number in range_(len(plot_data)):
-#             datum = plot_data[datum_number]
-#             k = 1
-#             for c in ['content', 'comments']:
-#                 extracted_flag, overall_token_count, tokens = datum[k]
-#                 block_percent = np.arange(len(extracted_flag)) / float(len(extracted_flag))
-#
-#                 # count of extracted blocks in each bin
-#                 h = Histogram1DFast(bins, 0, 1)
-#                 h.update_counts(block_percent, extracted_flag)
-#                 extracted_counts = h.bin_count
-#
-#                 # overall count
-#                 h = Histogram1DFast(bins, 0, 1)
-#                 h.update(block_percent)
-#                 total_counts = h.bin_count
-#
-#                 # number of tokens in block
-#                 if c == 'content':  # token count same for content, comments
-#                     h = Histogram1DFast(bins, 0, 1)
-#                     h.update_counts(block_percent, overall_token_count)
-#                     token_count = h.bin_count
-#                     block_length_vs_block_percent[datum_number, :] = token_count.astype(np.float) / total_counts
-#
-#                 content_percent_vs_block_percent[c][datum_number, :] = extracted_counts.astype(np.float) / total_counts
-#                 k += 1
-#
-#         # plot
-#         fig = plt.figure(2)
-#         fig.clf()
-#
-#         plt.subplot(311)
-#         c = 'content'
-#         masked_data = np.ma.masked_array(content_percent_vs_block_percent[c], np.isnan(content_percent_vs_block_percent[c]))
-#         np.mean(masked_data, axis=0)
-#         plt.plot(np.linspace(0, 1, bins), np.mean(masked_data, axis=0))
-#         plt.title("Content")
-#         plt.ylabel("Percent extracted")
-#
-#         plt.subplot(312)
-#         c = 'comments'
-#         masked_data = np.ma.masked_array(content_percent_vs_block_percent[c], np.isnan(content_percent_vs_block_percent[c]))
-#         np.mean(masked_data, axis=0)
-#         plt.plot(np.linspace(0, 1, bins), np.mean(masked_data, axis=0))
-#         plt.title("Comments")
-#         plt.ylabel("Percent extracted")
-#
-#         plt.subplot(313)
-#         masked_data = np.ma.masked_array(block_length_vs_block_percent, np.isnan(block_length_vs_block_percent))
-#         np.mean(masked_data, axis=0)
-#         plt.plot(np.linspace(0, 1, bins), np.mean(masked_data, axis=0))
-#         plt.title("All tokens")
-#         plt.xlabel("Block position in document")
-#         plt.ylabel("# tokens in block")
-#
-#         add_plot_title(ti + '\nPercent of blocks extracted, # tokens in doc, training + test')
-#         fig.show()
-#         fig.savefig(plotdir + '/' + self._source + '_block_level_block_position.png')
-#
-#
-# def add_plot_title(ti_str):
-#     """Add a string as a title on top of a subplot"""
-#     import pylab as plt
-#     plt.figtext(0.5, 0.94, ti_str, ha='center', color='black', weight='bold', size='large')
diff --git a/dragnet/extractor.py b/dragnet/extractor.py
index 231ba03..7e3f372 100644
--- a/dragnet/extractor.py
+++ b/dragnet/extractor.py
@@ -4,8 +4,9 @@
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.ensemble import ExtraTreesClassifier
 
-from .compat import string_, str_cast
+from .compat import string_, str_cast, unicode_
 from .util import get_and_union_features
+from .blocks import TagCountNoCSSReadabilityBlockifier
 
 
 class Extractor(BaseEstimator, ClassifierMixin):
@@ -36,7 +37,7 @@ class Extractor(BaseEstimator, ClassifierMixin):
             ``predict_proba()`` method.
     """
 
-    def __init__(self, blockifier,
+    def __init__(self, blockifier=TagCountNoCSSReadabilityBlockifier,
                  features=('kohlschuetter', 'weninger', 'readability'),
                  model=None,
                  to_extract='content', prob_threshold=0.5, max_block_weight=200):
@@ -65,7 +66,7 @@ def features(self):
     def features(self, feats):
         self._features = get_and_union_features(feats)
 
-    def fit(self, blocks, labels, weights=None):
+    def fit(self, documents, labels, weights=None):
         """
         Fit :class`Extractor` features and model to a training dataset.
 
@@ -77,16 +78,25 @@ def fit(self, blocks, labels, weights=None):
         Returns:
             :class`Extractor`
         """
-        features_mat = self.features.fit_transform(blocks)
+        block_groups = np.array([self.blockifier.blockify(doc) for doc in documents])
+        mask = [self._has_enough_blocks(blocks) for blocks in block_groups]
+        block_groups = block_groups[mask]
+        labels = np.concatenate(np.array(labels)[mask])
+
+        # TODO: This only 'fit's one doc at a time. No feature fitting actually
+        # happens for now, but this might be important if the features change
+        features_mat = np.concatenate([self.features.fit_transform(blocks)
+                                       for blocks in block_groups])
         if weights is None:
             self.model.fit(features_mat, labels)
         else:
+            weights = np.concatenate(np.array(weights)[mask])
             self.model.fit(features_mat, labels, sample_weight=weights)
         return self
 
-    def concatenate_data(self, data):
+    def get_html_labels_weights(self, data):
         """
-        Concatenate the blocks, labels, and weights of many files' data.
+        Gather the html, labels, and weights of many files' data.
         Primarily useful for training/testing an :class`Extractor`.
 
         Args:
@@ -96,19 +106,16 @@ def concatenate_data(self, data):
             Tuple[List[Block], np.array(int), np.array(int)]: All blocks, all
                 labels, and all weights, respectively.
         """
-        all_blocks = []
-        all_labels = np.empty(0, dtype=int)
-        all_weights = np.empty(0, dtype=int)
+        all_html = []
+        all_labels = []
+        all_weights = []
         for html, content, comments in data:
-            blocks = self.blockifier.blockify(html)
-            if not self._has_enough_blocks(blocks):
-                continue
-            all_blocks.extend(blocks)
-            labels, weights, _ = self._get_labels_and_weights(
+            all_html.append(html)
+            labels, weights = self._get_labels_and_weights(
                 content, comments)
-            all_labels = np.hstack((all_labels, labels))
-            all_weights = np.hstack((all_weights, weights))
-        return all_blocks, all_labels, all_weights
+            all_labels.append(labels)
+            all_weights.append(weights)
+        return np.array(all_html), np.array(all_labels), np.array(all_weights)
 
     def _has_enough_blocks(self, blocks):
         if len(blocks) < 3:
@@ -126,29 +133,22 @@ def _get_labels_and_weights(self, content, comments):
         Returns:
-            Tuple[np.array[int], np.array[int], List[str]]
+            Tuple[np.array[int], np.array[int]]
         """
-        # TODO: get rid of the third element here and elsewhere?
         # extract content and comments
         if 'content' in self.to_extract and 'comments' in self.to_extract:
-            if self.max_block_weight is None:
-                return (np.logical_or(content[0], comments[0]).astype(int),
-                        content[1],
-                        content[2] + comments[2])
-            else:
-                return (np.logical_or(content[0], comments[0]).astype(int),
-                        np.minimum(content[1], self.max_block_weight),
-                        content[2] + comments[2])
+            labels = np.logical_or(content[0], comments[0]).astype(int)
+            weights = content[1]
         # extract content only
         elif 'content' in self.to_extract:
-            if self.max_block_weight is None:
-                return content
-            else:
-                return (content[0], np.minimum(content[1], self.max_block_weight), content[2])
+            labels = content[0]
+            weights = content[1]
         # extract comments only
         else:
-            if self.max_block_weight is None:
-                return comments
-            else:
-                return (comments[0], np.minimum(comments[1], self.max_block_weight), comments[2])
+            labels = comments[0]
+            weights = comments[1]
+        if self.max_block_weight is not None:
+            weights = np.minimum(weights, self.max_block_weight)
+
+        return labels, weights
 
     def extract(self, html, encoding=None, as_blocks=False):
         """
@@ -166,55 +166,62 @@ def extract(self, html, encoding=None, as_blocks=False):
         Returns:
             str or List[Block]
         """
-        blocks = self.blockifier.blockify(html, encoding=encoding)
-        return self.extract_from_blocks(blocks, as_blocks=as_blocks)
+        preds, blocks = self.predict(html, encoding=encoding, return_blocks=True)
+        if as_blocks is False:
+            return str_cast(b'\n'.join(blocks[ind].text for ind in np.flatnonzero(preds)))
+        else:
+            return [blocks[ind] for ind in np.flatnonzero(preds)]
+
 
-    def extract_from_blocks(self, blocks, as_blocks=False):
+    def predict(self, documents, **kwargs):
         """
-        Extract the main content and/or comments from a sequence of (all) blocks
-        and return it as a string or as a sequence of block objects.
+        Predict class (content=1 or not-content=0) of the blocks in one or many
+        HTML document(s).
 
         Args:
-            blocks (List[Block]): Blockify'd HTML document.
-            as_blocks (bool): If False, return the main content as a combined
-                string; if True, return the content-holding blocks as a list of
-                block objects.
+            documents (str or List[str]): HTML document(s)
 
         Returns:
-            str or List[Block]
+            ``np.ndarray`` or List[``np.ndarray``]: array of binary predictions
+                for content (1) or not-content (0).
         """
-        if not self._has_enough_blocks(blocks):
-            if as_blocks is False:
-                return ''
-            else:
-                return []
-        features_mat = self.features.transform(blocks)
-        if self.prob_threshold is None:
-            preds = self.model.predict(features_mat)
-        else:
-            self._positive_idx = (
-                self._positive_idx or list(self.model.classes_).index(1))
-            preds = (self.model.predict_proba(features_mat) > self.prob_threshold)[:, self._positive_idx]
-        if as_blocks is False:
-            return str_cast(b'\n'.join(blocks[ind].text for ind in np.flatnonzero(preds)))
+        if isinstance(documents, (str, bytes, unicode_, np.unicode_)):
+            return self._predict_one(documents, **kwargs)
         else:
-            return [blocks[ind] for ind in np.flatnonzero(preds)]
+            return np.concatenate([self._predict_one(doc, **kwargs) for doc in documents])
 
-    def predict(self, blocks):
+
+    def _predict_one(self, document, encoding=None, return_blocks=False):
         """
-        Predict class (content=1 or not-content=0) of each block in a sequence.
+        Predict class (content=1 or not-content=0) of each block in an HTML
+        document.
 
         Args:
-            blocks (List[Block]): Blockify'd HTML document.
+            document (str): HTML document
 
         Returns:
-            ``np.ndarray``: 1D array of block-level, binary predictions for
-                content (1) or not-content (0).
+            ``np.ndarray``: array of binary predictions for content (1) or
+            not-content (0).
         """
-        features_mat = self.features.transform(blocks)
-        if self.prob_threshold is None:
-            return self.model.predict(features_mat)
+        # blockify
+        blocks = self.blockifier.blockify(document, encoding=encoding)
+        # get features
+        try:
+            features = self.features.transform(blocks)
+        except ValueError: # Can't make features, predict no content
+            preds = np.zeros(len(blocks), dtype=int)
+        # make predictions
         else:
-            self._positive_idx = (
-                self._positive_idx or list(self.model.classes_).index(1))
-            return (self.model.predict_proba(features_mat) > self.prob_threshold)[:, self._positive_idx].astype(int)
+            if self.prob_threshold is None:
+                preds = self.model.predict(features)
+            else:
+                self._positive_idx = (
+                    self._positive_idx or list(self.model.classes_).index(1))
+                preds = self.model.predict_proba(features) > self.prob_threshold
+                preds = preds[:, self._positive_idx].astype(int)
+
+        if return_blocks:
+            return preds, blocks
+        else:
+            return preds
+
diff --git a/dragnet/features/weninger.py b/dragnet/features/weninger.py
index 0ad7937..ed383ab 100644
--- a/dragnet/features/weninger.py
+++ b/dragnet/features/weninger.py
@@ -20,7 +20,7 @@ class WeningerFeatures(BaseEstimator, TransformerMixin):
     __name__ = 'weninger'
 
     def __init__(self, sigma=1.0):
-        self.sigma = 1.0
+        self.sigma = sigma
 
     def fit(self, blocks, y=None):
         """
diff --git a/dragnet/model_training.py b/dragnet/model_training.py
index 4e4838f..ee8d888 100644
--- a/dragnet/model_training.py
+++ b/dragnet/model_training.py
@@ -4,6 +4,7 @@
 import logging
 import os
 import pprint
+import numpy as np
 
 from sklearn.externals import joblib
 from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
@@ -29,6 +30,13 @@ def evaluate_model_predictions(y_true, y_pred, weights=None):
     Returns:
         Dict[str, float]
     """
+    if isinstance(y_pred[0], np.ndarray):
+        y_pred = np.concatenate(y_pred)
+    if isinstance(y_true[0], np.ndarray):
+        y_true = np.concatenate(y_true)
+    if (weights is not None) and (isinstance(weights[0], np.ndarray)):
+        weights = np.concatenate(weights)
+
     accuracy = accuracy_score(
         y_true, y_pred, normalize=True, sample_weight=weights)
     precision = precision_score(
@@ -85,25 +93,27 @@ def train_model(extractor, data_dir, output_dir=None):
     # set up directories and file naming
     output_dir, fname_prefix = _set_up_output_dir_and_fname_prefix(output_dir, extractor)
 
-    # prepare, split, and concatenate the data
-    logging.info('preparing, splitting, and concatenating the data...')
+    # prepare and split the data
+    logging.info('preparing and splitting the data...')
     data = prepare_all_data(data_dir)
     training_data, test_data = train_test_split(
         data, test_size=0.2, random_state=42)
-    train_blocks, train_labels, train_weights = extractor.concatenate_data(training_data)
-    test_blocks, test_labels, test_weights = extractor.concatenate_data(test_data)
+    train_html, train_labels, train_weights = extractor.get_html_labels_weights(training_data)
+    test_html, test_labels, test_weights = extractor.get_html_labels_weights(test_data)
 
     # fit the extractor on training data
     # then evaluate it on train and test data
     logging.info('fitting and evaluating the extractor features and model...')
     try:
-        extractor.fit(train_blocks, train_labels, weights=train_weights)
+        extractor.fit(train_html, train_labels, weights=train_weights)
     except (TypeError, ValueError):
-        extractor.fit(train_blocks, train_labels)
+        extractor.fit(train_html, train_labels)
     train_eval = evaluate_model_predictions(
-        train_labels, extractor.predict(train_blocks))
+        np.concatenate(train_labels), extractor.predict(train_html),
+        np.concatenate(train_weights))
     test_eval = evaluate_model_predictions(
-        test_labels, extractor.predict(test_blocks))
+        np.concatenate(test_labels), extractor.predict(test_html),
+        np.concatenate(test_weights))
 
     # report model performance
     _report_model_performance(output_dir, fname_prefix, train_eval, test_eval)
@@ -149,36 +159,50 @@ def train_many_models(extractor, param_grid, data_dir, output_dir=None,
     # set up directories and file naming
     output_dir, fname_prefix = _set_up_output_dir_and_fname_prefix(output_dir, extractor)
 
-    # prepare, split, and concatenate the data
-    logging.info('preparing, splitting, and concatenating the data...')
+    # prepare and split the data
+    logging.info('preparing and splitting the data...')
     data = prepare_all_data(data_dir)
     training_data, test_data = train_test_split(
         data, test_size=0.2, random_state=42)
-    train_blocks, train_labels, train_weights = extractor.concatenate_data(training_data)
-    test_blocks, test_labels, test_weights = extractor.concatenate_data(test_data)
+    train_html, train_labels, train_weights = extractor.get_html_labels_weights(training_data)
+    test_html, test_labels, test_weights = extractor.get_html_labels_weights(test_data)
+
+    # filter docs we can't get features from
+    train_blocks = np.array([extractor.blockifier.blockify(doc)
+                            for doc in train_html])
+    train_mask = [extractor._has_enough_blocks(blocks) for blocks in train_blocks]
+    train_blocks = train_blocks[train_mask]
+    train_labels = np.concatenate(train_labels[train_mask])
+    train_weights = np.concatenate(train_weights[train_mask])
+    test_labels = np.concatenate(test_labels)
+    test_weights = np.concatenate(test_weights)
+    # get features
+    # TODO: fit_transform is called per doc, never on the whole training set. No
+    # feature fitting actually happens for now, but this may matter if the features change.
+    train_features = np.concatenate([extractor.features.fit_transform(blocks)
+                                    for blocks in train_blocks])
 
     # fit many models
     gscv = GridSearchCV(
-        extractor, param_grid, fit_params={'weights': train_weights},
+        extractor.model, param_grid, fit_params={'sample_weight': train_weights},
         scoring=kwargs.get('scoring', 'f1'), cv=kwargs.get('cv', 5),
         n_jobs=kwargs.get('n_jobs', 1), verbose=kwargs.get('verbose', 1))
-    gscv = gscv.fit(train_blocks, train_labels)
+    gscv = gscv.fit(train_features, train_labels)
 
     logging.info('Score of the best model, on left-out data: %s', gscv.best_score_)
     logging.info('Params of the best model: %s', gscv.best_params_)
 
     # evaluate best model on train and test data
-    best_extractor = gscv.best_estimator_
+    extractor.model = gscv.best_estimator_
     train_eval = evaluate_model_predictions(
-        train_labels, best_extractor.predict(train_blocks))
+        train_labels, extractor.predict(train_html[train_mask]), weights=train_weights)
     test_eval = evaluate_model_predictions(
-        test_labels, best_extractor.predict(test_blocks))
-    _report_model_performance(output_dir, fname_prefix, train_eval, test_eval)
+        test_labels, extractor.predict(test_html), weights=test_weights)
 
     # pickle the final model
-    _write_model_to_disk(output_dir, fname_prefix, best_extractor)
+    _write_model_to_disk(output_dir, fname_prefix, extractor)
 
-    return best_extractor
+    return extractor
 
 
 def _set_up_output_dir_and_fname_prefix(output_dir, extractor):
diff --git a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_block_errors.txt b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_block_errors.txt
new file mode 100644
index 0000000..a6a9cf3
--- /dev/null
+++ b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.8888867678537985,
+ 'f1': 0.8135523962386007,
+ 'precision': 0.8147783353737416,
+ 'recall': 0.8123301407281905}
+Test errors for final model (block level):
+{'accuracy': 0.9155827171056652,
+ 'f1': 0.8185729821740981,
+ 'precision': 0.8098381446406853,
+ 'recall': 0.8274983004758668}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_block_errors.txt b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_block_errors.txt
new file mode 100644
index 0000000..acc8b86
--- /dev/null
+++ b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.9205167323012377,
+ 'f1': 0.9404319043625426,
+ 'precision': 0.9184194567439528,
+ 'recall': 0.9635254409178062}
+Test errors for final model (block level):
+{'accuracy': 0.8960239564871967,
+ 'f1': 0.9161795830853575,
+ 'precision': 0.8729650256683729,
+ 'recall': 0.9638954889057517}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_model.pkl.gz b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_model.pkl.gz
index 521ebe8..f4d8bdf 100644
Binary files a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_model.pkl.gz and b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_model.pkl.gz b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_model.pkl.gz
index e7cb31b..80c294e 100644
Binary files a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_model.pkl.gz and b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_block_errors.txt b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_block_errors.txt
new file mode 100644
index 0000000..66b365c
--- /dev/null
+++ b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.9049718380999011,
+ 'f1': 0.8650992736751364,
+ 'precision': 0.8685604952034155,
+ 'recall': 0.8616655286517149}
+Test errors for final model (block level):
+{'accuracy': 0.9211562671881685,
+ 'f1': 0.8931811181654694,
+ 'precision': 0.8712090865626898,
+ 'recall': 0.9162901000930941}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_model.pkl.gz b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_model.pkl.gz
index fc34f6f..ddfda23 100644
Binary files a/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_model.pkl.gz and b/dragnet/pickled_models/py2_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_block_errors.txt b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_block_errors.txt
new file mode 100644
index 0000000..6380ab1
--- /dev/null
+++ b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.8880447742482034,
+ 'f1': 0.811423824082008,
+ 'precision': 0.8157638153549375,
+ 'recall': 0.8071297672971197}
+Test errors for final model (block level):
+{'accuracy': 0.9135073030617857,
+ 'f1': 0.8150270281573802,
+ 'precision': 0.8024665939179312,
+ 'recall': 0.827986913664174}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_block_errors.txt b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_block_errors.txt
new file mode 100644
index 0000000..5376898
--- /dev/null
+++ b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.9181331474618691,
+ 'f1': 0.9387730533370016,
+ 'precision': 0.9149869162618192,
+ 'recall': 0.963828893783574}
+Test errors for final model (block level):
+{'accuracy': 0.8929707266393693,
+ 'f1': 0.9138891019551617,
+ 'precision': 0.8692234593397384,
+ 'recall': 0.9633937494039252}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz
index 77fe70d..5b18125 100644
Binary files a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz and b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz
index cdfe8e1..81043cf 100644
Binary files a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz and b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_content_block_errors.txt b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_content_block_errors.txt
new file mode 100644
index 0000000..4e9c77b
--- /dev/null
+++ b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_content_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.9055403901546596,
+ 'f1': 0.8659474221784542,
+ 'precision': 0.869143906134945,
+ 'recall': 0.8627743637329225}
+Test errors for final model (block level):
+{'accuracy': 0.9199633319073519,
+ 'f1': 0.8913886502283953,
+ 'precision': 0.8708065436067614,
+ 'recall': 0.9129672539972683}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz
index ffdb5aa..a28771b 100644
Binary files a/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz and b/dragnet/pickled_models/py2_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_block_errors.txt b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_block_errors.txt
index 05c85ad..14e0bd5 100644
--- a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_block_errors.txt
+++ b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_block_errors.txt
@@ -1,11 +1,10 @@
 Training errors for final model (block level):
-{'accuracy': 0.99960252445444053,
- 'f1': 0.99919343733401533,
- 'precision': 0.99933105103687092,
- 'recall': 0.99905586152635717}
-
+{'accuracy': 0.886500603377158,
+ 'f1': 0.8087575250472692,
+ 'precision': 0.8133641076752258,
+ 'recall': 0.8042028283945921}
 Test errors for final model (block level):
-{'accuracy': 0.89056809905316825,
- 'f1': 0.64719694746110934,
- 'precision': 0.77070954211814047,
- 'recall': 0.55780419934227166}
+{'accuracy': 0.9117667909307584,
+ 'f1': 0.8100755614489277,
+ 'precision': 0.8026590198123045,
+ 'recall': 0.8176304384772264}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_block_errors.txt b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_block_errors.txt
index 350457e..70ecd61 100644
--- a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_block_errors.txt
+++ b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_block_errors.txt
@@ -1,11 +1,10 @@
 Training errors for final model (block level):
-{'accuracy': 0.99948619014842321,
- 'f1': 0.99939039118482653,
- 'precision': 1.0,
- 'recall': 0.99878152516265495}
-
+{'accuracy': 0.9204290246339881,
+ 'f1': 0.9404201712810462,
+ 'precision': 0.9176159900970319,
+ 'recall': 0.964386676596891}
 Test errors for final model (block level):
-{'accuracy': 0.91469410050983246,
- 'f1': 0.87341259119156978,
- 'precision': 0.88187150456963581,
- 'recall': 0.86511441188277804}
+{'accuracy': 0.8981140377681355,
+ 'f1': 0.9177836561506915,
+ 'precision': 0.8752737205679842,
+ 'recall': 0.9646335850324057}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_model.pkl.gz b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_model.pkl.gz
new file mode 100644
index 0000000..6fed511
Binary files /dev/null and b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_content_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_model.pkl.gz b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_model.pkl.gz
new file mode 100644
index 0000000..f74f01d
Binary files /dev/null and b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_comments_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_block_errors.txt b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_block_errors.txt
index 90b9add..30e68d5 100644
--- a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_block_errors.txt
+++ b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_block_errors.txt
@@ -1,11 +1,10 @@
 Training errors for final model (block level):
-{'accuracy': 0.9989820748223478,
- 'f1': 0.99711578079934071,
- 'precision': 0.9943026186041416,
- 'recall': 0.99994490661671531}
-
+{'accuracy': 0.9054717718032232,
+ 'f1': 0.8658442469426517,
+ 'precision': 0.8690781718068796,
+ 'recall': 0.8626343003542436}
 Test errors for final model (block level):
-{'accuracy': 0.87859613983976692,
- 'f1': 0.69106915324916018,
- 'precision': 0.58421464943204071,
- 'recall': 0.8457612702013042}
+{'accuracy': 0.9201588950681415,
+ 'f1': 0.8918900132071536,
+ 'precision': 0.8694900158764989,
+ 'recall': 0.9154746777382902}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_model.pkl.gz b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_model.pkl.gz
new file mode 100644
index 0000000..cc118a3
Binary files /dev/null and b/dragnet/pickled_models/py3_sklearn_0.15.2_0.17.1/kohlschuetter_readability_weninger_content_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_block_errors.txt b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_block_errors.txt
new file mode 100644
index 0000000..1567b66
--- /dev/null
+++ b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.8857617952624446,
+ 'f1': 0.8090955569752635,
+ 'precision': 0.8069922589144594,
+ 'recall': 0.8112098475156461}
+Test errors for final model (block level):
+{'accuracy': 0.9138055368819898,
+ 'f1': 0.8146103452264529,
+ 'precision': 0.8065278500780844,
+ 'recall': 0.8228564751869476}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_block_errors.txt b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_block_errors.txt
new file mode 100644
index 0000000..1a98308
--- /dev/null
+++ b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.9203423488216474,
+ 'f1': 0.9403472982866902,
+ 'precision': 0.917661608068621,
+ 'recall': 0.964183054177825}
+Test errors for final model (block level):
+{'accuracy': 0.8932151805903563,
+ 'f1': 0.9139981611675603,
+ 'precision': 0.8701278254676313,
+ 'recall': 0.9625271084462247}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz
index a9c8416..9c13e60 100644
Binary files a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz and b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_content_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz
index f556114..baf41c7 100644
Binary files a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz and b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_comments_model.pkl.gz differ
diff --git a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_content_block_errors.txt b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_content_block_errors.txt
new file mode 100644
index 0000000..e14fd9d
--- /dev/null
+++ b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_content_block_errors.txt
@@ -0,0 +1,10 @@
+Training errors for final model (block level):
+{'accuracy': 0.904718001792332,
+ 'f1': 0.8644470492730987,
+ 'precision': 0.8697976287363944,
+ 'recall': 0.8591618957578304}
+Test errors for final model (block level):
+{'accuracy': 0.9226914380003667,
+ 'f1': 0.8950468762963577,
+ 'precision': 0.8747291809914246,
+ 'recall': 0.9163308712108342}
\ No newline at end of file
diff --git a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz
index 0c41d75..888a054 100644
Binary files a/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz and b/dragnet/pickled_models/py3_sklearn_0.18.0/kohlschuetter_readability_weninger_content_model.pkl.gz differ