From 52e8eff985fdf75612837cef4d9ef55ad60f29ad Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 22 Sep 2014 18:10:30 -0700 Subject: [PATCH] Finishes the re-write for bulk files, closing #285. --- alert/api/management/__init__.py | 0 alert/api/management/commands/__init__.py | 0 .../management/commands/cl_make_bulk_data.py | 115 ++++++++ alert/api/tests.py | 100 +------ alert/api/urls.py | 19 +- alert/api/views.py | 100 +------ alert/assets/templates/api/bulk-data.html | 199 +++++++++++++ alert/assets/templates/api/dumps.html | 151 ---------- alert/audio/urls.py | 8 +- .../commands/cl_send_donation_reminders.py | 2 - alert/dump_all_cases.py | 20 -- alert/lib/dump_lib.py | 264 ------------------ alert/lib/search_utils.py | 10 +- alert/lib/timer.py | 18 +- alert/scrapers/tests.py | 21 +- alert/search/api.py | 128 +++++---- alert/search/api2.py | 179 ++++++++---- alert/search/forms.py | 13 +- alert/search/urls.py | 11 +- alert/search/views.py | 10 +- alert/settings/10-public.py | 2 +- alert/urls.py | 1 - apache/courtlistener.com.conf | 2 +- upgrade.txt | 33 ++- 24 files changed, 610 insertions(+), 796 deletions(-) create mode 100644 alert/api/management/__init__.py create mode 100644 alert/api/management/commands/__init__.py create mode 100644 alert/api/management/commands/cl_make_bulk_data.py create mode 100644 alert/assets/templates/api/bulk-data.html delete mode 100644 alert/assets/templates/api/dumps.html delete mode 100644 alert/dump_all_cases.py delete mode 100644 alert/lib/dump_lib.py diff --git a/alert/api/management/__init__.py b/alert/api/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/alert/api/management/commands/__init__.py b/alert/api/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/alert/api/management/commands/cl_make_bulk_data.py b/alert/api/management/commands/cl_make_bulk_data.py new file mode 100644 index 0000000000..aac5810a69 --- /dev/null +++ b/alert/api/management/commands/cl_make_bulk_data.py @@ -0,0 +1,115 @@ +import StringIO +import os +import shutil +import tarfile +import time +import errno + +from alert.lib.db_tools import queryset_generator +from alert.lib.timer import print_timing +from alert.search.models import Court, Document +from django.core.management import BaseCommand +from django.conf import settings +from audio.models import Audio + + +def mkdir_p(path): + """Makes a directory path, but doesn't crash if the path already exists.""" + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +class Command(BaseCommand): + help = 'Create the bulk files for all jurisdictions and for "all".' 
+ + def handle(self, *args, **options): + self.do_everything() + + @print_timing + def do_everything(self): + """We can't wrap the handle() function, but we can wrap this one.""" + from alert.search import api2 + self.stdout.write('Starting bulk file creation...\n') + arg_tuples = ( + ('opinion', Document, api2.DocumentResource), + ('oral-argument', Audio, api2.OralArgumentResource), + ) + for obj_type_str, obj_type, api_resource_obj in arg_tuples: + self.make_archive(obj_type_str, obj_type, api_resource_obj) + self.swap_archives(obj_type_str) + self.stdout.write('Done.\n\n') + + def swap_archives(self, obj_type_str): + """Swap out new archives for the old.""" + self.stdout.write(' - Swapping in the new %s archives...\n' + % obj_type_str) + mkdir_p(os.path.join(settings.DUMP_DIR, '%s' % obj_type_str)) + for f in os.listdir('/tmp/bulk/%s' % obj_type_str): + shutil.move('/tmp/bulk/%s/%s' % (obj_type_str, f), + os.path.join(settings.DUMP_DIR, '%ss' % obj_type_str)) + + def make_archive(self, obj_type_str, obj_type, api_resource_obj): + """Generate compressed archives containing the contents of an object + database. + + There are a few tricks to this, but the main one is that each item in + the database goes into two files, all.tar.gz and {court}.tar.gz. This + means that if we want to avoid iterating the database once per file, + we need to generate all 350+ jurisdiction files simultaneously. + + We do this by making a dict of open file handles and adding each item + to the correct two files: The all.tar.gz file and the {court}.tar.gz + file. + """ + courts = Court.objects.all() + self.stdout.write(' - Creating %s bulk %s files ' + 'simultaneously...\n' % (len(courts), obj_type_str)) + + mkdir_p('/tmp/bulk/%s' % obj_type_str) + + # Open a gzip'ed tar file for every court + tar_files = {} + for court in courts: + tar_files[court.pk] = tarfile.open( + '/tmp/bulk/%s/%s.tar.gz' % (obj_type_str, court.pk), + mode='w:gz' + ) + tar_files['all'] = tarfile.open( + '/tmp/bulk/%s/all.tar.gz' % obj_type_str, + mode='w:gz' + ) + + # Make the archives + qs = obj_type.objects.all() + item_resource = api_resource_obj() + item_list = queryset_generator(qs) + for item in item_list: + json_str = item_resource.serialize( + None, + item_resource.full_dehydrate( + item_resource.build_bundle(obj=item)), + 'application/json', + ).encode('utf-8') + + # Add the json str to the two tarballs + tarinfo = tarfile.TarInfo("%s.json" % item.pk) + tarinfo.size = len(json_str) + tarinfo.mtime = time.mktime(item.date_modified.timetuple()) + tarinfo.type = tarfile.REGTYPE + + tar_files[item.docket.court_id].addfile( + tarinfo, StringIO.StringIO(json_str)) + tar_files['all'].addfile( + tarinfo, StringIO.StringIO(json_str)) + + # Close off all the gzip'ed tar files + for court in courts: + tar_files[court.pk].close() + tar_files['all'].close() + + self.stdout.write(' - all %s bulk files created.\n' % obj_type_str) diff --git a/alert/api/tests.py b/alert/api/tests.py index bb8bafc978..0facc0fb07 100644 --- a/alert/api/tests.py +++ b/alert/api/tests.py @@ -1,11 +1,8 @@ from datetime import timedelta -import os -import time -from django.conf import settings from django.test import TestCase from django.utils.timezone import now -from alert.lib.dump_lib import make_dump_file from alert.search.models import Docket, Citation, Court, Document +from api.management.commands.cl_make_bulk_data import Command class BulkDataTest(TestCase): @@ -28,98 +25,9 @@ def setUp(self): ) self.doc.save(index=False) - self.day = last_month.day - self.month 
= last_month.month - self.year = last_month.year - self.now = now().date() - def tearDown(self): self.doc.delete() - def test_no_year_provided_with_court_provided(self): - """When a user doesn't provide a year and wants everything for a - particular court, do we properly throw a 400 error? - """ - r = self.client.get('/api/bulk/test.xml.gz') - self.assertEqual( - r.status_code, - 400, - msg="Should have gotten HTTP code 400. Instead got: %s" % r.status_code - ) - - def test_no_year_provided_all_courts_requested(self): - """If a user requests everything, do we give it to them?""" - start_moment = time.time() - qs = Document.objects.all() - filename = 'all.xml' - make_dump_file(qs, settings.DUMP_DIR, filename) - r = self.client.get('/api/bulk/all.xml.gz') - - # Normally, the redirect hands the user off to Apache, which serves the file. - # Since we don't always have apache set up, we make sure we get redirected and - # we check that the file exists on disk with a non-zero filesize. - self.assertEqual( - r.status_code, - 302, - msg="Redirection to bulk file failed." - ) - file_path = os.path.join(settings.DUMP_DIR, filename + '.gz') - self.assertGreater( - os.path.getsize(file_path), - 0, - msg="Bulk data file does not have content." - ) - self.assertGreater( - os.stat(file_path).st_mtime, - start_moment, - msg="File was created before the test was run, indicating it predates this test." - ) - - def test_year_based_bulk_file(self): - """Do we generate and provide year-based bulk files properly?""" - r = self.client.get('/api/bulk/%s/test.xml.gz' % self.year) - self.assertEqual(r.status_code, 302, msg="Got status code of %s with content: %s" % - (r.status_code, r.content)) - - def test_month_based_bulk_file(self): - """Do we generate and provide month-based bulk files properly?""" - r = self.client.get('/api/bulk/%s/%s/test.xml.gz' % (self.year, self.month)) - self.assertEqual(r.status_code, 302, msg="Got status code of %s with content: %s" % - (r.status_code, r.content)) - - def test_day_based_bulk_file_twice(self): - """Do we generate and provide day-based bulk files properly? - - When they come from the cache the second time, does it still work? - """ - r = self.client.get('/api/bulk/%s/%s/%s/test.xml.gz' % (self.year, self.month, self.day)) - self.assertEqual(r.status_code, 302, msg="Got status code of %s with content: %s" % - (r.status_code, r.content)) - # 2x! - r = self.client.get('/api/bulk/%s/%s/%s/test.xml.gz' % (self.year, self.month, self.day)) - self.assertEqual(r.status_code, 302, msg="Got status code of %s with content: %s" % - (r.status_code, r.content)) - - def test_month_not_yet_complete(self): - """A limitation is that we do not serve files until the month is complete. - - Do we throw the proper error when this is the case? - """ - r = self.client.get('/api/bulk/%s/%s/test.xml.gz' % (self.now.year, self.now.month)) - self.assertEqual(r.status_code, 400) - self.assertIn('partially in the future', r.content, msg="Did not get correct error message. " - "Instead got: %s" % r.content) - - def test_month_completely_in_the_future(self): - """Do we throw an error when a date in the future is requested?""" - r = self.client.get('/api/bulk/%s/%s/test.xml.gz' % (self.now.year + 1, self.now.month)) - self.assertEqual(r.status_code, 400) - self.assertIn('date is in the future', r.content, msg="Did not get correct error message. 
" - "Instead got: %s" % r.content) - - def test_no_data_for_time_period(self): - """If we lack data for a period of time, do we throw an error?""" - r = self.client.get('/api/bulk/1982/06/09/test.xml.gz') - self.assertEqual(r.status_code, 404) - self.assertIn('not have any data', r.content, msg="Did not get correct error message. " - "Instead got: %s" % r.content) + def test_make_all_bulk_files(self): + """Can we successfully generate all bulk files?""" + Command.do_everything() diff --git a/alert/api/urls.py b/alert/api/urls.py index e48b748917..66467b8303 100644 --- a/alert/api/urls.py +++ b/alert/api/urls.py @@ -1,27 +1,22 @@ from alert.api.views import ( court_index, documentation_index, dump_index, rest_index, - serve_or_gen_dump, serve_pagerank_file, coverage_data + serve_pagerank_file, coverage_data ) + from alert.urls import pacer_codes from django.conf.urls import patterns urlpatterns = patterns('', + # Documentation (r'^api/$', documentation_index), (r'^api/jurisdictions/$', court_index), (r'^api/rest-info/$', rest_index), (r'^api/bulk-info/$', dump_index), - (r'^api/bulk/(?Pall|%s)\.xml\.gz$' % "|".join(pacer_codes), - serve_or_gen_dump), - (r'^api/bulk/(?P\d{4})/(?Pall|%s)\.xml\.gz$' % "|".join( - pacer_codes), - serve_or_gen_dump), - (r'^api/bulk/(?P\d{4})/(?P\d{1,2})/(?Pall|%s)\.xml\.gz$' % "|".join( - pacer_codes), - serve_or_gen_dump), - (r'^api/bulk/(?P\d{4})/(?P\d{1,2})/(?P\d{1,2})/(?Pall|%s)\.xml\.gz$' % "|".join( - pacer_codes), - serve_or_gen_dump), + + # Pagerank file (r'^api/bulk/external_pagerank/$', serve_pagerank_file), + + # Coverage API (r'^api/rest/v[12]/coverage/(all|%s)/' % '|'.join(pacer_codes), coverage_data), ) diff --git a/alert/api/views.py b/alert/api/views.py index ce726ff9fc..beecfb1f2c 100644 --- a/alert/api/views.py +++ b/alert/api/views.py @@ -1,21 +1,15 @@ import json import os -from django.conf import settings from alert import settings -from alert.lib import search_utils, magic -from alert.lib.db_tools import queryset_generator_by_date -from alert.lib.dump_lib import make_dump_file -from alert.lib.dump_lib import get_date_range +from alert.lib import magic from alert.lib.filesize import size -from alert.lib.sunburnt import sunburnt -from alert.search.models import Court, Document +from alert.search.models import Court from alert.stats import tally_stat -from django.http import HttpResponseBadRequest, Http404, HttpResponse, HttpResponseRedirect +from django.http import Http404, HttpResponse, HttpResponseRedirect from django.shortcuts import render_to_response from django.template import RequestContext -from django.utils.timezone import now from lib import search_utils from lib.sunburnt import sunburnt @@ -94,85 +88,19 @@ def dump_index(request): courts = make_court_variable() court_count = len(courts) try: - dump_size = size(os.path.getsize(os.path.join(settings.DUMP_DIR, 'all.xml.gz'))) + dump_size = size(os.path.getsize( + os.path.join(settings.DUMP_DIR, 'all.xml.gz'))) except os.error: # Happens when the file is inaccessible or doesn't exist. An estimate. - dump_size = '13GB' - return render_to_response('api/dumps.html', - {'court_count': court_count, - 'courts': courts, - 'dump_size': dump_size, - 'private': False}, - RequestContext(request)) - - -def serve_or_gen_dump(request, court, year=None, month=None, day=None): - """Serves the dump file to the user, generating it if needed.""" - if year is None: - if court != 'all': - # Sanity check - return HttpResponseBadRequest('
Error 400: Complete dumps are not available for individual courts. Try using "all" for your court ID instead.
') - else: - # Serve the dump for all cases. - tally_stat('bulk_data.served.all') - return HttpResponseRedirect('/dumps/all.xml.gz') - - else: - # Date-based dump - start_date, end_date, annual, monthly, daily = get_date_range(year, month, day) - - today = now().date() - # Ensure that it's a valid request. - if (today < end_date) and (today < start_date): - # It's the future. They fail. - return HttpResponseBadRequest('
Error 400: Requested date is in the future. Please try again then.
') - elif today <= end_date: - # Some of the data is in the past, some could be in the future. - return HttpResponseBadRequest('
Error 400: Requested date is partially in the future. Please try again then.
') - - filename = court + '.xml' - if daily: - filepath = os.path.join(year, month, day) - elif monthly: - filepath = os.path.join(year, month) - elif annual: - filepath = os.path.join(year) - - path_from_root = os.path.join(settings.DUMP_DIR, filepath) - - # See if we already have it on disk. - try: - _ = open(os.path.join(path_from_root, filename + '.gz'), 'rb') - tally_stat('bulk_data.served.by_date') - return HttpResponseRedirect(os.path.join('/dumps', filepath, filename + '.gz')) - except IOError: - # Time-based dump - if court == 'all': - # dump everything; disable default ordering - qs = Document.objects.all().order_by() - else: - # dump just the requested court; disable default ordering - qs = Document.objects.filter(docket__court=court).order_by() - - # check if there are any documents at all - dump_has_docs = qs.filter(date_filed__gte=start_date, - date_filed__lte=end_date).exists() - if dump_has_docs: - docs_to_dump = queryset_generator_by_date(qs, - 'date_filed', - start_date, - end_date) - - make_dump_file(docs_to_dump, path_from_root, filename) - else: - return HttpResponseBadRequest('
Error 404: We do not have any data for this time period.
', - status=404) - - tally_stat('bulk_data.served.by_date') - return HttpResponseRedirect('%s.gz' % os.path.join('/dumps', filepath, filename)) + dump_size = 'about 13GB' + return render_to_response( + 'api/bulk-data.html', + {'court_count': court_count, + 'courts': courts, + 'dump_size': dump_size, + 'private': False}, + RequestContext(request) + ) def serve_pagerank_file(request): diff --git a/alert/assets/templates/api/bulk-data.html b/alert/assets/templates/api/bulk-data.html new file mode 100644 index 0000000000..2d653bd58d --- /dev/null +++ b/alert/assets/templates/api/bulk-data.html @@ -0,0 +1,199 @@ +{% extends "base.html" %} + +{% block title %}Bulk Data - CourtListener.com{% endblock %} +{% block search-form %}{% endblock %} + +{% block sidebar %}{% endblock %} + +{% block content %} +
+ Bulk Data
For hackers, legal analysts and anybody else that might want + them, we provide bulk files containing all of our data. Several + types of files are available as listed below, but in general the + files that are available correspond to the major types of data we + have in our database (presently, Opinions and Oral Arguments, but + we expect this to slowly expand). +
+ The CiteGeist Bulk Data File
+ On the 15th of each month, we re-generate the + + CiteGeist scores + + for the entire collection. Since a single new citation can have a + ripple effect across the entire citation network, we store these + values in a flat file rather than in our database. This saves us + from having to update millions of records every month. +
+ This file can be obtained with:
+
+     curl -O https://www.courtlistener.com/api/bulk/external_pagerank/
+
When inspecting this file, you will find two columns of data. The + first column corresponds to the ID numbers of the items in our + opinion database, and the second value corresponds to the CiteGeist + score for that item. +
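A minimal Python sketch for loading those two columns into a dict, assuming the values are separated by whitespace or a comma and that the file has already been downloaded locally (verify both against an actual copy of the file):

    import re

    scores = {}
    # 'external_pagerank' is a placeholder; use whatever name your download was saved under.
    with open('external_pagerank') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            item_id, score = re.split(r'[,\s]+', line)[:2]
            scores[int(item_id)] = float(score)

    print 'Loaded %d CiteGeist scores.' % len(scores)

Each item ID can then be matched against the opinion endpoint of the REST API or against the IDs used for the bulk file members described below.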
+ Bulk Data Files for Opinions and Oral Arguments
+ Two types of bulk file are available for each type of content in + CourtListener. The first is a bulk file containing everything in + the system for that type of data. The second is a + jurisdiction-based file that only has the content for a certain + jurisdiction. In general, the scheme for the bulk files is as + follows: +
+
+     https://www.courtlistener.com/api/bulk-data/$data-type/$jurisdiction.tar.gz
+
+ Some examples:
+
+  - All opinions from the First Circuit of Appeals (ca1):
+    https://www.courtlistener.com/api/bulk-data/opinion/ca1.tar.gz
+  - All oral arguments from the Second Circuit of Appeals (ca2):
+    https://www.courtlistener.com/api/bulk-data/oral-argument/ca2.tar.gz
+  - All opinions from all jurisdictions:
+    https://www.courtlistener.com/api/bulk-data/opinion/all.tar.gz
+
A list of all current jurisdictions is on the right and we regularly + add new jurisdictions. To monitor for new jurisdictions, you may + want to look at the Jurisdiction + endpoint of the REST API. +
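A rough sketch of that kind of monitoring follows; the v2 endpoint path, the use of HTTP Basic credentials, and the tastypie-style "objects" envelope are assumptions that should be checked against the REST API documentation:

    import base64
    import json
    import urllib2

    # Assumed path; see the REST API documentation for the authoritative URL.
    url = 'https://www.courtlistener.com/api/rest/v2/jurisdiction/'
    request = urllib2.Request(url)
    # The API may require HTTP Basic authentication; substitute real credentials.
    request.add_header('Authorization',
                       'Basic ' + base64.b64encode('username:password'))
    courts = json.loads(urllib2.urlopen(request).read())

    # Tastypie list endpoints normally wrap their results in an "objects" list.
    for court in courts.get('objects', []):
        print court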
+ What To Expect in the Bulk Files
+ These files are generated using the REST + API and follow the schemas described there. The files inside + the tar archives have names corresponding to the ID of each item, + and are formatted as JSON. If you wish to see a sample file, we + advise selecting a small or secretive jurisdiction (such as the + FISA court) and using that to get an idea of what the bulk files + contain. +
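For instance, a downloaded jurisdiction archive can be walked with Python's tarfile module. This sketch assumes an archive saved locally and the one-JSON-member-per-item layout used by the generation command elsewhere in this change:

    import json
    import tarfile

    # e.g. previously fetched with:
    #   curl -O https://www.courtlistener.com/api/bulk-data/opinion/ca1.tar.gz
    archive = tarfile.open('ca1.tar.gz', mode='r:gz')
    try:
        for member in archive.getmembers():
            # Each member is named "<id>.json" and holds one item as serialized
            # by the REST API resources.
            item = json.load(archive.extractfile(member))
            print member.name, item.get('absolute_url')
    finally:
        archive.close()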
+ Generation Times
+ As can be seen on the public CourtListener + maintenance calendar, + bulk data files are regenerated on the last day of every month + beginning at 3AM PST. Generation can take many hours, but in + general is expected to conclude before the 1st of each month. On + the last day of the month, we do not guarantee that you will get + either the new or old archives, as archives are updated in place as + their generation completes. In other words, on the last day of the + month, do not count on getting up-to-date information until the + next day. +
+ Donations
Free Law Project has been + providing bulk data for many years but is opposed to charging for + public domain data. However, if you find these files valuable to + your work and are able, we ask that you seriously consider how + much they might cost otherwise and consider making a + donation in a similar amount. Free Law + Project is a California non-profit and we rely on your support to + survive. +
+ Adding Features and Fixing Bugs
Like all Free Law Project initiatives, CourtListener is an open + source project. If you are a developer and you notice bugs or + missing features, we enthusiastically welcome your contributions + on + Github. +
+ Unfortunately, there are always more bugs than time.
+
+ Obsoleted Bulk Data APIs
In the past, bulk data files were available by day, month, or year + for every jurisdiction, and a single file was available containing + all data. Without community objection, these APIs were + sunsetted + in the fall of 2014. +
+{% endblock %} diff --git a/alert/assets/templates/api/dumps.html b/alert/assets/templates/api/dumps.html deleted file mode 100644 index 7331c25a89..0000000000 --- a/alert/assets/templates/api/dumps.html +++ /dev/null @@ -1,151 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Bulk Data - CourtListener.com{% endblock %} -{% block search-form %}{% endblock %} - -{% block sidebar %}{% endblock %} - -{% block content %} -
- Bulk Data
For hackers and legal analysts, we provide bulk files containing our data. Two types of files are available. - The first is a single file containing the CiteGeist scores for all items in our collection. The second are - XML files containing large sets of data. -
- The CiteGeist Bulk Data File
- On the 15th of each month, we re-generate the - - CiteGeist scores - - for the entire collection. Since a single new citation can have a ripple effect across the entire citation - network, we store these values in a flat file rather than in our database. This saves us from having to - update millions of records every month. -
- This file can be obtained with:
-
-     curl -O https://www.courtlistener.com/api/bulk/external_pagerank/
-
- Bulk Data Files
- XML Bulk data files are available by the year, month and day. For each time period, individual bulk - data files are available for each court, as well as a single file containing data from - all courts. We generate these files the first time they are requested, so some files may be very fast, - while others may need to be created for you, which can take a moment. -
The XML information in the files should be self-explanatory, but we welcome discussion - in - our developer forum (preferred) or via our contact page. -
- All time stamps are Pacific Standard Time.
-
- Requesting the Bulk Data Files
Annual, monthly or daily bulk data files can be accessed at - https://www.courtlistener.com/api/bulk/year/month/day/court.xml.gz.
- For example, let's look at ways to access the First Circuit of Appeals (ca1):
If you would like all cases for a given time period, - you can use all for the court name. For example, - - https://www.courtlistener.com/api/bulk/2009/06/09/all.xml.gz - returns all of the cases from June 9, 2009 (across all courts).
We also provide a bulk data file of all cases up through the last day - of the previous month. To obtain this file, simply omit the date - in your query, and use all for the court name: - https://www.courtlistener.com/api/bulk/all.xml.gz. - This file is very large ({{ dump_size }}). It's currently not possible to obtain complete - data files for individual courts, due the processing required to generate such files.
On the backend, bulk data files are generated when a GET request is placed on an /api/bulk/ endpoint - and once the file is generated, you are redirected to its location on our server's disk. - If the file was previously generated, you will be redirected immediately to a cached copy. - This architecture is necessary on our backend and means that consumers of this API will need - to automatically follow 302 redirects. If you are using cURL, this can be accomplished with - the -L flag, and saving binaries can be done with the -O flag. - Thus a complete GET request might look like:
curl -L -O https://www.courtlistener.com/api/bulk/2009/06/09/ca9.xml.gz
- If you are interested in maintaining your system in sync with our data, you should look at - our REST API, which provides resources ordered by modification date. -
- Note that prior to November, 2013 these endpoints were previously located at /dump-api/. The - old location will redirect you as necessary, but note that it will eventually go away. -
-{% endblock %} diff --git a/alert/audio/urls.py b/alert/audio/urls.py index 22427d96af..c2a267df4a 100644 --- a/alert/audio/urls.py +++ b/alert/audio/urls.py @@ -1,5 +1,3 @@ -from alert.audio.feeds import AllJurisdictionsPodcast, JurisdictionPodcast, \ - SearchPodcast from alert.audio.views import view_audio_file from alert.audio.sitemap import oral_argument_sitemap_maker from alert.urls import pacer_codes @@ -11,9 +9,9 @@ # Podcasts (r'^podcast/court/(?P' + '|'.join(pacer_codes) + ')/$', - JurisdictionPodcast()), - (r'^podcast/court/all/$', AllJurisdictionsPodcast()), - (r'^podcast/(search)/', SearchPodcast()), + 'JurisdictionPodcast()'), + (r'^podcast/court/all/$', 'AllJurisdictionsPodcast()'), + (r'^podcast/(search)/', 'SearchPodcast()'), # Sitemap (r'^sitemap-oral-arguments\.xml', oral_argument_sitemap_maker), diff --git a/alert/donate/management/commands/cl_send_donation_reminders.py b/alert/donate/management/commands/cl_send_donation_reminders.py index cc248271a0..53797c3d53 100644 --- a/alert/donate/management/commands/cl_send_donation_reminders.py +++ b/alert/donate/management/commands/cl_send_donation_reminders.py @@ -2,12 +2,10 @@ from django.core.management.base import BaseCommand from django.db.models import Sum from django.template import loader, Context -from optparse import make_option from django.utils.timezone import now from alert.search.models import Document, Court from alert.stats import Stat from alert.userHandling.models import UserProfile -from datetime import date from datetime import timedelta diff --git a/alert/dump_all_cases.py b/alert/dump_all_cases.py deleted file mode 100644 index 03fa3a649d..0000000000 --- a/alert/dump_all_cases.py +++ /dev/null @@ -1,20 +0,0 @@ -import os -import sys - -execfile('/etc/courtlistener') -sys.path.append(INSTALL_ROOT) -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") - -from alert.lib.dump_lib import dump_it_all - - -def main(): - """ - A simple function that dumps all cases to a single dump file. - """ - dump_it_all() - - exit(0) - -if __name__ == '__main__': - main() diff --git a/alert/lib/dump_lib.py b/alert/lib/dump_lib.py deleted file mode 100644 index c60ed2c672..0000000000 --- a/alert/lib/dump_lib.py +++ /dev/null @@ -1,264 +0,0 @@ -import calendar -import gzip -import time -import os - -from datetime import datetime, date -from django.utils.timezone import utc, now -from django.conf import settings -from lxml import etree -from alert.lib.db_tools import queryset_generator_by_date -from alert.search.models import Document - - -class myGzipFile(gzip.GzipFile): - """Backports Python 2.7 functionality into 2.6. - - In order to use the 'with syntax' below, I need to subclass the gzip - library here. Once all of the machines are running Python 2.7, this class - can be removed, and the 'with' code below can simply reference the gzip - class rather than this one. - - This line of code worked in 2.7: - with gzip.open(filename, mode='wb') as z_file: - """ - def __enter__(self): - if self.fileobj is None: - raise ValueError("I/O operation on closed GzipFile object") - return self - - def __exit__(self, *args): - self.close() - - -def make_dump_file(docs_to_dump, path_from_root, filename): - # This var is needed to clear out null characters and control characters - # (skipping newlines) - null_map = dict.fromkeys(range(0, 10) + range(11, 13) + range(14, 32)) - - temp_dir = str(time.time()) - - try: - os.makedirs(os.path.join(path_from_root, temp_dir)) - except OSError: - # Path exists. 
- pass - - with myGzipFile(os.path.join(path_from_root, temp_dir, filename), - mode='wb') as z_file: - - z_file.write('\n' + - '\n') - - for doc in docs_to_dump: - row = etree.Element("opinion") - try: - # These are required by the DB, and thus are safe - # without the try/except blocks - row.set('id', str(doc.pk)) - row.set('path', doc.get_absolute_url()) - row.set('sha1', doc.sha1) - row.set('court', doc.docket.court.full_name) - try: - row.set('download_url', doc.download_url) - except: - pass - row.set('time_retrieved', str(doc.time_retrieved)) - # All are wrapped in try/except b/c the value might not be found. - try: - row.set('date_filed', str(doc.date_filed)) - except: - pass - try: - row.set('precedential_status', doc.precedential_status) - except: - pass - try: - row.set('local_path', str(doc.local_path)) - except: - pass - try: - row.set('docket_number', doc.citation.docket_number) - except: - pass - try: - row.set('federal_cite_one', doc.citation.federal_cite_one) - except: - pass - try: - row.set('federal_cite_two', doc.citation.federal_cite_two) - except: - pass - try: - row.set('federal_cite_three', doc.citation.federal_cite_three) - except: - pass - try: - row.set('state_cite_one', doc.citation.state_cite_one) - except: - pass - try: - row.set('state_cite_two', doc.citation.state_cite_two) - except: - pass - try: - row.set('state_cite_three', doc.citation.state_cite_three) - except: - pass - try: - row.set('state_cite_regional', doc.citation.state_cite_regional) - except: - pass - try: - row.set('specialty_cite_one', doc.citation.specialty_cite_one) - except: - pass - try: - row.set('scotus_early_cite', doc.citation.scotus_early_cite) - except: - pass - try: - row.set('lexis_cite', doc.citation.lexis_cite) - except: - pass - try: - row.set('westlaw_cite', doc.citation.westlaw_cite) - except: - pass - try: - row.set('neutral_cite', doc.citation.neutral_cite) - except: - pass - try: - row.set('case_name', doc.citation.case_name) - except: - pass - try: - row.set('judges', doc.judges) - except: - pass - try: - row.set('nature_of_suit', doc.nature_of_suit) - except: - pass - try: - row.set('source', doc.get_source_display()) - except: - pass - try: - row.set('blocked', str(doc.blocked)) - except: - pass - try: - row.set('date_blocked', str(doc.date_blocked)) - except: - pass - try: - row.set('extracted_by_ocr', str(doc.extracted_by_ocr)) - except: - pass - - ids = ','.join([str(pk) for pk in doc.citation.citing_opinions.all().values_list('pk', flat=True)]) - if len(ids) > 0: - row.set('cited_by', ids) - - # Gather the doc text - if doc.html_with_citations: - row.text = doc.html_with_citations.translate(null_map) - elif doc.html_lawbox: - row.text = doc.html_lawbox - elif doc.html: - row.text = doc.html - else: - row.text = doc.plain_text.translate(null_map) - except ValueError: - # Null byte found. Punt. - continue - - z_file.write(' %s\n' % etree.tostring(row).encode('utf-8')) - - # Close things off - z_file.write('') - - # Delete the old archive, then replace it with the new one. Deleting - # shouldn't necessary according to the Python documentation, but in testing - # I'm not seeing file clobbering happen. - try: - os.remove(os.path.join(path_from_root, filename)) - except OSError: - # The file doesn't exist yet. This should only really be triggered by - # the all_cases dumper. The others shouldn't get this far. 
- pass - - # Move the new file to the correct location - os.rename(os.path.join(path_from_root, temp_dir, filename), - os.path.join(path_from_root, filename) + '.gz') - - # Remove the directory, but only if it's empty. - os.rmdir(os.path.join(path_from_root, temp_dir)) - - return os.path.join(path_from_root, filename) - - -def dump_it_all(): - start_date = datetime(1754, 9, 1, tzinfo=utc) # First American case - end_date = now() - # Get the documents from the database. - qs = Document.objects.all() - docs_to_dump = queryset_generator_by_date( - qs, - 'date_filed', - start_date, - end_date - ) - - path_from_root = settings.DUMP_DIR - filename = 'all.xml' - make_dump_file(docs_to_dump, path_from_root, filename) - - -def get_date_range(year, month, day): - """ Create a date range to be queried. - - Given a year and optionally a month or day, return a date range. If only a - year is given, return start date of January 1, and end date of December - 31st. Do similarly if a year and month are supplied or if all three values - are provided. - """ - # Sort out the start dates - if month is None: - start_month = 1 - else: - start_month = int(month) - if day is None: - start_day = 1 - else: - start_day = int(day) - - start_year = int(year) - start_date = date(start_year, start_month, start_day) - - annual = False - monthly = False - daily = False - # Sort out the end dates - if day is None and month is None: - # it's an annual query - annual = True - end_month = 12 - end_day = 31 - elif day is None: - # it's a month query - monthly = True - end_month = int(month) - end_day = calendar.monthrange(int(year), end_month)[1] - else: - # all three values provided! - daily = True - end_month = int(month) - end_day = int(day) - - end_year = int(year) - end_date = date(end_year, end_month, end_day) - - return start_date, end_date, annual, monthly, daily diff --git a/alert/lib/search_utils.py b/alert/lib/search_utils.py index 5f295d1039..c7b90531a7 100644 --- a/alert/lib/search_utils.py +++ b/alert/lib/search_utils.py @@ -77,8 +77,8 @@ def make_stats_variable(solr_facet_values, search_form): return facets -def merge_form_with_courts(COURTS, search_form): - """Merges the COURTS dict with the values from the search form. +def merge_form_with_courts(courts, search_form): + """Merges the courts dict with the values from the search form. Final value is like (note that order is significant): courts = { @@ -122,10 +122,10 @@ def merge_form_with_courts(COURTS, search_form): for field in search_form: if no_facets_selected: - for court in COURTS: + for court in courts: court['checked'] = True else: - for court in COURTS: + for court in courts: # We're merging two lists, so we have to do a nested loop # to find the right value. if 'court_%s' % court['pk'] == field.html_name: @@ -143,7 +143,7 @@ def merge_form_with_courts(COURTS, search_form): b_bundle = [] state_bundle = [] state_bundles = [] - for court in COURTS: + for court in courts: if court['jurisdiction'] == 'F': court['tab'] = 'federal' elif court['jurisdiction'] == 'FD': diff --git a/alert/lib/timer.py b/alert/lib/timer.py index 09e34cbc1d..c87e8edd57 100644 --- a/alert/lib/timer.py +++ b/alert/lib/timer.py @@ -1,19 +1,3 @@ -# This software and any associated files are copyright 2010 Brian Carver and -# Michael Lissner. 
-# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - import time def print_timing(func): @@ -24,6 +8,6 @@ def wrapper(*arg): t1 = time.time() res = func(*arg) t2 = time.time() - print 'Completed in %0.1f seconds' % ((t2 - t1)) + print 'Completed in %0.1f seconds.' % ((t2 - t1)) return res return wrapper diff --git a/alert/scrapers/tests.py b/alert/scrapers/tests.py index 19a5a409e8..0af4d724ad 100644 --- a/alert/scrapers/tests.py +++ b/alert/scrapers/tests.py @@ -4,17 +4,22 @@ import time from django.utils.timezone import now from alert.audio.models import Audio -from alert.lib.solr_core_admin import create_solr_core, delete_solr_core, swap_solr_core +from alert.lib.solr_core_admin import create_solr_core, delete_solr_core, \ + swap_solr_core from alert.lib.string_utils import trunc from alert.lib import sunburnt from alert.scrapers.DupChecker import DupChecker from alert.scrapers.models import urlToHash, ErrorLog from alert.scrapers.management.commands.cl_scrape_opinions import get_extension -from alert.scrapers.management.commands.cl_scrape_opinions import Command as OpinionCommand -from alert.scrapers.management.commands.cl_scrape_oral_arguments import Command as OralArgCommand -from alert.scrapers.management.commands.cl_report_scrape_status import calculate_counts, tally_errors +from alert.scrapers.management.commands.cl_scrape_opinions import \ + Command as OpinionCommand +from alert.scrapers.management.commands.cl_scrape_oral_arguments import \ + Command as OralArgCommand +from alert.scrapers.management.commands.cl_report_scrape_status import \ + calculate_counts, tally_errors from alert.scrapers.tasks import extract_from_txt -from alert.scrapers.test_assets import test_opinion_scraper, test_oral_arg_scraper +from alert.scrapers.test_assets import test_opinion_scraper, \ + test_oral_arg_scraper from alert.search.models import Citation, Court, Document, Docket from alert import settings from celery.task.sets import subtask @@ -71,7 +76,8 @@ def test_parsing_xml_oral_arg_site_to_site_object(self): self.assertEqual(len(site.case_names), 2) def test_content_extraction(self): - """Do all of the supported mimetypes get extracted to text successfully, including OCR?""" + """Do all of the supported mimetypes get extracted to text + successfully, including OCR?""" site = test_opinion_scraper.Site().parse() test_strings = ['supreme', @@ -81,7 +87,8 @@ def test_content_extraction(self): 'indiana', 'fidelity'] for i in range(0, len(site.case_names)): - path = os.path.join(settings.INSTALL_ROOT, 'alert', site.download_urls[i]) + path = os.path.join(settings.INSTALL_ROOT, 'alert', + site.download_urls[i]) with open(path) as f: content = f.read() cf = ContentFile(content) diff --git a/alert/search/api.py b/alert/search/api.py index 4a6d1633ec..8dc77ea578 100644 --- a/alert/search/api.py +++ b/alert/search/api.py @@ -6,14 +6,15 @@ from alert.lib.search_utils import build_main_query from alert.lib.string_utils import 
filter_invalid_XML_chars from alert.lib.sunburnt import sunburnt, SolrError -from alert.search.forms import SearchForm -from alert.search.models import Citation, Court, Document, SOURCES, DOCUMENT_STATUSES +from alert.search import forms +from alert.search.models import Citation, Court, Document, SOURCES, \ + DOCUMENT_STATUSES from alert.stats import tally_stat from django.core.cache import cache from lxml import etree from tastypie import fields, http -from tastypie.authentication import BasicAuthentication, SessionAuthentication, MultiAuthentication +from tastypie import authentication from tastypie.constants import ALL from tastypie.exceptions import BadRequest from tastypie.resources import ModelResource @@ -27,14 +28,16 @@ numerical_filters = ('exact', 'gte', 'gt', 'lte', 'lt', 'range',) -class BasicAuthenticationWithUser(BasicAuthentication): - """Wraps the BasicAuthentication class, changing the get_identifier method to provide the username instead of - essentially nothing. +class BasicAuthenticationWithUser(authentication.BasicAuthentication): + """Wraps the BasicAuthentication class, changing the get_identifier method + to provide the username instead of essentially nothing. Proposed this change in: https://github.com/toastdriven/django-tastypie/pull/1085/commits """ + def __init__(self, backend=None, realm='django-tastypie', **kwargs): - super(BasicAuthenticationWithUser, self).__init__(backend, realm, **kwargs) + super(BasicAuthenticationWithUser, self).__init__(backend, realm, + **kwargs) def get_identifier(self, request): return request.META.get('REMOTE_USER', request.user.username) @@ -50,7 +53,8 @@ def _handle_500(self, request, exception): if isinstance(exception, SolrError): solr_status_code = exception[0]['status'] error_xml = etree.fromstring(exception[1]) - solr_msg = error_xml.xpath('//lst[@name = "error"]/str[@name = "msg"]/text()')[0] + solr_msg = error_xml.xpath( + '//lst[@name = "error"]/str[@name = "msg"]/text()')[0] data = { 'error_message': "SolrError raised while interpreting your query.", 'solr_status_code': solr_status_code, @@ -62,7 +66,8 @@ def _handle_500(self, request, exception): response_class=http.HttpApplicationError ) else: - return super(ModelResourceWithFieldsFilter, self)._handle_500(request, exception) + return super(ModelResourceWithFieldsFilter, self)._handle_500( + request, exception) def alter_list_data_to_serialize(self, request, data): # Add a request_uri field @@ -71,7 +76,8 @@ def alter_list_data_to_serialize(self, request, data): return data def full_dehydrate(self, bundle, *args, **kwargs): - bundle = super(ModelResourceWithFieldsFilter, self).full_dehydrate(bundle, *args, **kwargs) + bundle = super(ModelResourceWithFieldsFilter, self).full_dehydrate( + bundle, *args, **kwargs) # bundle.obj[0]._data['citeCount'] = 0 fields = bundle.request.GET.get("fields", "") if fields: @@ -92,7 +98,8 @@ def dehydrate(self, bundle): def dispatch(self, request_type, request, **kwargs): """Simple override here to tally stats before sending off the results.""" tally_stat(self.tally_name) - return super(ModelResourceWithFieldsFilter, self).dispatch(request_type, request, **kwargs) + return super(ModelResourceWithFieldsFilter, self).dispatch( + request_type, request, **kwargs) class PerUserCacheThrottle(CacheThrottle): @@ -123,10 +130,12 @@ def should_be_throttled(self, identifier, **kwargs): # Weed out anything older than the timeframe. 
minimum_time = int(time.time()) - int(self.timeframe) - times_accessed = [access for access in cache.get(key) if access >= minimum_time] + times_accessed = [access for access in cache.get(key) if + access >= minimum_time] cache.set(key, times_accessed, self.expiration) - throttle_at = self.custom_throttles.get(identifier, int(self.throttle_at)) + throttle_at = self.custom_throttles.get(identifier, + int(self.throttle_at)) if len(times_accessed) >= throttle_at: # Throttle them. return True @@ -142,8 +151,9 @@ class CourtResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'jurisdiction' queryset = Court.objects.exclude(jurisdiction='T') @@ -162,16 +172,19 @@ class Meta: 'end_date': good_date_filters, 'jurisdictions': ALL, } - ordering = ['date_modified', 'start_date', 'end_date', 'position', 'jurisdiction'] + ordering = ['date_modified', 'start_date', 'end_date', 'position', + 'jurisdiction'] excludes = ['has_opinion_scraper', 'has_oral_argument_scraper'] class CitationResource(ModelResourceWithFieldsFilter): - opinion_uris = fields.ToManyField('search.api.DocumentResource', 'parent_documents') + opinion_uris = fields.ToManyField('search.api.DocumentResource', + 'parent_documents') class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) queryset = Citation.objects.all() max_limit = 20 @@ -220,15 +233,17 @@ class DocumentResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'opinion' - queryset = Document.objects.all().select_related('docket__court__pk', 'citation') + queryset = Document.objects.all().select_related('docket__court__pk', + 'citation') max_limit = 20 allowed_methods = ['get'] include_absolute_url = True - excludes = ['is_stub_document', 'cases_cited',] + excludes = ['is_stub_document', 'cases_cited', ] filtering = { 'id': ('exact',), 'time_retrieved': good_time_filters, @@ -243,7 +258,8 @@ class Meta: 'blocked': ALL, 'extracted_by_ocr': ALL, } - ordering = ['time_retrieved', 'date_modified', 'date_filed', 'date_blocked'] + ordering = ['time_retrieved', 'date_modified', 'date_filed', + 'date_blocked'] class CitedByResource(ModelResourceWithFieldsFilter): @@ -264,12 +280,15 @@ class CitedByResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'cited-by' queryset = Document.objects.all() - 
excludes = ('is_stub_document', 'html', 'html_lawbox', 'html_with_citations', 'plain_text',) + excludes = ( + 'is_stub_document', 'html', 'html_lawbox', 'html_with_citations', + 'plain_text',) include_absolute_url = True max_limit = 20 list_allowed_methods = ['get'] @@ -281,7 +300,8 @@ class Meta: def get_object_list(self, request): id = request.GET.get('id') if id: - return super(CitedByResource, self).get_object_list(request).filter( + return \ + super(CitedByResource, self).get_object_list(request).filter( pk=id)[0].citation.citing_opinions.all() else: # No ID field --> no results. @@ -296,7 +316,8 @@ def apply_filters(self, request, applicable_filters): """ return self.get_object_list(request) - def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): + def get_resource_uri(self, bundle_or_obj=None, + url_name='api_dispatch_list'): """Creates a URI like /api/v1/search/$id/ """ url_str = '/api/rest/%s/%s/%s/' @@ -328,12 +349,15 @@ class CitesResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'cites' queryset = Document.objects.all() - excludes = ('is_stub_document', 'html', 'html_lawbox', 'html_with_citations', 'plain_text',) + excludes = ( + 'is_stub_document', 'html', 'html_lawbox', 'html_with_citations', + 'plain_text',) include_absolute_url = True max_limit = 20 list_allowed_methods = ['get'] @@ -346,7 +370,8 @@ def get_object_list(self, request): """Get the citation associated with the document ID, then get all the items that it is cited by.""" id = request.GET.get('id') if id: - cases_cited = super(CitesResource, self).get_object_list(request).filter( + cases_cited = \ + super(CitesResource, self).get_object_list(request).filter( pk=id)[0].cases_cited.all() docs = Document.objects.filter(citation__in=cases_cited) return docs @@ -363,7 +388,8 @@ def apply_filters(self, request, applicable_filters): """ return self.get_object_list(request) - def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): + def get_resource_uri(self, bundle_or_obj=None, + url_name='api_dispatch_list'): """Creates a URI like /api/v1/search/$id/ """ url_str = '/api/rest/%s/%s/%s/' @@ -379,6 +405,7 @@ def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): class SolrList(object): """This implements a yielding list object that fetches items as they are queried.""" + def __init__(self, main_query, offset, limit, length=None): super(SolrList, self).__init__() self.main_query = main_query @@ -415,7 +442,8 @@ def __getitem__(self, item): # Pull the text snippet up a level, where tastypie can find it for result in results_si.result.docs: - result['snippet'] = '…'.join(result['solr_highlights']['text']) + result['snippet'] = '…'.join( + result['solr_highlights']['text']) # Return the results as objects, not dicts. 
for result in results_si.result.docs: @@ -522,8 +550,9 @@ class SearchResource(ModelResourceWithFieldsFilter): ) source = fields.CharField( attribute='source', - help_text='the source of the document, one of: %s' % ', '.join(['%s (%s)' % (t[0], t[1]) for t in - SOURCES]), + help_text='the source of the document, one of: %s' % ', '.join( + ['%s (%s)' % (t[0], t[1]) for t in + SOURCES]), null=True, ) snippet = fields.CharField( @@ -533,8 +562,9 @@ class SearchResource(ModelResourceWithFieldsFilter): ) status = fields.CharField( attribute='status', - help_text='The precedential status of document, one of: %s' % ', '.join([('stat_%s' % t[1]).replace(' ', '+') - for t in DOCUMENT_STATUSES]), + help_text='The precedential status of document, one of: %s' % ', '.join( + [('stat_%s' % t[1]).replace(' ', '+') + for t in DOCUMENT_STATUSES]), null=True, ) suit_nature = fields.CharField( @@ -553,8 +583,9 @@ class SearchResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'search' max_limit = 20 @@ -581,7 +612,8 @@ class Meta: 'score+desc', ] - def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): + def get_resource_uri(self, bundle_or_obj=None, + url_name='api_dispatch_list'): """Creates a URI like /api/v1/search/$id/ """ url_str = '/api/rest/%s/%s/%s/' @@ -603,7 +635,7 @@ def get_object_list(self, request=None, **kwargs): highlight='text' ) except KeyError: - sf = SearchForm({'q': "*:*"}) + sf = forms.SearchForm({'q': "*:*"}) if sf.is_valid(): main_query = build_main_query( sf.cleaned_data, @@ -621,23 +653,25 @@ def get_object_list(self, request=None, **kwargs): return sl def obj_get_list(self, bundle, **kwargs): - search_form = SearchForm(bundle.request.GET) + search_form = forms.SearchForm(bundle.request.GET) if search_form.is_valid(): cd = search_form.cleaned_data if cd['q'] == '': cd['q'] = '*:*' # Get everything. return self.get_object_list(bundle.request, cd=cd) else: - BadRequest("Invalid resource lookup data provided. Unable to complete your query.") + BadRequest( + "Invalid resource lookup data provided. Unable to complete your query.") def obj_get(self, bundle, **kwargs): - search_form = SearchForm(bundle.request.GET) + search_form = forms.SearchForm(bundle.request.GET) if search_form.is_valid(): cd = search_form.cleaned_data cd['q'] = 'id:%s' % kwargs['pk'] return self.get_object_list(bundle.request, cd=cd)[0] else: - BadRequest("Invalid resource lookup data provided. Unable to complete your request.") + BadRequest( + "Invalid resource lookup data provided. 
Unable to complete your request.") def apply_sorting(self, obj_list, options=None): """Since we're not using Django Model sorting, we just want to use our own, which is already diff --git a/alert/search/api2.py b/alert/search/api2.py index dad9b3f568..b17e11133c 100644 --- a/alert/search/api2.py +++ b/alert/search/api2.py @@ -6,16 +6,18 @@ from alert.lib.search_utils import build_main_query from alert.lib.string_utils import filter_invalid_XML_chars from alert.lib.sunburnt import sunburnt, SolrError -from alert.search.forms import SearchForm -from alert.search.models import Citation, Court, Document, SOURCES, DOCUMENT_STATUSES +from alert.search import forms +from alert.search.models import Citation, Court, Docket, Document, \ + SOURCES, DOCUMENT_STATUSES + from alert.stats import tally_stat from django.core.cache import cache from lxml import etree from tastypie import fields, http -from tastypie.authentication import BasicAuthentication, SessionAuthentication, MultiAuthentication +from tastypie import authentication from tastypie.constants import ALL -from tastypie.exceptions import BadRequest, TastypieError +from tastypie.exceptions import BadRequest from tastypie.resources import ModelResource from tastypie.throttle import CacheThrottle @@ -27,14 +29,16 @@ numerical_filters = ('exact', 'gte', 'gt', 'lte', 'lt', 'range',) -class BasicAuthenticationWithUser(BasicAuthentication): - """Wraps the BasicAuthentication class, changing the get_identifier method to provide the username instead of - essentially nothing. +class BasicAuthenticationWithUser(authentication.BasicAuthentication): + """Wraps the BasicAuthentication class, changing the get_identifier method + to provide the username instead of essentially nothing. Proposed this change in: https://github.com/toastdriven/django-tastypie/pull/1085/commits """ + def __init__(self, backend=None, realm='django-tastypie', **kwargs): - super(BasicAuthenticationWithUser, self).__init__(backend, realm, **kwargs) + super(BasicAuthenticationWithUser, self).__init__(backend, realm, + **kwargs) def get_identifier(self, request): return request.META.get('REMOTE_USER', request.user.username) @@ -50,9 +54,11 @@ def _handle_500(self, request, exception): if isinstance(exception, SolrError): solr_status_code = exception[0]['status'] error_xml = etree.fromstring(exception[1]) - solr_msg = error_xml.xpath('//lst[@name = "error"]/str[@name = "msg"]/text()')[0] + solr_msg = error_xml.xpath( + '//lst[@name = "error"]/str[@name = "msg"]/text()')[0] data = { - 'error_message': "SolrError raised while interpreting your query.", + 'error_message': "SolrError raised while interpreting your " + "query.", 'solr_status_code': solr_status_code, 'solr_msg': solr_msg, } @@ -62,14 +68,16 @@ def _handle_500(self, request, exception): response_class=http.HttpApplicationError ) else: - return super(ModelResourceWithFieldsFilter, self)._handle_500(request, exception) + return super(ModelResourceWithFieldsFilter, self)._handle_500( + request, exception) def alter_list_data_to_serialize(self, request, data): data['meta']['request_uri'] = request.get_full_path() return data def full_dehydrate(self, bundle, *args, **kwargs): - bundle = super(ModelResourceWithFieldsFilter, self).full_dehydrate(bundle, *args, **kwargs) + bundle = super(ModelResourceWithFieldsFilter, self).full_dehydrate( + bundle, *args, **kwargs) # bundle.obj[0]._data['citeCount'] = 0 fields = bundle.request.GET.get("fields", "") if fields: @@ -90,7 +98,8 @@ def dehydrate(self, bundle): def dispatch(self, 
request_type, request, **kwargs): """Simple override here to tally stats before sending off the results.""" tally_stat(self.tally_name) - return super(ModelResourceWithFieldsFilter, self).dispatch(request_type, request, **kwargs) + return super(ModelResourceWithFieldsFilter, self).dispatch( + request_type, request, **kwargs) class PerUserCacheThrottle(CacheThrottle): @@ -121,10 +130,12 @@ def should_be_throttled(self, identifier, **kwargs): # Weed out anything older than the timeframe. minimum_time = int(time.time()) - int(self.timeframe) - times_accessed = [access for access in cache.get(key) if access >= minimum_time] + times_accessed = [access for access in cache.get(key) if + access >= minimum_time] cache.set(key, times_accessed, self.expiration) - throttle_at = self.custom_throttles.get(identifier, int(self.throttle_at)) + throttle_at = self.custom_throttles.get(identifier, + int(self.throttle_at)) if len(times_accessed) >= throttle_at: # Throttle them. return True @@ -135,8 +146,9 @@ def should_be_throttled(self, identifier, **kwargs): class CourtResource(ModelResourceWithFieldsFilter): class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'jurisdiction' queryset = Court.objects.exclude(jurisdiction='T') @@ -156,19 +168,53 @@ class Meta: 'end_date': good_date_filters, 'jurisdictions': ALL, } - ordering = ['date_modified', 'start_date', 'end_date', 'position', 'jurisdiction'] + ordering = ['date_modified', 'start_date', 'end_date', 'position', + 'jurisdiction'] + + +class DocketResource(ModelResourceWithFieldsFilter): + court = fields.ForeignKey( + CourtResource, + 'court' + ) + + class Meta: + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) + + throttle = PerUserCacheThrottle(throttle_at=1000) + resource_name = 'docket' + queryset = Docket.objects.all() + max_limit = 20 + allowed_methods = ['get'] + include_absolute_url = True + filtering = { + 'id': ('exact',), + 'date_modified': good_time_filters, + 'court': ('exact',), + 'date_blocked': good_date_filters, + 'blocked': ALL, + } + ordering = ['date_modified', 'date_blocked'] class CitationResource(ModelResourceWithFieldsFilter): - opinion_uris = fields.ToManyField('search.api.DocumentResource', 'parent_documents') + opinion_uris = fields.ToManyField('search.api.DocumentResource', + 'parent_documents') class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) queryset = Citation.objects.all() max_limit = 20 - excludes = ['slug', ] + excludes = ['slug', ] # Why? 
+ + +class OralArgumentResource(ModelResourceWithFieldsFilter): + pass class DocumentResource(ModelResourceWithFieldsFilter): @@ -177,9 +223,9 @@ class DocumentResource(ModelResourceWithFieldsFilter): 'citation', full=True ) - court = fields.ForeignKey( - CourtResource, - 'court' + docket = fields.ForeignKey( + DocketResource, + 'docket' ) html = fields.CharField( attribute='html', @@ -213,15 +259,16 @@ class DocumentResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'opinion' - queryset = Document.objects.all().select_related('docket__court__pk', 'citation') + queryset = Document.objects.all().select_related('docket', 'citation') max_limit = 20 allowed_methods = ['get'] include_absolute_url = True - excludes = ['is_stub_document', 'cases_cited',] + excludes = ['is_stub_document', 'cases_cited'] filtering = { 'id': ('exact',), 'time_retrieved': good_time_filters, @@ -236,7 +283,8 @@ class Meta: 'blocked': ALL, 'extracted_by_ocr': ALL, } - ordering = ['time_retrieved', 'date_modified', 'date_filed', 'date_blocked'] + ordering = ['time_retrieved', 'date_modified', 'date_filed', + 'date_blocked'] class CitedByResource(ModelResourceWithFieldsFilter): @@ -257,12 +305,15 @@ class CitedByResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'cited-by' queryset = Document.objects.all() - excludes = ('is_stub_document', 'html', 'html_lawbox', 'html_with_citations', 'plain_text',) + excludes = ( + 'is_stub_document', 'html', 'html_lawbox', 'html_with_citations', + 'plain_text',) include_absolute_url = True max_limit = 20 list_allowed_methods = ['get'] @@ -274,8 +325,9 @@ class Meta: def get_object_list(self, request): id = request.GET.get('id') if id: - return super(CitedByResource, self).get_object_list(request).filter( - pk=id)[0].citation.citing_opinions.all() + return \ + super(CitedByResource, self).get_object_list(request).filter( + pk=id)[0].citation.citing_opinions.all() else: # No ID field --> no results. 
return super(CitedByResource, self).get_object_list(request).none() @@ -289,7 +341,8 @@ def apply_filters(self, request, applicable_filters): """ return self.get_object_list(request) - def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): + def get_resource_uri(self, bundle_or_obj=None, + url_name='api_dispatch_list'): """Creates a URI like /api/v1/search/$id/ """ url_str = '/api/rest/%s/%s/%s/' @@ -321,12 +374,15 @@ class CitesResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'cites' queryset = Document.objects.all() - excludes = ('is_stub_document', 'html', 'html_lawbox', 'html_with_citations', 'plain_text',) + excludes = ( + 'is_stub_document', 'html', 'html_lawbox', 'html_with_citations', + 'plain_text',) include_absolute_url = True max_limit = 20 list_allowed_methods = ['get'] @@ -339,8 +395,9 @@ def get_object_list(self, request): """Get the citation associated with the document ID, then get all the items that it is cited by.""" id = request.GET.get('id') if id: - cases_cited = super(CitesResource, self).get_object_list(request).filter( - pk=id)[0].cases_cited.all() + cases_cited = \ + super(CitesResource, self).get_object_list(request).filter( + pk=id)[0].cases_cited.all() docs = Document.objects.filter(citation__in=cases_cited) return docs else: @@ -356,7 +413,8 @@ def apply_filters(self, request, applicable_filters): """ return self.get_object_list(request) - def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): + def get_resource_uri(self, bundle_or_obj=None, + url_name='api_dispatch_list'): """Creates a URI like /api/v1/search/$id/ """ url_str = '/api/rest/%s/%s/%s/' @@ -372,6 +430,7 @@ def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): class SolrList(object): """This implements a yielding list object that fetches items as they are queried.""" + def __init__(self, main_query, offset, limit, length=None): super(SolrList, self).__init__() self.main_query = main_query @@ -408,7 +467,8 @@ def __getitem__(self, item): # Pull the text snippet up a level, where tastypie can find it for result in results_si.result.docs: - result['snippet'] = '…'.join(result['solr_highlights']['text']) + result['snippet'] = '…'.join( + result['solr_highlights']['text']) # Return the results as objects, not dicts. 
for result in results_si.result.docs: @@ -515,8 +575,9 @@ class SearchResource(ModelResourceWithFieldsFilter): ) source = fields.CharField( attribute='source', - help_text='the source of the document, one of: %s' % ', '.join(['%s (%s)' % (t[0], t[1]) for t in - SOURCES]), + help_text='the source of the document, one of: %s' % ', '.join( + ['%s (%s)' % (t[0], t[1]) for t in + SOURCES]), null=True, ) snippet = fields.CharField( @@ -526,8 +587,9 @@ class SearchResource(ModelResourceWithFieldsFilter): ) status = fields.CharField( attribute='status', - help_text='The precedential status of document, one of: %s' % ', '.join([('stat_%s' % t[1]).replace(' ', '+') - for t in DOCUMENT_STATUSES]), + help_text='The precedential status of document, one of: %s' % ', '.join( + [('stat_%s' % t[1]).replace(' ', '+') + for t in DOCUMENT_STATUSES]), null=True, ) suit_nature = fields.CharField( @@ -546,8 +608,9 @@ class SearchResource(ModelResourceWithFieldsFilter): ) class Meta: - authentication = MultiAuthentication(BasicAuthenticationWithUser(realm="courtlistener.com"), - SessionAuthentication()) + authentication = authentication.MultiAuthentication( + BasicAuthenticationWithUser(realm="courtlistener.com"), + authentication.SessionAuthentication()) throttle = PerUserCacheThrottle(throttle_at=1000) resource_name = 'search' max_limit = 20 @@ -574,7 +637,8 @@ class Meta: 'score+desc', ] - def get_resource_uri(self, bundle_or_obj=None, url_name='api_dispatch_list'): + def get_resource_uri(self, bundle_or_obj=None, + url_name='api_dispatch_list'): """Creates a URI like /api/v1/search/$id/ """ url_str = '/api/rest/%s/%s/%s/' @@ -592,9 +656,10 @@ def get_object_list(self, request=None, **kwargs): try: main_query = build_main_query(kwargs['cd'], highlight='text') except KeyError: - sf = SearchForm({'q': "*:*"}) + sf = forms.SearchForm({'q': "*:*"}) if sf.is_valid(): - main_query = build_main_query(sf.cleaned_data, highlight='text') + main_query = build_main_query(sf.cleaned_data, + highlight='text') main_query['caller'] = 'api_search' # Use a SolrList that has a couple of the normal functions built in. @@ -606,23 +671,25 @@ def get_object_list(self, request=None, **kwargs): return sl def obj_get_list(self, bundle, **kwargs): - search_form = SearchForm(bundle.request.GET) + search_form = forms.SearchForm(bundle.request.GET) if search_form.is_valid(): cd = search_form.cleaned_data if cd['q'] == '': cd['q'] = '*:*' # Get everything. return self.get_object_list(bundle.request, cd=cd) else: - BadRequest("Invalid resource lookup data provided. Unable to complete your query.") + raise BadRequest( + "Invalid resource lookup data provided. Unable to complete your query.") def obj_get(self, bundle, **kwargs): - search_form = SearchForm(bundle.request.GET) + search_form = forms.SearchForm(bundle.request.GET) if search_form.is_valid(): cd = search_form.cleaned_data cd['q'] = 'id:%s' % kwargs['pk'] return self.get_object_list(bundle.request, cd=cd)[0] else: - BadRequest("Invalid resource lookup data provided. Unable to complete your request.") + raise BadRequest( + "Invalid resource lookup data provided. Unable to complete your request.") def apply_sorting(self, obj_list, options=None): """Since we're not using Django Model sorting, we just want to use our own, which is already diff --git a/alert/search/forms.py b/alert/search/forms.py index c3c4806e9a..3dde9a80ea 100644 --- a/alert/search/forms.py +++ b/alert/search/forms.py @@ -37,10 +37,6 @@ '%Y/%m', # '2006/10' ] -# Query the DB so we can build up check boxes for each court in use.
-COURTS = Court.objects.filter(in_use=True).values( - 'pk', 'short_name', 'jurisdiction', 'has_oral_argument_scraper') - def _clean_form(request, cd): """Returns cleaned up values as a Form object. @@ -62,7 +58,9 @@ def _clean_form(request, cd): mutable_get['order_by'] = cd['order_by'] mutable_get['source'] = cd['source'] - for court in COURTS: + courts = Court.objects.filter(in_use=True).values( + 'pk', 'short_name', 'jurisdiction', 'has_oral_argument_scraper') + for court in courts: mutable_get['court_%s' % court['pk']] = cd['court_%s' % court['pk']] return SearchForm(mutable_get) @@ -202,8 +200,9 @@ def __init__(self, *args, **kwargs): names coming from the database, we need to interact directly with the fields dict. """ - - for court in COURTS: + courts = Court.objects.filter(in_use=True).values( + 'pk', 'short_name', 'jurisdiction', 'has_oral_argument_scraper') + for court in courts: self.fields['court_' + court['pk']] = forms.BooleanField( label=court['short_name'], required=False, diff --git a/alert/search/urls.py b/alert/search/urls.py index bd43b42549..c810504de9 100644 --- a/alert/search/urls.py +++ b/alert/search/urls.py @@ -1,8 +1,5 @@ from alert.search import api from alert.search import api2 -from alert.search.feeds import SearchFeed, JurisdictionFeed, \ - AllJurisdictionsFeed -from alert.search.views import show_results from alert.urls import pacer_codes from django.conf.urls import patterns, include @@ -26,16 +23,16 @@ urlpatterns = patterns('', # Search pages - (r'^$', show_results), # the home page! + (r'^$', 'alert.search.views.show_results'), # the home page! # The API (r'^api/rest/', include(v1_api.urls)), (r'^api/rest/', include(v2_api.urls)), # Feeds & Podcasts - (r'^feed/(search)/$', SearchFeed()), + (r'^feed/(search)/$', 'SearchFeed()'), # lacks URL capturing b/c it will use GET queries. - (r'^feed/court/all/$', AllJurisdictionsFeed()), + (r'^feed/court/all/$', 'AllJurisdictionsFeed()'), (r'^feed/court/(?P<court>' + '|'.join(pacer_codes) + ')/$', - JurisdictionFeed()), + 'JurisdictionFeed()'), ) diff --git a/alert/search/views.py b/alert/search/views.py index 615431edcb..689a24f4d8 100644 --- a/alert/search/views.py +++ b/alert/search/views.py @@ -16,9 +16,9 @@ from alert.lib import search_utils from alert.lib import sunburnt from alert.lib.bot_detector import is_bot -from alert.search.forms import SearchForm, COURTS, _clean_form +from alert.search.forms import SearchForm, _clean_form from alert import settings -from alert.search.models import Document +from alert.search.models import Document, Court from alert.stats import tally_stat, Stat from audio.models import Audio @@ -49,8 +49,12 @@ def do_search(request, rows=20, order_by=None): settings.SOLR_AUDIO_URL, mode='r') status_facets = None results_si = conn.raw_query(**search_utils.build_main_query(cd)) + + courts = Court.objects.filter(in_use=True).values( + 'pk', 'short_name', 'jurisdiction', + 'has_oral_argument_scraper') courts, court_count_human, court_count = search_utils\ - .merge_form_with_courts(COURTS, search_form) + .merge_form_with_courts(courts, search_form) except Exception, e: logger.warning("Error loading search page with request: %s" % request.GET) diff --git a/alert/settings/10-public.py b/alert/settings/10-public.py index 1d7480f9cb..e110324fb9 100644 --- a/alert/settings/10-public.py +++ b/alert/settings/10-public.py @@ -158,7 +158,7 @@ STATIC_ROOT = os.path.join(INSTALL_ROOT, 'alert/assets/static/') # This is where things get collected to # Where should the data dumps be stored?
-DUMP_DIR = os.path.join(INSTALL_ROOT, 'alert/assets/media/dumps/') +DUMP_DIR = os.path.join(INSTALL_ROOT, 'alert/assets/media/bulk-data/') TEMPLATE_DIRS = ( # Don't forget to use absolute paths, not relative paths. diff --git a/alert/urls.py b/alert/urls.py index 66c9674131..140936535a 100644 --- a/alert/urls.py +++ b/alert/urls.py @@ -5,7 +5,6 @@ from django.contrib import admin from django.views.generic import RedirectView - pacer_codes = Court.objects.filter(in_use=True).values_list('pk', flat=True) admin.autodiscover() diff --git a/apache/courtlistener.com.conf b/apache/courtlistener.com.conf index 9e9663b0c7..6f78b93679 100644 --- a/apache/courtlistener.com.conf +++ b/apache/courtlistener.com.conf @@ -25,7 +25,7 @@ Alias /media/ /var/www/court-listener/alert/assets/media/ Alias /static/ /var/www/court-listener/alert/assets/static/ - Alias /dumps/ /var/www/court-listener/alert/assets/media/dumps/ + Alias /api/bulk-data/ /var/www/court-listener/alert/assets/media/bulk-data/ Alias /humans.txt /var/www/court-listener/alert/humans.txt Alias /tools/free-law-machine/ /sata/vm/ Alias /tools/sample-data/ /sata/sample-data/ diff --git a/upgrade.txt b/upgrade.txt index a427067dbb..9a85981364 100644 --- a/upgrade.txt +++ b/upgrade.txt @@ -54,9 +54,6 @@ We welcome a conversion of these notes to a better process using Fabric. slug are updated elsewhere? + Case_name and slugs need to be pulled from the correct places (opinions should pull from Citation and Dockets from Docket, for example). - - favorites cannot be created correctly for some reason - - Why doesn't the button in the admin site work? It should use - get_absolute_url of Document, but it fails weirdly. - Write some kind of script to handle matching up oral args with opinions + Make audio searchable? + Make sure the processing_complete flag is triggered properly. @@ -64,19 +61,17 @@ We welcome a conversion of these notes to a better process using Fabric. - Make the audio page + Podcasts/Audio feeds - Audio alerts? - - Bulk files - - Sitemaps + + Sitemaps - selecting order drop down has weirdness on OA page. - Do citation feeds redirect properly now that we're using the PK rather than an ascii conversion? - can't click tabs in the jurisdiction picker in opinions! - add recent oral arguments to the homepage? - - Scraper: + The scrapers for audio and opinions need to be finalized and tested. - - Atom feeds for audio - Rewrite bulk files and ensure that the save and delete routines of Document and Audio properly invalidates the bulk files if necessary. - - Alerts functionality? + - verify that serve_static_files works for audio stuff (analyze its code + and such) API Changes: - /api/rest-info/ makes reference to a number of API calls that go to v1. @@ -114,7 +109,29 @@ We welcome a conversion of these notes to a better process using Fabric. instance_dir='/usr/local/solr/example/solr/audio', ) - Install the seal-rookery. + - Bulk files have been rewritten and require some new tweaks: + - Update cron to generate bulk files on the last day of each month, + using something like the following: + + min hour 30 4,6,9,11 * manage.py cl_make_bulk_data + min hour 31 1,3,5,7,8,10,12 * manage.py cl_make_bulk_data + min hour 28 2 * manage.py cl_make_bulk_data + + - Remove any old cron entries referencing dump_all_files.py + - Add a new directory at $INSTALL_ROOT/alert/assets/media/bulk-data to + contain the new bulk files when they are created, and symlink it from + /sata/. + - Generate the new bulk files by calling manage.py cl_make_bulk_data.
+ - Be sure that Apache is restarted so the new location is updated. + - Delete any old bulk files located at + $INSTALL_ROOT/alert/assets/media/dumps + + + Later: + - favorites cannot be created correctly for some reason + - Why doesn't the button in the admin site work? It should use + get_absolute_url of Document, but it fails weirdly. 2014-06-18: - This update makes alert editing a lot more intuitive.
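After running manage.py cl_make_bulk_data per the upgrade notes above, a quick sanity check of one generated archive can catch obvious problems. The sketch below is hypothetical and not part of the patch; the bulk_root path is an assumption based on the new DUMP_DIR setting, and the per-type "opinion" directory and "{pk}.json" member names are taken from what cl_make_bulk_data writes.

    # Hypothetical sketch: spot-check a generated bulk archive (paths are assumptions).
    import json
    import tarfile

    bulk_root = '/var/www/court-listener/alert/assets/media/bulk-data'  # assumed DUMP_DIR
    archive = tarfile.open('%s/opinion/all.tar.gz' % bulk_root, mode='r:gz')
    members = archive.getmembers()
    print '%s opinion records found in all.tar.gz' % len(members)
    # Each member should be a single JSON-serialized record named {pk}.json.
    record = json.load(archive.extractfile(members[0]))
    print 'first record id: %s' % record.get('id')
    archive.close()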
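Because the Apache configuration now aliases /api/bulk-data/ to the bulk-data directory, the per-jurisdiction archives should also be reachable over HTTP once Apache is restarted. A rough client-side sketch follows; the exact URL layout (e.g. /api/bulk-data/opinion/scotus.tar.gz) is an assumption inferred from that alias and the file names cl_make_bulk_data produces, not something stated in this patch.

    # Hypothetical client sketch: fetch one court's bulk file and count its records.
    import tarfile
    import urllib2  # Python 2, matching the rest of the codebase

    url = 'https://www.courtlistener.com/api/bulk-data/opinion/scotus.tar.gz'  # assumed URL
    local_path = '/tmp/scotus.tar.gz'
    with open(local_path, 'wb') as f:
        f.write(urllib2.urlopen(url).read())

    archive = tarfile.open(local_path, mode='r:gz')
    print '%s opinions in the scotus archive' % len(archive.getnames())
    archive.close()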