diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4a0a0e64..3a38850c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,7 +27,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 isort cpplint + pip install flake8 isort cpplint black pip install -r requirements.txt - name: Lint with flake8 run: | @@ -36,6 +36,9 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=11 --max-line-length=127 --statistics flake8 --filename='*.pyx,*.px*' --ignore E901,E225,E226,E227,E402,E999 + - name: Lint with black + run: | + black --check . - name: Lint with isort run: | isort -c . diff --git a/benchmarks/benchmark_als.py b/benchmarks/benchmark_als.py index 211b89e4..bcdda46b 100644 --- a/benchmarks/benchmark_als.py +++ b/benchmarks/benchmark_als.py @@ -17,6 +17,7 @@ try: import implicit.gpu # noqa + has_cuda = True except ImportError: has_cuda = False @@ -30,25 +31,29 @@ def inner(iteration, elapsed): loss = calculate_loss(plays, model.item_factors, model.user_factors, 0) print("model %s iteration %i loss %.5f" % (name, iteration, loss)) output[name].append(loss) + return inner for steps in [2, 3, 4]: - model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=True, regularization=0, - iterations=25) + model = AlternatingLeastSquares( + factors=100, use_native=True, use_cg=True, regularization=0, iterations=25 + ) model.cg_steps = steps - model.fit_callback = store_loss(model, 'cg%i' % steps) + model.fit_callback = store_loss(model, "cg%i" % steps) model.fit(plays) if has_cuda: - model = AlternatingLeastSquares(factors=100, use_native=True, use_gpu=True, - regularization=0, iterations=25) - model.fit_callback = store_loss(model, 'gpu') + model = AlternatingLeastSquares( + factors=100, use_native=True, use_gpu=True, regularization=0, iterations=25 + ) + model.fit_callback = store_loss(model, "gpu") model.use_gpu = True model.fit(plays) - model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=False, regularization=0, - iterations=25) - model.fit_callback = store_loss(model, 'cholesky') + model = AlternatingLeastSquares( + factors=100, use_native=True, use_cg=False, regularization=0, iterations=25 + ) + model.fit_callback = store_loss(model, "cholesky") model.fit(plays) return output @@ -61,99 +66,122 @@ def store_time(model, name): def inner(iteration, elapsed): print(name, model.factors, iteration, elapsed) times[name][model.factors].append(elapsed) + return inner output = defaultdict(list) for factors in range(32, 257, 32): for steps in [2, 3, 4]: - model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=True, - regularization=0, iterations=iterations) - model.fit_callback = store_time(model, 'cg%i' % steps) + model = AlternatingLeastSquares( + factors=factors, + use_native=True, + use_cg=True, + regularization=0, + iterations=iterations, + ) + model.fit_callback = store_time(model, "cg%i" % steps) model.cg_steps = steps model.fit(plays) - model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=False, - regularization=0, iterations=iterations) - model.fit_callback = store_time(model, 'cholesky') + model = AlternatingLeastSquares( + factors=factors, use_native=True, use_cg=False, regularization=0, iterations=iterations + ) + model.fit_callback = store_time(model, "cholesky") model.fit(plays) if has_cuda: - model = AlternatingLeastSquares(factors=factors, 
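The store_loss/store_time helpers above both rely on the model's `fit_callback` hook, which implicit invokes after every ALS iteration with `(iteration, elapsed)`. A minimal self-contained sketch of that pattern, with a tiny random matrix standing in for the benchmark's real `plays` data (the data and parameters here are illustrative only):

```python
from collections import defaultdict

import numpy as np
import scipy.sparse as sparse

from implicit.als import AlternatingLeastSquares

# tiny random item-user matrix standing in for the real `plays` data
plays = sparse.random(500, 2000, density=0.01, format="csr", dtype=np.float32)

times = defaultdict(list)


def record(iteration, elapsed):
    # implicit calls fit_callback once per ALS iteration with (iteration, elapsed)
    times["cg3"].append(elapsed)


model = AlternatingLeastSquares(
    factors=100, use_native=True, use_cg=True, regularization=0, iterations=25
)
model.cg_steps = 3
model.fit_callback = record
model.fit(plays)
print("fastest iteration: %.3fs" % min(times["cg3"]))
```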
use_native=True, use_gpu=True, - regularization=0, iterations=iterations) - model.fit_callback = store_time(model, 'gpu') + model = AlternatingLeastSquares( + factors=factors, + use_native=True, + use_gpu=True, + regularization=0, + iterations=iterations, + ) + model.fit_callback = store_time(model, "gpu") model.fit(plays) # take the min time for the output - output['factors'].append(factors) + output["factors"].append(factors) for name, stats in times.items(): output[name].append(min(stats[factors])) return output -LABELS = {'cg2': 'CG (2 Steps/Iteration)', - 'cg3': 'CG (3 Steps/Iteration)', - 'cg4': 'CG (4 Steps/Iteration)', - 'gpu': 'GPU', - 'cholesky': 'Cholesky'} - -COLOURS = {'cg2': "#2ca02c", - 'cg3': "#ff7f0e", - 'cg4': "#c5b0d5", - 'gpu': "#1f77b4", - 'cholesky': "#d62728"} - - -def generate_speed_graph(data, filename="als_speed.png", keys=['gpu', 'cg2', 'cg3', 'cholesky'], - labels=None, colours=None): +LABELS = { + "cg2": "CG (2 Steps/Iteration)", + "cg3": "CG (3 Steps/Iteration)", + "cg4": "CG (4 Steps/Iteration)", + "gpu": "GPU", + "cholesky": "Cholesky", +} + +COLOURS = { + "cg2": "#2ca02c", + "cg3": "#ff7f0e", + "cg4": "#c5b0d5", + "gpu": "#1f77b4", + "cholesky": "#d62728", +} + + +def generate_speed_graph( + data, + filename="als_speed.png", + keys=["gpu", "cg2", "cg3", "cholesky"], + labels=None, + colours=None, +): labels = labels or {} colours = colours or {} seaborn.set() fig, ax = plt.subplots() - factors = data['factors'] + factors = data["factors"] for key in keys: - ax.plot(factors, data[key], - color=colours.get(key, COLOURS.get(key)), - marker='o', markersize=6) + ax.plot( + factors, data[key], color=colours.get(key, COLOURS.get(key)), marker="o", markersize=6 + ) ax.text(factors[-1] + 5, data[key][-1], labels.get(key, LABELS[key]), fontsize=10) ax.set_ylabel("Seconds per Iteration") ax.set_xlabel("Factors") - plt.savefig(filename, bbox_inches='tight', dpi=300) + plt.savefig(filename, bbox_inches="tight", dpi=300) -def generate_loss_graph(data, filename="als_speed.png", keys=['gpu', 'cg2', 'cg3', 'cholesky']): +def generate_loss_graph(data, filename="als_speed.png", keys=["gpu", "cg2", "cg3", "cholesky"]): seaborn.set() fig, ax = plt.subplots() - iterations = range(1, len(data['cholesky']) + 1) + iterations = range(1, len(data["cholesky"]) + 1) for key in keys: - ax.plot(iterations, data[key], - color=COLOURS[key], - marker='o', markersize=6) + ax.plot(iterations, data[key], color=COLOURS[key], marker="o", markersize=6) ax.text(iterations[-1] + 1, data[key][-1], LABELS[key], fontsize=10) ax.set_ylabel("Mean Squared Error") ax.set_xlabel("Iteration") - plt.savefig(filename, bbox_inches='tight', dpi=300) + plt.savefig(filename, bbox_inches="tight", dpi=300) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Benchmark CG version against Cholesky", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('--input', type=str, required=True, - dest='inputfile', help='dataset file in matrix market format') - parser.add_argument('--graph', help='generates graphs', - action="store_true") - parser.add_argument('--loss', help='test training loss', - action="store_true") - parser.add_argument('--speed', help='test training speed', - action="store_true") + parser = argparse.ArgumentParser( + description="Benchmark CG version against Cholesky", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument( + "--input", + type=str, + required=True, + dest="inputfile", + help="dataset file in matrix market 
format", + ) + parser.add_argument("--graph", help="generates graphs", action="store_true") + parser.add_argument("--loss", help="test training loss", action="store_true") + parser.add_argument("--speed", help="test training speed", action="store_true") args = parser.parse_args() if not (args.speed or args.loss): diff --git a/benchmarks/benchmark_qmf.py b/benchmarks/benchmark_qmf.py index faab73d8..adc80b1e 100644 --- a/benchmarks/benchmark_qmf.py +++ b/benchmarks/benchmark_qmf.py @@ -32,15 +32,24 @@ def benchmark_implicit(matrix, factors, reg, iterations): def benchmark_qmf(qmfpath, matrix, factors, reg, iterations): matrix = matrix.tocoo() datafile = "qmf_data.txt" - open(datafile, "w").write("\n".join("%s %s %s" % vals - for vals in zip(matrix.row, matrix.col, matrix.data))) + open(datafile, "w").write( + "\n".join("%s %s %s" % vals for vals in zip(matrix.row, matrix.col, matrix.data)) + ) def get_qmf_command(nepochs): - return [qmfpath, "--train_dataset", datafile, - "--nfactors", str(factors), - "--confidence_weight", "1", - "--nepochs", str(nepochs), - "--regularization_lambda", str(reg)] + return [ + qmfpath, + "--train_dataset", + datafile, + "--nfactors", + str(factors), + "--confidence_weight", + "1", + "--nepochs", + str(nepochs), + "--regularization_lambda", + str(reg), + ] # ok, so QMF needs to read the data in - and including # that in the timing isn't fair. So run it once with no iterations @@ -58,8 +67,9 @@ def get_qmf_command(nepochs): def run_benchmark(args): plays = bm25_weight(scipy.io.mmread(args.inputfile)) - qmf_time = benchmark_qmf(args.qmfpath, plays, args.factors, args.regularization, - args.iterations) + qmf_time = benchmark_qmf( + args.qmfpath, plays, args.factors, args.regularization, args.iterations + ) implicit_time = benchmark_implicit(plays, args.factors, args.regularization, args.iterations) @@ -69,19 +79,25 @@ def run_benchmark(args): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generates Benchmark", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('--input', type=str, - dest='inputfile', help='dataset file in matrix market format') - parser.add_argument('--qmfpath', type=str, - dest='qmfpath', help='full path to qmf wals.bin file', required=True) - parser.add_argument('--factors', type=int, default=50, dest='factors', - help='Number of factors to calculate') - parser.add_argument('--reg', type=float, default=0.8, dest='regularization', - help='regularization weight') - parser.add_argument('--iter', type=int, default=15, dest='iterations', - help='Number of ALS iterations') + parser = argparse.ArgumentParser( + description="Generates Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--input", type=str, dest="inputfile", help="dataset file in matrix market format" + ) + parser.add_argument( + "--qmfpath", type=str, dest="qmfpath", help="full path to qmf wals.bin file", required=True + ) + parser.add_argument( + "--factors", type=int, default=50, dest="factors", help="Number of factors to calculate" + ) + parser.add_argument( + "--reg", type=float, default=0.8, dest="regularization", help="regularization weight" + ) + parser.add_argument( + "--iter", type=int, default=15, dest="iterations", help="Number of ALS iterations" + ) args = parser.parse_args() logging.basicConfig(level=logging.DEBUG) diff --git a/benchmarks/benchmark_spark.py b/benchmarks/benchmark_spark.py index ca0c2a04..5a07bc10 100644 --- a/benchmarks/benchmark_spark.py +++ 
b/benchmarks/benchmark_spark.py @@ -20,19 +20,19 @@ def convert_sparse_to_dataframe(spark, context, sparse_matrix): """ Converts a scipy sparse matrix to a spark dataframe """ m = sparse_matrix.tocoo() - data = context.parallelize(numpy.array([m.row, m.col, m.data]).T, - numSlices=len(m.row)/1024) - return spark.createDataFrame(data.map(lambda p: Row(row=int(p[0]), - col=int(p[1]), - data=float(p[2])))) + data = context.parallelize(numpy.array([m.row, m.col, m.data]).T, numSlices=len(m.row) / 1024) + return spark.createDataFrame( + data.map(lambda p: Row(row=int(p[0]), col=int(p[1]), data=float(p[2]))) + ) def benchmark_spark(ratings, factors, iterations=5): - conf = (SparkConf() - .setAppName("implicit_benchmark") - .setMaster('local[*]') - .set('spark.driver.memory', '16G') - ) + conf = ( + SparkConf() + .setAppName("implicit_benchmark") + .setMaster("local[*]") + .set("spark.driver.memory", "16G") + ) context = SparkContext(conf=conf) spark = SparkSession(context) @@ -41,14 +41,20 @@ def benchmark_spark(ratings, factors, iterations=5): ratings = convert_sparse_to_dataframe(spark, context, ratings) for rank in factors: - als = ALS(rank=rank, maxIter=iterations, - alpha=1, implicitPrefs=True, - userCol="row", itemCol="col", ratingCol="data") + als = ALS( + rank=rank, + maxIter=iterations, + alpha=1, + implicitPrefs=True, + userCol="row", + itemCol="col", + ratingCol="data", + ) start = time.time() als.fit(ratings) elapsed = time.time() - start times[rank] = elapsed / iterations - print("spark. factors=%i took %.3f" % (rank, elapsed/iterations)) + print("spark. factors=%i took %.3f" % (rank, elapsed / iterations)) finally: spark.stop() @@ -59,15 +65,15 @@ def benchmark_implicit(ratings, factors, iterations=5, use_gpu=False): ratings = ratings.tocsr() times = {} for rank in factors: - model = implicit.als.AlternatingLeastSquares(factors=rank, - iterations=iterations, - use_gpu=use_gpu) + model = implicit.als.AlternatingLeastSquares( + factors=rank, iterations=iterations, use_gpu=use_gpu + ) start = time.time() model.fit(ratings) elapsed = time.time() - start # take average time over iterations to be consistent with spark timings times[rank] = elapsed / iterations - print("implicit. factors=%i took %.3f" % (rank, elapsed/iterations)) + print("implicit. 
factors=%i took %.3f" % (rank, elapsed / iterations)) return times @@ -76,22 +82,24 @@ def generate_graph(times, factors, filename="spark_speed.png"): fig, ax = plt.subplots() for key in times: current = [times[key][f] for f in factors] - ax.plot(factors, current, marker='o', markersize=6) + ax.plot(factors, current, marker="o", markersize=6) ax.text(factors[-1] + 5, current[-1], key, fontsize=10) ax.set_ylabel("Seconds per Iteration") ax.set_xlabel("Factors") - plt.savefig(filename, bbox_inches='tight', dpi=300) + plt.savefig(filename, bbox_inches="tight", dpi=300) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Benchmark Spark against implicit", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('--input', type=str, required=True, - help='dataset file in matrix market format') - parser.add_argument('--output', type=str, required=True, - help='output file location') + parser = argparse.ArgumentParser( + description="Benchmark Spark against implicit", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument( + "--input", type=str, required=True, help="dataset file in matrix market format" + ) + parser.add_argument("--output", type=str, required=True, help="output file location") args = parser.parse_args() if not (args.speed or args.loss): print("must specify at least one of --speed or --loss") @@ -102,9 +110,9 @@ def generate_graph(times, factors, filename="spark_speed.png"): times = {} factors = list(range(64, 257, 64)) - times['Implicit (GPU)'] = benchmark_implicit(m, factors, use_gpu=True) - times['Spark MLlib'] = benchmark_spark(m, factors) - times['Implicit (CPU)'] = benchmark_implicit(m, factors, use_gpu=False) + times["Implicit (GPU)"] = benchmark_implicit(m, factors, use_gpu=True) + times["Spark MLlib"] = benchmark_spark(m, factors) + times["Implicit (CPU)"] = benchmark_implicit(m, factors, use_gpu=False) print(times) generate_graph(times, factors, filename=args.output + ".png") diff --git a/cuda_setup.py b/cuda_setup.py index c6914bf9..cd8cf8ea 100644 --- a/cuda_setup.py +++ b/cuda_setup.py @@ -29,55 +29,62 @@ def locate_cuda(): If nvcc can't be found, this returns None """ - nvcc_bin = 'nvcc' + nvcc_bin = "nvcc" if sys.platform.startswith("win"): - nvcc_bin = 'nvcc.exe' + nvcc_bin = "nvcc.exe" # first check if the CUDAHOME env variable is in use - if 'CUDAHOME' in os.environ: - home = os.environ['CUDAHOME'] - nvcc = os.path.join(home, 'bin', nvcc_bin) - elif 'CUDA_PATH' in os.environ: - home = os.environ['CUDA_PATH'] - nvcc = os.path.join(home, 'bin', nvcc_bin) + if "CUDAHOME" in os.environ: + home = os.environ["CUDAHOME"] + nvcc = os.path.join(home, "bin", nvcc_bin) + elif "CUDA_PATH" in os.environ: + home = os.environ["CUDA_PATH"] + nvcc = os.path.join(home, "bin", nvcc_bin) else: # otherwise, search the PATH for NVCC - nvcc = find_in_path(nvcc_bin, os.environ['PATH']) + nvcc = find_in_path(nvcc_bin, os.environ["PATH"]) if nvcc is None: - logging.warning('The nvcc binary could not be located in your $PATH. Either add it to ' - 'your path, or set $CUDAHOME to enable CUDA extensions') + logging.warning( + "The nvcc binary could not be located in your $PATH. 
Either add it to " + "your path, or set $CUDAHOME to enable CUDA extensions" + ) return None home = os.path.dirname(os.path.dirname(nvcc)) if not os.path.exists(os.path.join(home, "include")): logging.warning("Failed to find cuda include directory, attempting /usr/local/cuda") home = "/usr/local/cuda" - cudaconfig = {'home': home, - 'nvcc': nvcc, - 'include': os.path.join(home, 'include'), - 'lib64': os.path.join(home, 'lib64')} - - post_args = ["-arch=sm_50", - "-gencode=arch=compute_50,code=sm_50", - "-gencode=arch=compute_52,code=sm_52", - "-gencode=arch=compute_60,code=sm_60", - "-gencode=arch=compute_61,code=sm_61", - "-gencode=arch=compute_70,code=sm_70", - "-gencode=arch=compute_70,code=compute_70", - "--ptxas-options=-v", "-O2"] + cudaconfig = { + "home": home, + "nvcc": nvcc, + "include": os.path.join(home, "include"), + "lib64": os.path.join(home, "lib64"), + } + + post_args = [ + "-arch=sm_50", + "-gencode=arch=compute_50,code=sm_50", + "-gencode=arch=compute_52,code=sm_52", + "-gencode=arch=compute_60,code=sm_60", + "-gencode=arch=compute_61,code=sm_61", + "-gencode=arch=compute_70,code=sm_70", + "-gencode=arch=compute_70,code=compute_70", + "--ptxas-options=-v", + "-O2", + ] if sys.platform == "win32": - cudaconfig['lib64'] = os.path.join(home, 'lib', 'x64') - post_args += ['-Xcompiler', '/MD'] + cudaconfig["lib64"] = os.path.join(home, "lib", "x64") + post_args += ["-Xcompiler", "/MD"] else: - post_args += ['-c', '--compiler-options', "'-fPIC'"] + post_args += ["-c", "--compiler-options", "'-fPIC'"] for k, v in cudaconfig.items(): if not os.path.exists(v): - logging.warning('The CUDA %s path could not be located in %s', k, v) + logging.warning("The CUDA %s path could not be located in %s", k, v) return None - cudaconfig['post_args'] = post_args + cudaconfig["post_args"] = post_args return cudaconfig @@ -85,45 +92,55 @@ def locate_cuda(): # https://github.com/cupy/cupy/blob/master/cupy_setup_build.py class _UnixCCompiler(unixccompiler.UnixCCompiler): src_extensions = list(unixccompiler.UnixCCompiler.src_extensions) - src_extensions.append('.cu') + src_extensions.append(".cu") def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): # For sources other than CUDA C ones, just call the super class method. - if os.path.splitext(src)[1] != '.cu': + if os.path.splitext(src)[1] != ".cu": return unixccompiler.UnixCCompiler._compile( - self, obj, src, ext, cc_args, extra_postargs, pp_opts) + self, obj, src, ext, cc_args, extra_postargs, pp_opts + ) # For CUDA C source files, compile them with NVCC. _compiler_so = self.compiler_so try: - nvcc_path = CUDA['nvcc'] - post_args = CUDA['post_args'] + nvcc_path = CUDA["nvcc"] + post_args = CUDA["post_args"] # TODO? 
base_opts = build.get_compiler_base_options() - self.set_executable('compiler_so', nvcc_path) + self.set_executable("compiler_so", nvcc_path) return unixccompiler.UnixCCompiler._compile( - self, obj, src, ext, cc_args, post_args, pp_opts) + self, obj, src, ext, cc_args, post_args, pp_opts + ) finally: self.compiler_so = _compiler_so class _MSVCCompiler(msvccompiler.MSVCCompiler): - _cu_extensions = ['.cu'] + _cu_extensions = [".cu"] src_extensions = list(unixccompiler.UnixCCompiler.src_extensions) src_extensions.extend(_cu_extensions) - def _compile_cu(self, sources, output_dir=None, macros=None, - include_dirs=None, debug=0, extra_preargs=None, - extra_postargs=None, depends=None): + def _compile_cu( + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None, + ): # Compile CUDA C files, mainly derived from UnixCCompiler._compile(). - macros, objects, extra_postargs, pp_opts, _build = \ - self._setup_compile(output_dir, macros, include_dirs, sources, - depends, extra_postargs) + macros, objects, extra_postargs, pp_opts, _build = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) - compiler_so = CUDA['nvcc'] + compiler_so = CUDA["nvcc"] cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) - post_args = CUDA['post_args'] + post_args = CUDA["post_args"] for obj in objects: try: @@ -131,7 +148,7 @@ def _compile_cu(self, sources, output_dir=None, macros=None, except KeyError: continue try: - self.spawn([compiler_so] + cc_args + [src, '-o', obj] + post_args) + self.spawn([compiler_so] + cc_args + [src, "-o", obj] + post_args) except errors.DistutilsExecError as e: raise errors.CompileError(str(e)) @@ -142,14 +159,13 @@ def compile(self, sources, **kwargs): cu_sources = [] other_sources = [] for source in sources: - if os.path.splitext(source)[1] == '.cu': + if os.path.splitext(source)[1] == ".cu": cu_sources.append(source) else: other_sources.append(source) # Compile source files other than CUDA C ones. - other_objects = msvccompiler.MSVCCompiler.compile( - self, other_sources, **kwargs) + other_objects = msvccompiler.MSVCCompiler.compile(self, other_sources, **kwargs) # Compile CUDA C sources. cu_objects = self._compile_cu(cu_sources, **kwargs) @@ -163,22 +179,24 @@ class cuda_build_ext(setuptools_build_ext): def run(self): if CUDA is not None: + def wrap_new_compiler(func): def _wrap_new_compiler(*args, **kwargs): try: return func(*args, **kwargs) except errors.DistutilsPlatformError: - if not sys.platform == 'win32': + if not sys.platform == "win32": CCompiler = _UnixCCompiler else: CCompiler = _MSVCCompiler - return CCompiler( - None, kwargs['dry_run'], kwargs['force']) + return CCompiler(None, kwargs["dry_run"], kwargs["force"]) + return _wrap_new_compiler + ccompiler.new_compiler = wrap_new_compiler(ccompiler.new_compiler) # Intentionally causes DistutilsPlatformError in # ccompiler.new_compiler() function to hook. - self.compiler = 'nvidia' + self.compiler = "nvidia" setuptools_build_ext.run(self) diff --git a/docs/conf.py b/docs/conf.py index 92d0ed01..8c3560fe 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,25 +30,24 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
-extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.napoleon'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'Implicit' -copyright = u'2017, Ben Frederickson' -author = u'Ben Frederickson' +project = u"Implicit" +copyright = u"2017, Ben Frederickson" +author = u"Ben Frederickson" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -57,7 +56,7 @@ # The short X.Y version. import implicit # noqa -version = '.'.join(implicit.__version__.split('.')[:2]) +version = ".".join(implicit.__version__.split(".")[:2]) # The full version, including alpha/beta/rc tags. release = implicit.__version__ @@ -72,10 +71,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -86,7 +85,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -97,7 +96,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -105,12 +104,12 @@ # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { - '**': [ - 'about.html', - 'navigation.html', - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', - 'donate.html', + "**": [ + "about.html", + "navigation.html", + "relations.html", # needs 'show_related': True theme option to display + "searchbox.html", + "donate.html", ] } @@ -118,7 +117,7 @@ # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'Implicitdoc' +htmlhelp_basename = "Implicitdoc" # -- Options for LaTeX output --------------------------------------------- @@ -127,15 +126,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. 
# # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -145,8 +141,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'Implicit.tex', u'Implicit Documentation', - u'Ben Frederickson', 'manual'), + (master_doc, "Implicit.tex", u"Implicit Documentation", u"Ben Frederickson", "manual"), ] @@ -154,10 +149,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'implicit', u'Implicit Documentation', - [author], 1) -] +man_pages = [(master_doc, "implicit", u"Implicit Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -166,7 +158,13 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'Implicit', u'Implicit Documentation', - author, 'Implicit', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "Implicit", + u"Implicit Documentation", + author, + "Implicit", + "One line description of project.", + "Miscellaneous", + ), ] diff --git a/examples/lastfm.py b/examples/lastfm.py index 65d52c74..ff1afa2d 100644 --- a/examples/lastfm.py +++ b/examples/lastfm.py @@ -15,24 +15,33 @@ import tqdm from implicit.als import AlternatingLeastSquares -from implicit.approximate_als import (AnnoyAlternatingLeastSquares, FaissAlternatingLeastSquares, - NMSLibAlternatingLeastSquares) +from implicit.approximate_als import ( + AnnoyAlternatingLeastSquares, + FaissAlternatingLeastSquares, + NMSLibAlternatingLeastSquares, +) from implicit.bpr import BayesianPersonalizedRanking from implicit.datasets.lastfm import get_lastfm from implicit.lmf import LogisticMatrixFactorization -from implicit.nearest_neighbours import (BM25Recommender, CosineRecommender, - TFIDFRecommender, bm25_weight) +from implicit.nearest_neighbours import ( + BM25Recommender, + CosineRecommender, + TFIDFRecommender, + bm25_weight, +) # maps command line model argument to class name -MODELS = {"als": AlternatingLeastSquares, - "nmslib_als": NMSLibAlternatingLeastSquares, - "annoy_als": AnnoyAlternatingLeastSquares, - "faiss_als": FaissAlternatingLeastSquares, - "tfidf": TFIDFRecommender, - "cosine": CosineRecommender, - "bpr": BayesianPersonalizedRanking, - "lmf": LogisticMatrixFactorization, - "bm25": BM25Recommender} +MODELS = { + "als": AlternatingLeastSquares, + "nmslib_als": NMSLibAlternatingLeastSquares, + "annoy_als": AnnoyAlternatingLeastSquares, + "faiss_als": FaissAlternatingLeastSquares, + "tfidf": TFIDFRecommender, + "cosine": CosineRecommender, + "bpr": BayesianPersonalizedRanking, + "lmf": LogisticMatrixFactorization, + "bm25": BM25Recommender, +} def get_model(model_name): @@ -43,13 +52,13 @@ def get_model(model_name): # some default params if model_name.endswith("als"): - params = {'factors': 64, 'dtype': np.float32} + params = {"factors": 64, "dtype": np.float32} elif model_name == "bm25": - params = {'K1': 100, 'B': 0.5} + params = {"K1": 100, "B": 0.5} elif model_name == "bpr": - params = {'factors': 63} + params = {"factors": 63} elif model_name == "lmf": - params = {'factors': 30, "iterations": 40, "regularization": 1.5} + params = {"factors": 30, "iterations": 40, "regularization": 1.5} else: params = {} @@ -57,8 +66,8 @@ def get_model(model_name): def calculate_similar_artists(output_filename, model_name="als"): - """ generates a list of similar artists in lastfm by 
utilizing the 'similar_items' - api of the models """ + """generates a list of similar artists in lastfm by utilizing the 'similar_items' + api of the models""" artists, users, plays = get_lastfm() # create a model from the input data @@ -99,7 +108,7 @@ def calculate_similar_artists(output_filename, model_name="als"): o.write("%s\t%s\t%s\n" % (artist, artists[other], score)) progress.update(1) - logging.debug("generated similar artists in %0.2fs", time.time() - start) + logging.debug("generated similar artists in %0.2fs", time.time() - start) def calculate_recommendations(output_filename, model_name="als"): @@ -137,22 +146,37 @@ def calculate_recommendations(output_filename, model_name="als"): for artistid, score in model.recommend(userid, user_plays): o.write("%s\t%s\t%s\n" % (username, artists[artistid], score)) progress.update(1) - logging.debug("generated recommendations in %0.2fs", time.time() - start) + logging.debug("generated recommendations in %0.2fs", time.time() - start) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generates similar artists on the last.fm dataset" - " or generates personalized recommendations for each user", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--output', type=str, default='similar-artists.tsv', - dest='outputfile', help='output file name') - parser.add_argument('--model', type=str, default='als', - dest='model', help='model to calculate (%s)' % "/".join(MODELS.keys())) - parser.add_argument('--recommend', - help='Recommend items for each user rather than calculate similar_items', - action="store_true") - parser.add_argument('--param', action='append', - help="Parameters to pass to the model, formatted as 'KEY=VALUE") + parser = argparse.ArgumentParser( + description="Generates similar artists on the last.fm dataset" + " or generates personalized recommendations for each user", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output", + type=str, + default="similar-artists.tsv", + dest="outputfile", + help="output file name", + ) + parser.add_argument( + "--model", + type=str, + default="als", + dest="model", + help="model to calculate (%s)" % "/".join(MODELS.keys()), + ) + parser.add_argument( + "--recommend", + help="Recommend items for each user rather than calculate similar_items", + action="store_true", + ) + parser.add_argument( + "--param", action="append", help="Parameters to pass to the model, formatted as 'KEY=VALUE" + ) args = parser.parse_args() diff --git a/examples/movielens.py b/examples/movielens.py index 68f7ae9b..7ecb66c4 100644 --- a/examples/movielens.py +++ b/examples/movielens.py @@ -24,15 +24,17 @@ from implicit.bpr import BayesianPersonalizedRanking from implicit.datasets.movielens import get_movielens from implicit.lmf import LogisticMatrixFactorization -from implicit.nearest_neighbours import (BM25Recommender, CosineRecommender, - TFIDFRecommender, bm25_weight) +from implicit.nearest_neighbours import ( + BM25Recommender, + CosineRecommender, + TFIDFRecommender, + bm25_weight, +) log = logging.getLogger("implicit") -def calculate_similar_movies(output_filename, - model_name="als", min_rating=4.0, - variant='20m'): +def calculate_similar_movies(output_filename, model_name="als", min_rating=4.0, variant="20m"): # read in the input data file start = time.time() titles, ratings = get_movielens(variant) @@ -95,22 +97,44 @@ def calculate_similar_movies(output_filename, if __name__ == "__main__": - parser = 
argparse.ArgumentParser(description="Generates related movies from the MovieLens 20M " - "dataset (https://grouplens.org/datasets/movielens/20m/)", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('--output', type=str, default='similar-movies.tsv', - dest='outputfile', help='output file name') - parser.add_argument('--model', type=str, default='als', - dest='model', help='model to calculate (als/bm25/tfidf/cosine)') - parser.add_argument('--variant', type=str, default='20m', dest='variant', - help='Whether to use the 20m, 10m, 1m or 100k movielens dataset') - parser.add_argument('--min_rating', type=float, default=4.0, dest='min_rating', - help='Minimum rating to assume that a rating is positive') + parser = argparse.ArgumentParser( + description="Generates related movies from the MovieLens 20M " + "dataset (https://grouplens.org/datasets/movielens/20m/)", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument( + "--output", + type=str, + default="similar-movies.tsv", + dest="outputfile", + help="output file name", + ) + parser.add_argument( + "--model", + type=str, + default="als", + dest="model", + help="model to calculate (als/bm25/tfidf/cosine)", + ) + parser.add_argument( + "--variant", + type=str, + default="20m", + dest="variant", + help="Whether to use the 20m, 10m, 1m or 100k movielens dataset", + ) + parser.add_argument( + "--min_rating", + type=float, + default=4.0, + dest="min_rating", + help="Minimum rating to assume that a rating is positive", + ) args = parser.parse_args() logging.basicConfig(level=logging.DEBUG) - calculate_similar_movies(args.outputfile, - model_name=args.model, - min_rating=args.min_rating, variant=args.variant) + calculate_similar_movies( + args.outputfile, model_name=args.model, min_rating=args.min_rating, variant=args.variant + ) diff --git a/implicit/__init__.py b/implicit/__init__.py index 7a869a7a..b3f203f4 100644 --- a/implicit/__init__.py +++ b/implicit/__init__.py @@ -1,5 +1,5 @@ from . import als, approximate_als, bpr, lmf, nearest_neighbours -__version__ = '0.4.4' +__version__ = "0.4.4" __all__ = [als, approximate_als, bpr, nearest_neighbours, lmf, __version__] diff --git a/implicit/als.py b/implicit/als.py index 4c59c291..ced62558 100644 --- a/implicit/als.py +++ b/implicit/als.py @@ -16,7 +16,7 @@ def AlternatingLeastSquares( num_threads=0, random_state=None, ): - """ Alternating Least Squares + """Alternating Least Squares A Recommendation Model based off the algorithms described in the paper 'Collaborative Filtering for Implicit Feedback Datasets' with performance optimizations described in diff --git a/implicit/approximate_als.py b/implicit/approximate_als.py index 53cecc6f..5395c67c 100644 --- a/implicit/approximate_als.py +++ b/implicit/approximate_als.py @@ -15,7 +15,7 @@ def augment_inner_product_matrix(factors): - """ This function transforms a factor matrix such that an angular nearest neighbours search + """This function transforms a factor matrix such that an angular nearest neighbours search will return top related items of the inner product. This involves transforming each row by adding one extra dimension as suggested in the paper: @@ -24,7 +24,7 @@ def augment_inner_product_matrix(factors): Basically this involves transforming each feature vector so that they have the same norm, which means the cosine of this transformed vector is proportional to the dot product (if the other - vector in the cosine has a 0 in the extra dimension). 
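A small numpy sketch of the augmentation that docstring describes: every row gets one extra coordinate so that all rows share the same norm, and a query with a 0 in that coordinate has cosine similarity proportional to the original inner product. The function name and the extra-dimension step below are reconstructed from the paper's description for illustration, not copied from `augment_inner_product_matrix` itself:

```python
import numpy as np


def augment_for_angular_search(factors):
    # give every row the same norm by appending sqrt(max_norm^2 - ||row||^2)
    norms = np.linalg.norm(factors, axis=1)
    max_norm = norms.max()
    extra = np.sqrt(max_norm ** 2 - norms ** 2)
    return max_norm, np.append(factors, extra.reshape(-1, 1), axis=1)


item_factors = np.random.rand(1000, 64).astype(np.float32)
max_norm, augmented = augment_for_angular_search(item_factors)

# queries get a 0 in the extra dimension, so the cosine against an augmented
# row is proportional to the inner product with the original row
query = np.append(np.random.rand(64).astype(np.float32), 0)
```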
""" + vector in the cosine has a 0 in the extra dimension).""" norms = numpy.linalg.norm(factors, axis=1) max_norm = norms.max() @@ -36,7 +36,7 @@ def augment_inner_product_matrix(factors): class NMSLibAlternatingLeastSquares(AlternatingLeastSquares): - """ Speeds up the base :class:`~implicit.als.AlternatingLeastSquares` model by using + """Speeds up the base :class:`~implicit.als.AlternatingLeastSquares` model by using `NMSLib `_ to create approximate nearest neighbours indices of the latent factors. @@ -67,14 +67,21 @@ class NMSLibAlternatingLeastSquares(AlternatingLeastSquares): item_factors """ - def __init__(self, - approximate_similar_items=True, approximate_recommend=True, - method='hnsw', index_params=None, query_params=None, - random_state=None, *args, **kwargs): + def __init__( + self, + approximate_similar_items=True, + approximate_recommend=True, + method="hnsw", + index_params=None, + query_params=None, + random_state=None, + *args, + **kwargs + ): if index_params is None: - index_params = {'M': 16, 'post': 0, 'efConstruction': 400} + index_params = {"M": 16, "post": 0, "efConstruction": 400} if query_params is None: - query_params = {'ef': 90} + query_params = {"ef": 90} self.similar_items_index = None self.recommend_index = None @@ -86,14 +93,14 @@ def __init__(self, self.index_params = index_params self.query_params = query_params - super(NMSLibAlternatingLeastSquares, self).__init__(*args, - random_state=random_state, - **kwargs) + super(NMSLibAlternatingLeastSquares, self).__init__( + *args, random_state=random_state, **kwargs + ) def fit(self, Ciu, show_progress=True): # nmslib can be a little chatty when first imported, disable some of # the logging - logging.getLogger('nmslib').setLevel(logging.WARNING) + logging.getLogger("nmslib").setLevel(logging.WARNING) import nmslib # train the model @@ -102,8 +109,7 @@ def fit(self, Ciu, show_progress=True): # create index for similar_items if self.approximate_similar_items: log.debug("Building nmslib similar items index") - self.similar_items_index = nmslib.init( - method=self.method, space='cosinesimil') + self.similar_items_index = nmslib.init(method=self.method, space="cosinesimil") # there are some numerical instability issues here with # building a cosine index with vectors with 0 norms, hack around this @@ -116,18 +122,15 @@ def fit(self, Ciu, show_progress=True): ids = ids[norms != 0] self.similar_items_index.addDataPointBatch(item_factors, ids=ids) - self.similar_items_index.createIndex(self.index_params, - print_progress=show_progress) + self.similar_items_index.createIndex(self.index_params, print_progress=show_progress) self.similar_items_index.setQueryTimeParams(self.query_params) # build up a separate index for the inner product (for recommend # methods) if self.approximate_recommend: log.debug("Building nmslib recommendation index") - self.max_norm, extra = augment_inner_product_matrix( - self.item_factors) - self.recommend_index = nmslib.init( - method='hnsw', space='cosinesimil') + self.max_norm, extra = augment_inner_product_matrix(self.item_factors) + self.recommend_index = nmslib.init(method="hnsw", space="cosinesimil") self.recommend_index.addDataPointBatch(extra) self.recommend_index.createIndex(self.index_params, print_progress=show_progress) self.recommend_index.setQueryTimeParams(self.query_params) @@ -136,17 +139,26 @@ def similar_items(self, itemid, N=10): if not self.approximate_similar_items: return super(NMSLibAlternatingLeastSquares, self).similar_items(itemid, N) - neighbours, distances = 
self.similar_items_index.knnQuery( - self.item_factors[itemid], N) + neighbours, distances = self.similar_items_index.knnQuery(self.item_factors[itemid], N) return zip(neighbours, 1.0 - distances) - def recommend(self, userid, user_items, N=10, filter_already_liked_items=True, - filter_items=None, recalculate_user=False): + def recommend( + self, + userid, + user_items, + N=10, + filter_already_liked_items=True, + filter_items=None, + recalculate_user=False, + ): if not self.approximate_recommend: - return super(NMSLibAlternatingLeastSquares, - self).recommend(userid, user_items, N=N, - filter_items=filter_items, - recalculate_user=recalculate_user) + return super(NMSLibAlternatingLeastSquares, self).recommend( + userid, + user_items, + N=N, + filter_items=filter_items, + recalculate_user=recalculate_user, + ) user = self._user_factor(userid, user_items, recalculate_user) @@ -202,12 +214,20 @@ class AnnoyAlternatingLeastSquares(AlternatingLeastSquares): item_factors """ - def __init__(self, approximate_similar_items=True, approximate_recommend=True, - n_trees=50, search_k=-1, random_state=None, *args, **kwargs): - - super(AnnoyAlternatingLeastSquares, self).__init__(*args, - random_state=random_state, - **kwargs) + def __init__( + self, + approximate_similar_items=True, + approximate_recommend=True, + n_trees=50, + search_k=-1, + random_state=None, + *args, + **kwargs + ): + + super(AnnoyAlternatingLeastSquares, self).__init__( + *args, random_state=random_state, **kwargs + ) self.similar_items_index = None self.recommend_index = None @@ -230,8 +250,7 @@ def fit(self, Ciu, show_progress=True): if self.approximate_similar_items: log.debug("Building annoy similar items index") - self.similar_items_index = annoy.AnnoyIndex( - self.item_factors.shape[1], 'angular') + self.similar_items_index = annoy.AnnoyIndex(self.item_factors.shape[1], "angular") for i, row in enumerate(self.item_factors): self.similar_items_index.add_item(i, row) self.similar_items_index.build(self.n_trees) @@ -241,7 +260,7 @@ def fit(self, Ciu, show_progress=True): if self.approximate_recommend: log.debug("Building annoy recommendation index") self.max_norm, extra = augment_inner_product_matrix(self.item_factors) - self.recommend_index = annoy.AnnoyIndex(extra.shape[1], 'angular') + self.recommend_index = annoy.AnnoyIndex(extra.shape[1], "angular") for i, row in enumerate(extra): self.recommend_index.add_item(i, row) self.recommend_index.build(self.n_trees) @@ -250,19 +269,29 @@ def similar_items(self, itemid, N=10): if not self.approximate_similar_items: return super(AnnoyAlternatingLeastSquares, self).similar_items(itemid, N) - neighbours, dist = self.similar_items_index.get_nns_by_item(itemid, N, - search_k=self.search_k, - include_distances=True) + neighbours, dist = self.similar_items_index.get_nns_by_item( + itemid, N, search_k=self.search_k, include_distances=True + ) # transform distances back to cosine from euclidean distance return zip(neighbours, 1 - (numpy.array(dist) ** 2) / 2) - def recommend(self, userid, user_items, N=10, filter_already_liked_items=True, - filter_items=None, recalculate_user=False): + def recommend( + self, + userid, + user_items, + N=10, + filter_already_liked_items=True, + filter_items=None, + recalculate_user=False, + ): if not self.approximate_recommend: - return super(AnnoyAlternatingLeastSquares, - self).recommend(userid, user_items, N=N, - filter_items=filter_items, - recalculate_user=recalculate_user) + return super(AnnoyAlternatingLeastSquares, self).recommend( + userid, + 
user_items, + N=N, + filter_items=filter_items, + recalculate_user=recalculate_user, + ) user = self._user_factor(userid, user_items, recalculate_user) @@ -276,8 +305,9 @@ def recommend(self, userid, user_items, N=10, filter_already_liked_items=True, count = N + len(liked) query = numpy.append(user, 0) - ids, dist = self.recommend_index.get_nns_by_vector(query, count, include_distances=True, - search_k=self.search_k) + ids, dist = self.recommend_index.get_nns_by_vector( + query, count, include_distances=True, search_k=self.search_k + ) # convert the distances from euclidean to cosine distance, # and then rescale the cosine distance to go back to inner product @@ -288,7 +318,7 @@ def recommend(self, userid, user_items, N=10, filter_already_liked_items=True, class FaissAlternatingLeastSquares(AlternatingLeastSquares): - """ Speeds up the base :class:`~implicit.als.AlternatingLeastSquares` model by using + """Speeds up the base :class:`~implicit.als.AlternatingLeastSquares` model by using `Faiss `_ to create approximate nearest neighbours indices of the latent factors. @@ -321,9 +351,17 @@ class FaissAlternatingLeastSquares(AlternatingLeastSquares): item_factors """ - def __init__(self, approximate_similar_items=True, approximate_recommend=True, - nlist=400, nprobe=20, use_gpu=implicit.gpu.HAS_CUDA, random_state=None, - *args, **kwargs): + def __init__( + self, + approximate_similar_items=True, + approximate_recommend=True, + nlist=400, + nprobe=20, + use_gpu=implicit.gpu.HAS_CUDA, + random_state=None, + *args, + **kwargs + ): self.similar_items_index = None self.recommend_index = None @@ -335,9 +373,9 @@ def __init__(self, approximate_similar_items=True, approximate_recommend=True, self.nlist = nlist self.nprobe = nprobe self.use_gpu = use_gpu - super(FaissAlternatingLeastSquares, self).__init__(*args, - random_state=random_state, - **kwargs) + super(FaissAlternatingLeastSquares, self).__init__( + *args, random_state=random_state, **kwargs + ) def fit(self, Ciu, show_progress=True): import faiss @@ -350,18 +388,20 @@ def fit(self, Ciu, show_progress=True): if self.use_gpu: self.gpu_resources = faiss.StandardGpuResources() - item_factors = self.item_factors.astype('float32') + item_factors = self.item_factors.astype("float32") if self.approximate_recommend: log.debug("Building faiss recommendation index") # build up a inner product index here if self.use_gpu: - index = faiss.GpuIndexIVFFlat(self.gpu_resources, self.factors, self.nlist, - faiss.METRIC_INNER_PRODUCT) + index = faiss.GpuIndexIVFFlat( + self.gpu_resources, self.factors, self.nlist, faiss.METRIC_INNER_PRODUCT + ) else: - index = faiss.IndexIVFFlat(self.quantizer, self.factors, self.nlist, - faiss.METRIC_INNER_PRODUCT) + index = faiss.IndexIVFFlat( + self.quantizer, self.factors, self.nlist, faiss.METRIC_INNER_PRODUCT + ) index.train(item_factors) index.add(item_factors) @@ -376,13 +416,15 @@ def fit(self, Ciu, show_progress=True): norms = numpy.linalg.norm(item_factors, axis=1) norms[norms == 0] = 1e-10 - normalized = (item_factors.T / norms).T.astype('float32') + normalized = (item_factors.T / norms).T.astype("float32") if self.use_gpu: - index = faiss.GpuIndexIVFFlat(self.gpu_resources, self.factors, self.nlist, - faiss.METRIC_INNER_PRODUCT) + index = faiss.GpuIndexIVFFlat( + self.gpu_resources, self.factors, self.nlist, faiss.METRIC_INNER_PRODUCT + ) else: - index = faiss.IndexIVFFlat(self.quantizer, self.factors, self.nlist, - faiss.METRIC_INNER_PRODUCT) + index = faiss.IndexIVFFlat( + self.quantizer, self.factors, self.nlist, 
faiss.METRIC_INNER_PRODUCT + ) index.train(normalized) index.add(normalized) @@ -395,17 +437,28 @@ def similar_items(self, itemid, N=10): factors = self.item_factors[itemid] factors /= numpy.linalg.norm(factors) - (dist,), (ids,) = self.similar_items_index.search(factors.reshape(1, -1).astype('float32'), - N) + (dist,), (ids,) = self.similar_items_index.search( + factors.reshape(1, -1).astype("float32"), N + ) return zip(ids, dist) - def recommend(self, userid, user_items, N=10, filter_already_liked_items=True, - filter_items=None, recalculate_user=False): + def recommend( + self, + userid, + user_items, + N=10, + filter_already_liked_items=True, + filter_items=None, + recalculate_user=False, + ): if not self.approximate_recommend: - return super(FaissAlternatingLeastSquares, - self).recommend(userid, user_items, N=N, - filter_items=filter_items, - recalculate_user=recalculate_user) + return super(FaissAlternatingLeastSquares, self).recommend( + userid, + user_items, + N=N, + filter_items=filter_items, + recalculate_user=recalculate_user, + ) user = self._user_factor(userid, user_items, recalculate_user) @@ -421,14 +474,17 @@ def recommend(self, userid, user_items, N=10, filter_already_liked_items=True, # the GPU variant of faiss doesn't support returning more than 1024 results. # fall back to the exact match when this happens if self.use_gpu and count >= 1024: - return super(FaissAlternatingLeastSquares, - self).recommend(userid, user_items, N=N, - filter_items=filter_items, - recalculate_user=recalculate_user) + return super(FaissAlternatingLeastSquares, self).recommend( + userid, + user_items, + N=N, + filter_items=filter_items, + recalculate_user=recalculate_user, + ) # faiss expects multiple queries - convert query to a matrix # and results back to single vectors - query = user.reshape(1, -1).astype('float32') + query = user.reshape(1, -1).astype("float32") (dist,), (ids,) = self.recommend_index.search(query, count) # convert the distances from euclidean to cosine distance, diff --git a/implicit/bpr.py b/implicit/bpr.py index a54daf52..80cc258c 100644 --- a/implicit/bpr.py +++ b/implicit/bpr.py @@ -15,7 +15,7 @@ def BayesianPersonalizedRanking( verify_negative_samples=True, random_state=None, ): - """ Bayesian Personalized Ranking + """Bayesian Personalized Ranking A recommender model that learns a matrix factorization embedding based off minimizing the pairwise ranking loss described in the paper `BPR: Bayesian Personalized Ranking from Implicit diff --git a/implicit/cpu/_als.pyx b/implicit/cpu/_als.pyx index 79ae1fc2..f0048b98 100644 --- a/implicit/cpu/_als.pyx +++ b/implicit/cpu/_als.pyx @@ -6,6 +6,7 @@ from cython cimport floating, integral from cython.parallel import parallel, prange cimport scipy.linalg.cython_blas as cython_blas + # requires scipy v0.16 cimport scipy.linalg.cython_lapack as cython_lapack from libc.stdlib cimport free, malloc diff --git a/implicit/cpu/als.py b/implicit/cpu/als.py index b89a47ee..0aa85a91 100644 --- a/implicit/cpu/als.py +++ b/implicit/cpu/als.py @@ -17,7 +17,7 @@ class AlternatingLeastSquares(MatrixFactorizationBase): - """ Alternating Least Squares + """Alternating Least Squares A Recommendation Model based off the algorithms described in the paper 'Collaborative Filtering for Implicit Feedback Datasets' with performance optimizations described in @@ -56,10 +56,18 @@ class AlternatingLeastSquares(MatrixFactorizationBase): Array of latent factors for each user in the training set """ - def __init__(self, factors=100, regularization=0.01, 
dtype=np.float32, - use_native=True, use_cg=True, - iterations=15, calculate_training_loss=False, num_threads=0, - random_state=None): + def __init__( + self, + factors=100, + regularization=0.01, + dtype=np.float32, + use_native=True, + use_cg=True, + iterations=15, + calculate_training_loss=False, + num_threads=0, + random_state=None, + ): super(AlternatingLeastSquares, self).__init__() @@ -86,7 +94,7 @@ def __init__(self, factors=100, regularization=0.01, dtype=np.float32, check_blas_config() def fit(self, item_users, show_progress=True): - """ Factorizes the item_users matrix. + """Factorizes the item_users matrix. After calling this method, the members 'user_factors' and 'item_factors' will be initialized with a latent factor model of the input data. @@ -150,15 +158,30 @@ def fit(self, item_users, show_progress=True): # alternate between learning the user_factors from the item_factors and vice-versa for iteration in range(self.iterations): s = time.time() - solver(Cui, self.user_factors, self.item_factors, self.regularization, - num_threads=self.num_threads) - solver(Ciu, self.item_factors, self.user_factors, self.regularization, - num_threads=self.num_threads) + solver( + Cui, + self.user_factors, + self.item_factors, + self.regularization, + num_threads=self.num_threads, + ) + solver( + Ciu, + self.item_factors, + self.user_factors, + self.regularization, + num_threads=self.num_threads, + ) progress.update(1) if self.calculate_training_loss: - loss = _als.calculate_loss(Cui, self.user_factors, self.item_factors, - self.regularization, num_threads=self.num_threads) + loss = _als.calculate_loss( + Cui, + self.user_factors, + self.item_factors, + self.regularization, + num_threads=self.num_threads, + ) progress.set_postfix({"loss": loss}) if self.fit_callback: @@ -170,17 +193,27 @@ def fit(self, item_users, show_progress=True): self._check_fit_errors() def recalculate_user(self, userid, user_items): - return user_factor(self.item_factors, self.YtY, - user_items.tocsr(), userid, - self.regularization, self.factors) + return user_factor( + self.item_factors, + self.YtY, + user_items.tocsr(), + userid, + self.regularization, + self.factors, + ) def recalculate_item(self, itemid, react_users): - return item_factor(self.user_factors, self.XtX, - react_users.tocsr(), itemid, - self.regularization, self.factors) + return item_factor( + self.user_factors, + self.XtX, + react_users.tocsr(), + itemid, + self.regularization, + self.factors, + ) def explain(self, userid, user_items, itemid, user_weights=None, N=10): - """ Provides explanations for why the item is liked by the user. + """Provides explanations for why the item is liked by the user. Parameters --------- @@ -211,9 +244,9 @@ def explain(self, userid, user_items, itemid, user_weights=None, N=10): # from section 5 of the paper CF for Implicit Feedback Datasets user_items = user_items.tocsr() if user_weights is None: - A, _ = user_linear_equation(self.item_factors, self.YtY, - user_items, userid, - self.regularization, self.factors) + A, _ = user_linear_equation( + self.item_factors, self.YtY, user_items, userid, self.regularization, self.factors + ) user_weights = scipy.linalg.cho_factor(A) seed_item = self.item_factors[itemid] @@ -265,13 +298,12 @@ def XtX(self): def alternating_least_squares(Ciu, factors, **kwargs): - """ factorizes the matrix Cui using an implicit alternating least squares + """factorizes the matrix Cui using an implicit alternating least squares algorithm. 
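For context on the solves that `fit()` alternates between: each pass computes, for every user u, x_u = (YᵀC_uY + λI)⁻¹ YᵀC_u p(u), where p(u) is 1 for observed items. A hedged pure-numpy sketch of that single-user solve, mirroring the slow reference `least_squares()` path further down rather than the Cython solver (function name is illustrative):

```python
import numpy as np


def user_factor_reference(Cui, u, Y, regularization):
    # Cui: user x item CSR matrix of confidences, Y: item factor matrix
    # x_u = (YtY + Yt(Cu - I)Y + reg * I)^-1 (Yt Cu p(u))
    n_factors = Y.shape[1]
    A = Y.T.dot(Y) + regularization * np.eye(n_factors)
    b = np.zeros(n_factors)
    for i, confidence in zip(Cui[u].indices, Cui[u].data):
        # only observed items contribute the (confidence - 1) correction
        A += (confidence - 1) * np.outer(Y[i], Y[i])
        b += confidence * Y[i]
    return np.linalg.solve(A, b)
```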
Note: this method is deprecated, consider moving to the AlternatingLeastSquares class instead """ - log.warning("This method is deprecated. Please use the AlternatingLeastSquares" - " class instead") + log.warning("This method is deprecated. Please use the AlternatingLeastSquares class instead") model = AlternatingLeastSquares(factors=factors, **kwargs) model.fit(Ciu) @@ -279,7 +311,7 @@ def alternating_least_squares(Ciu, factors, **kwargs): def least_squares(Cui, X, Y, regularization, num_threads=0): - """ For each user in Cui, calculate factors Xu for them + """For each user in Cui, calculate factors Xu for them using least squares on Y. Note: this is at least 10 times slower than the cython version included @@ -341,7 +373,7 @@ def least_squares_cg(Cui, X, Y, regularization, num_threads=0, cg_steps=3): r += (confidence - (confidence - 1) * Y[i].dot(x)) * Y[i] else: confidence *= -1 - r += - (confidence - 1) * Y[i].dot(x) * Y[i] + r += -(confidence - 1) * Y[i].dot(x) * Y[i] p = r.copy() rsold = r.dot(r) diff --git a/implicit/datasets/_download.py b/implicit/datasets/_download.py index 53b75f53..0b252279 100644 --- a/implicit/datasets/_download.py +++ b/implicit/datasets/_download.py @@ -11,15 +11,17 @@ def download_file(url, local_filename): - """ Simple wrapper around urlretrieve that uses tqdm to display a progress - bar of download progress """ + """Simple wrapper around urlretrieve that uses tqdm to display a progress + bar of download progress""" local_filename = os.path.abspath(local_filename) path = os.path.dirname(local_filename) if not os.path.isdir(path): os.makedirs(path) - with tqdm(unit='B', unit_scale=True) as progress: + with tqdm(unit="B", unit_scale=True) as progress: + def report(chunk, chunksize, total): progress.total = total progress.update(chunksize) + return urlretrieve(url, local_filename, reporthook=report) diff --git a/implicit/datasets/lastfm.py b/implicit/datasets/lastfm.py index d6fc638d..ff2938eb 100644 --- a/implicit/datasets/lastfm.py +++ b/implicit/datasets/lastfm.py @@ -11,12 +11,12 @@ log = logging.getLogger("implicit") -URL = 'https://github.com/benfred/recommender_data/releases/download/v1.0/lastfm_360k.hdf5' +URL = "https://github.com/benfred/recommender_data/releases/download/v1.0/lastfm_360k.hdf5" def get_lastfm(): - """ Returns the lastfm360k dataset, downloading locally if necessary. - Returns a tuple of (artistids, userids, plays) where plays is a CSR matrix """ + """Returns the lastfm360k dataset, downloading locally if necessary. 
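A short usage sketch for this dataset helper, condensed from examples/lastfm.py earlier in this diff; the weighting call and its parameters are illustrative choices, not a prescribed recipe:

```python
from implicit.als import AlternatingLeastSquares
from implicit.datasets.lastfm import get_lastfm
from implicit.nearest_neighbours import bm25_weight

artists, users, plays = get_lastfm()  # plays: artist x user CSR matrix
model = AlternatingLeastSquares(factors=64)
model.fit(bm25_weight(plays, K1=100, B=0.8))

# ten artists most similar to the first artist in the dataset
for other, score in model.similar_items(0, 10):
    print(artists[other], score)
```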
+ Returns a tuple of (artistids, userids, plays) where plays is a CSR matrix""" filename = os.path.join(_download.LOCAL_CACHE_DIR, "lastfm_360k.hdf5") if not os.path.isfile(filename): @@ -25,14 +25,14 @@ def get_lastfm(): else: log.info("Using cached dataset at '%s'", filename) - with h5py.File(filename, 'r') as f: - m = f.get('artist_user_plays') - plays = csr_matrix((m.get('data'), m.get('indices'), m.get('indptr'))) - return np.array(f['artist']), np.array(f['user']), plays + with h5py.File(filename, "r") as f: + m = f.get("artist_user_plays") + plays = csr_matrix((m.get("data"), m.get("indices"), m.get("indptr"))) + return np.array(f["artist"]), np.array(f["user"]), plays def generate_dataset(filename, outputfilename): - """ Generates a hdf5 lastfm datasetfile from the raw datafiles found at: + """Generates a hdf5 lastfm datasetfile from the raw datafiles found at: http://ocelma.net/MusicRecommendationDataset/lastfm-360K.html You shouldn't have to run this yourself, and can instead just download the @@ -55,14 +55,13 @@ def _read_dataframe(filename): # get a model based off the input params start = time.time() log.debug("reading data from %s", filename) - data = pandas.read_table(filename, - usecols=[0, 2, 3], - names=['user', 'artist', 'plays'], - na_filter=False) + data = pandas.read_table( + filename, usecols=[0, 2, 3], names=["user", "artist", "plays"], na_filter=False + ) # map each artist and user to a unique numeric value - data['user'] = data['user'].astype("category") - data['artist'] = data['artist'].astype("category") + data["user"] = data["user"].astype("category") + data["artist"] = data["artist"].astype("category") # store as a CSR matrix log.debug("read data file in %s", time.time() - start) @@ -72,21 +71,24 @@ def _read_dataframe(filename): def _hfd5_from_dataframe(data, outputfilename): # create a sparse matrix of all the users/plays - plays = coo_matrix((data['plays'].astype(np.float32), - (data['artist'].cat.codes.copy(), - data['user'].cat.codes.copy()))).tocsr() + plays = coo_matrix( + ( + data["plays"].astype(np.float32), + (data["artist"].cat.codes.copy(), data["user"].cat.codes.copy()), + ) + ).tocsr() with h5py.File(outputfilename, "w") as f: - g = f.create_group('artist_user_plays') + g = f.create_group("artist_user_plays") g.create_dataset("data", data=plays.data) g.create_dataset("indptr", data=plays.indptr) g.create_dataset("indices", data=plays.indices) dt = h5py.special_dtype(vlen=str) - artist = list(data['artist'].cat.categories) - dset = f.create_dataset('artist', (len(artist),), dtype=dt) + artist = list(data["artist"].cat.categories) + dset = f.create_dataset("artist", (len(artist),), dtype=dt) dset[:] = artist - user = list(data['user'].cat.categories) - dset = f.create_dataset('user', (len(user),), dtype=dt) + user = list(data["user"].cat.categories) + dset = f.create_dataset("user", (len(user),), dtype=dt) dset[:] = user diff --git a/implicit/datasets/million_song_dataset.py b/implicit/datasets/million_song_dataset.py index a11d06af..ea880725 100644 --- a/implicit/datasets/million_song_dataset.py +++ b/implicit/datasets/million_song_dataset.py @@ -12,11 +12,11 @@ log = logging.getLogger("implicit") -URL = 'https://github.com/benfred/recommender_data/releases/download/v1.0/msd_taste_profile.hdf5' +URL = "https://github.com/benfred/recommender_data/releases/download/v1.0/msd_taste_profile.hdf5" def get_msd_taste_profile(): - """ Returns the taste profile subset from the million song dataset: + """Returns the taste profile subset from the million song 
dataset: https://labrosa.ee.columbia.edu/millionsong/tasteprofile Data returned is a tuple of (trackinfo, user, plays) where @@ -35,15 +35,18 @@ def get_msd_taste_profile(): else: log.info("Using cached dataset at '%s'", filename) - with h5py.File(filename, 'r') as f: - m = f.get('track_user_plays') - plays = csr_matrix((m.get('data'), m.get('indices'), m.get('indptr'))) - return np.array(f['track']), np.array(f['user']), plays + with h5py.File(filename, "r") as f: + m = f.get("track_user_plays") + plays = csr_matrix((m.get("data"), m.get("indices"), m.get("indptr"))) + return np.array(f["track"]), np.array(f["user"]), plays -def generate_dataset(triplets_filename, summary_filename="msd_summary_file.h5", - outputfilename="msd_taste_profile.hdf5"): - """ Generates a hdf5 datasetfile from the raw datafiles: +def generate_dataset( + triplets_filename, + summary_filename="msd_summary_file.h5", + outputfilename="msd_taste_profile.hdf5", +): + """Generates a hdf5 datasetfile from the raw datafiles: You will need to download the train_triplets from here: https://labrosa.ee.columbia.edu/millionsong/tasteprofile#getting @@ -68,11 +71,11 @@ def _read_triplets_dataframe(filename): # get a model based off the input params start = time.time() log.debug("reading data from %s", filename) - data = pandas.read_table("train_triplets.txt", names=['user', 'track', 'plays']) + data = pandas.read_table("train_triplets.txt", names=["user", "track", "plays"]) # map each artist and user to a unique numeric value - data['user'] = data['user'].astype("category") - data['track'] = data['track'].astype("category") + data["user"] = data["user"].astype("category") + data["track"] = data["track"].astype("category") # store as a CSR matrix log.debug("read data file in %s", time.time() - start) @@ -85,12 +88,12 @@ def _join_summary_file(data, summary_filename="msd_summary_file.h5"): msd = h5py.File(summary_filename) # create a lookup table of trackid -> position - track_lookup = dict((t.encode("utf8"), i) for i, t in enumerate(data['track'].cat.categories)) + track_lookup = dict((t.encode("utf8"), i) for i, t in enumerate(data["track"].cat.categories)) # join on trackid to the summary file to get the artist/album/songname track_info = np.empty(shape=(len(track_lookup), 4), dtype=np.object) with tqdm(total=len(track_info)) as progress: - for song in msd['metadata']['songs']: + for song in msd["metadata"]["songs"]: trackid = song[17] if trackid in track_lookup: pos = track_lookup[trackid] @@ -102,20 +105,23 @@ def _join_summary_file(data, summary_filename="msd_summary_file.h5"): def _hfd5_from_dataframe(data, track_info, outputfilename): # create a sparse matrix of all the users/plays - plays = coo_matrix((data['plays'].astype(np.float32), - (data['track'].cat.codes.copy(), - data['user'].cat.codes.copy()))).tocsr() + plays = coo_matrix( + ( + data["plays"].astype(np.float32), + (data["track"].cat.codes.copy(), data["user"].cat.codes.copy()), + ) + ).tocsr() with h5py.File(outputfilename, "w") as f: - g = f.create_group('track_user_plays') + g = f.create_group("track_user_plays") g.create_dataset("data", data=plays.data) g.create_dataset("indptr", data=plays.indptr) g.create_dataset("indices", data=plays.indices) dt = h5py.special_dtype(vlen=str) - dset = f.create_dataset('track', track_info.shape, dtype=dt) + dset = f.create_dataset("track", track_info.shape, dtype=dt) dset[:] = track_info - user = list(data['user'].cat.categories) - dset = f.create_dataset('user', (len(user),), dtype=dt) + user = 
list(data["user"].cat.categories) + dset = f.create_dataset("user", (len(user),), dtype=dt) dset[:] = user diff --git a/implicit/datasets/movielens.py b/implicit/datasets/movielens.py index 47375d17..777af8f7 100644 --- a/implicit/datasets/movielens.py +++ b/implicit/datasets/movielens.py @@ -10,11 +10,11 @@ log = logging.getLogger("implicit") -URL_BASE = 'https://github.com/benfred/recommender_data/releases/download/v1.0/' +URL_BASE = "https://github.com/benfred/recommender_data/releases/download/v1.0/" def get_movielens(variant="20m"): - """ Gets movielens datasets + """Gets movielens datasets Parameters --------- @@ -39,14 +39,14 @@ def get_movielens(variant="20m"): else: log.info("Using cached dataset at '%s'", path) - with h5py.File(path, 'r') as f: - m = f.get('movie_user_ratings') - plays = csr_matrix((m.get('data'), m.get('indices'), m.get('indptr'))) - return np.array(f['movie']), plays + with h5py.File(path, "r") as f: + m = f.get("movie_user_ratings") + plays = csr_matrix((m.get("data"), m.get("indices"), m.get("indptr"))) + return np.array(f["movie"]), plays -def generate_dataset(path, variant='20m', outputpath="."): - """ Generates a hdf5 movielens datasetfile from the raw datafiles found at: +def generate_dataset(path, variant="20m", outputpath="."): + """Generates a hdf5 movielens datasetfile from the raw datafiles found at: https://grouplens.org/datasets/movielens/20m/ You shouldn't have to run this yourself, and can instead just download the @@ -54,9 +54,9 @@ def generate_dataset(path, variant='20m', outputpath="."): """ filename = os.path.join(outputpath, "movielens_%s.hdf5" % variant) - if variant == '20m': + if variant == "20m": ratings, movies = _read_dataframes_20M(path) - elif variant == '100k': + elif variant == "100k": ratings, movies = _read_dataframes_100k(path) else: ratings, movies = _read_dataframes(path) @@ -78,43 +78,52 @@ def _read_dataframes_100k(path): """ reads in the movielens 100k dataset""" import pandas - ratings = pandas.read_table(os.path.join(path, "u.data"), - names=['userId', 'movieId', 'rating', 'timestamp']) + ratings = pandas.read_table( + os.path.join(path, "u.data"), names=["userId", "movieId", "rating", "timestamp"] + ) - movies = pandas.read_csv(os.path.join(path, "u.item"), - names=['movieId', 'title'], - usecols=[0, 1], - delimiter='|', - encoding='ISO-8859-1') + movies = pandas.read_csv( + os.path.join(path, "u.item"), + names=["movieId", "title"], + usecols=[0, 1], + delimiter="|", + encoding="ISO-8859-1", + ) return ratings, movies def _read_dataframes(path): import pandas - ratings = pandas.read_csv(os.path.join(path, "ratings.dat"), delimiter="::", - names=['userId', 'movieId', 'rating', 'timestamp']) - movies = pandas.read_table(os.path.join(path, "movies.dat"), delimiter="::", - names=['movieId', 'title', 'genres']) + ratings = pandas.read_csv( + os.path.join(path, "ratings.dat"), + delimiter="::", + names=["userId", "movieId", "rating", "timestamp"], + ) + + movies = pandas.read_table( + os.path.join(path, "movies.dat"), delimiter="::", names=["movieId", "title", "genres"] + ) return ratings, movies def _hfd5_from_dataframe(ratings, movies, outputfilename): # transform ratings dataframe into a sparse matrix - m = coo_matrix((ratings['rating'].astype(np.float32), - (ratings['movieId'], ratings['userId']))).tocsr() + m = coo_matrix( + (ratings["rating"].astype(np.float32), (ratings["movieId"], ratings["userId"])) + ).tocsr() with h5py.File(outputfilename, "w") as f: # write out the ratings matrix - g = 
f.create_group('movie_user_ratings') + g = f.create_group("movie_user_ratings") g.create_dataset("data", data=m.data) g.create_dataset("indptr", data=m.indptr) g.create_dataset("indices", data=m.indices) # write out the titles as a numpy array - titles = np.empty(shape=(movies.movieId.max()+1,), dtype=np.object) + titles = np.empty(shape=(movies.movieId.max() + 1,), dtype=np.object) titles[movies.movieId] = movies.title dt = h5py.special_dtype(vlen=str) - dset = f.create_dataset('movie', (len(titles),), dtype=dt) + dset = f.create_dataset("movie", (len(titles),), dtype=dt) dset[:] = titles diff --git a/implicit/datasets/reddit.py b/implicit/datasets/reddit.py index 1a54d0bf..2d5f373d 100644 --- a/implicit/datasets/reddit.py +++ b/implicit/datasets/reddit.py @@ -11,17 +11,17 @@ log = logging.getLogger("implicit") -URL = 'https://github.com/benfred/recommender_data/releases/download/v1.0/reddit.hdf5' +URL = "https://github.com/benfred/recommender_data/releases/download/v1.0/reddit.hdf5" def get_reddit(): - """ Returns the reddit dataset, downloading locally if necessary. + """Returns the reddit dataset, downloading locally if necessary. This dataset was released here: https://www.reddit.com/r/redditdev/comments/dtg4j/want_to_help_reddit_build_a_recommender_a_public/ and contains 23M up/down votes from 44K users on 3.4M links. - Returns a CSR matrix of (item, user, rating """ + Returns a CSR matrix of (item, user, rating""" filename = os.path.join(_download.LOCAL_CACHE_DIR, "reddit.hdf5") if not os.path.isfile(filename): @@ -30,13 +30,13 @@ def get_reddit(): else: log.info("Using cached dataset at '%s'", filename) - with h5py.File(filename, 'r') as f: - m = f.get('item_user_ratings') - return csr_matrix((m.get('data'), m.get('indices'), m.get('indptr'))) + with h5py.File(filename, "r") as f: + m = f.get("item_user_ratings") + return csr_matrix((m.get("data"), m.get("indices"), m.get("indptr"))) def generate_dataset(filename, outputfilename): - """ Generates a hdf5 reddit datasetfile from the raw datafiles found at: + """Generates a hdf5 reddit datasetfile from the raw datafiles found at: https://www.reddit.com/r/redditdev/comments/dtg4j/want_to_help_reddit_build_a_recommender_a_public/ You shouldn't have to run this yourself, and can instead just download the @@ -55,11 +55,11 @@ def _read_dataframe(filename): # get a model based off the input params start = time.time() log.debug("reading data from %s", filename) - data = pandas.read_table(filename, usecols=[0, 1, 3], names=['user', 'item', 'rating']) + data = pandas.read_table(filename, usecols=[0, 1, 3], names=["user", "item", "rating"]) # map each artist and user to a unique numeric value - data['user'] = data['user'].astype("category") - data['item'] = data['item'].astype("category") + data["user"] = data["user"].astype("category") + data["item"] = data["item"].astype("category") # store as a CSR matrix log.debug("read data file in %s", time.time() - start) @@ -67,15 +67,18 @@ def _read_dataframe(filename): def _hfd5_from_dataframe(data, outputfilename): - ratings = coo_matrix((data['rating'].astype(np.float32), - (data['item'].cat.codes.copy(), - data['user'].cat.codes.copy()))).tocsr() + ratings = coo_matrix( + ( + data["rating"].astype(np.float32), + (data["item"].cat.codes.copy(), data["user"].cat.codes.copy()), + ) + ).tocsr() print(repr(ratings)) print(repr(ratings.indices)) print(repr(ratings.indptr)) with h5py.File(outputfilename, "w") as f: - g = f.create_group('item_user_ratings') + g = f.create_group("item_user_ratings") 
g.create_dataset("data", data=ratings.data) g.create_dataset("indptr", data=ratings.indptr) g.create_dataset("indices", data=ratings.indices) diff --git a/implicit/datasets/sketchfab.py b/implicit/datasets/sketchfab.py index 5ee53225..b8fc3b96 100644 --- a/implicit/datasets/sketchfab.py +++ b/implicit/datasets/sketchfab.py @@ -11,17 +11,17 @@ log = logging.getLogger("implicit") -URL = 'https://github.com/benfred/recommender_data/releases/download/v1.0/sketchfab.hdf5' +URL = "https://github.com/benfred/recommender_data/releases/download/v1.0/sketchfab.hdf5" def get_sketchfab(): - """ Returns the sketchfab dataset, downloading locally if necessary. + """Returns the sketchfab dataset, downloading locally if necessary. This dataset contains about 632K likes from 62K users on 28k items collected from the sketchfab website, as described here: http://blog.ethanrosenthal.com/2016/10/09/likes-out-guerilla-dataset/ - Returns a tuple of (items, users, likes) where likes is a CSR matrix """ + Returns a tuple of (items, users, likes) where likes is a CSR matrix""" filename = os.path.join(_download.LOCAL_CACHE_DIR, "sketchfab.hdf5") if not os.path.isfile(filename): @@ -30,10 +30,10 @@ def get_sketchfab(): else: log.info("Using cached dataset at '%s'", filename) - with h5py.File(filename, 'r') as f: - m = f.get('item_user_likes') - plays = csr_matrix((m.get('data'), m.get('indices'), m.get('indptr'))) - return np.array(f['item']), np.array(f['user']), plays + with h5py.File(filename, "r") as f: + m = f.get("item_user_likes") + plays = csr_matrix((m.get("data"), m.get("indices"), m.get("indptr"))) + return np.array(f["item"]), np.array(f["user"]), plays def generate_dataset(filename, outputfilename): @@ -49,11 +49,11 @@ def _read_dataframe(filename): # get a model based off the input params start = time.time() log.debug("reading data from %s", filename) - data = pandas.read_csv(filename, delimiter='|', quotechar='\\') + data = pandas.read_csv(filename, delimiter="|", quotechar="\\") # map each artist and user to a unique numeric value - data['uid'] = data['uid'].astype("category") - data['mid'] = data['mid'].astype("category") + data["uid"] = data["uid"].astype("category") + data["mid"] = data["mid"].astype("category") # store as a CSR matrix log.debug("read data file in %s", time.time() - start) @@ -61,24 +61,24 @@ def _read_dataframe(filename): def _hfd5_from_dataframe(data, outputfilename): - items = data['mid'].cat.codes.copy() - users = data['uid'].cat.codes.copy() + items = data["mid"].cat.codes.copy() + users = data["uid"].cat.codes.copy() values = np.ones(len(items)).astype(np.float32) # create a sparse matrix of all the item/users/likes likes = coo_matrix((values, (items, users))).astype(np.float32).tocsr() with h5py.File(outputfilename, "w") as f: - g = f.create_group('item_user_likes') + g = f.create_group("item_user_likes") g.create_dataset("data", data=likes.data) g.create_dataset("indptr", data=likes.indptr) g.create_dataset("indices", data=likes.indices) dt = h5py.special_dtype(vlen=str) - item = list(data['mid'].cat.categories) - dset = f.create_dataset('item', (len(item),), dtype=dt) + item = list(data["mid"].cat.categories) + dset = f.create_dataset("item", (len(item),), dtype=dt) dset[:] = item - user = list(data['uid'].cat.categories) - dset = f.create_dataset('user', (len(user),), dtype=dt) + user = list(data["uid"].cat.categories) + dset = f.create_dataset("user", (len(user),), dtype=dt) dset[:] = user diff --git a/implicit/gpu/__init__.py b/implicit/gpu/__init__.py index 
67833936..f82f7c41 100644 --- a/implicit/gpu/__init__.py +++ b/implicit/gpu/__init__.py @@ -4,6 +4,7 @@ import cupy # noqa from ._cuda import * # noqa + HAS_CUDA = True except ImportError: HAS_CUDA = False diff --git a/implicit/gpu/als.py b/implicit/gpu/als.py index c94c56d0..3aec6680 100644 --- a/implicit/gpu/als.py +++ b/implicit/gpu/als.py @@ -19,7 +19,7 @@ class AlternatingLeastSquares(MatrixFactorizationBase): - """ Alternating Least Squares + """Alternating Least Squares A Recommendation Model based off the algorithms described in the paper 'Collaborative Filtering for Implicit Feedback Datasets' with performance optimizations described in @@ -48,9 +48,14 @@ class AlternatingLeastSquares(MatrixFactorizationBase): Array of latent factors for each user in the training set """ - def __init__(self, factors=64, regularization=0.01, - iterations=15, calculate_training_loss=False, - random_state=None): + def __init__( + self, + factors=64, + regularization=0.01, + iterations=15, + calculate_training_loss=False, + random_state=None, + ): if not implicit.gpu.HAS_CUDA: raise ValueError("No CUDA extension has been built, can't train on GPU.") @@ -62,8 +67,12 @@ def __init__(self, factors=64, regularization=0.01, # the 'dot' function in 'implicit/gpu/utils.cuh) if factors % 32: padding = 32 - factors % 32 - log.warning("GPU training requires factor size to be a multiple of 32." - " Increasing factors from %i to %i.", factors, factors + padding) + log.warning( + "GPU training requires factor size to be a multiple of 32." + " Increasing factors from %i to %i.", + factors, + factors + padding, + ) factors += padding # parameters on how to factorize @@ -78,7 +87,7 @@ def __init__(self, factors=64, regularization=0.01, self.cg_steps = 3 def fit(self, item_users, show_progress=True): - """ Factorizes the item_users matrix. + """Factorizes the item_users matrix. After calling this method, the members 'user_factors' and 'item_factors' will be initialized with a latent factor model of the input data. 
@@ -126,10 +135,10 @@ def fit(self, item_users, show_progress=True): # Initialize the variables randomly if they haven't already been set if self.user_factors is None: - self.user_factors = (random_state.rand(users, self.factors, dtype=cp.float32) - .5) + self.user_factors = random_state.rand(users, self.factors, dtype=cp.float32) - 0.5 self.user_factors /= self.factors if self.item_factors is None: - self.item_factors = (random_state.rand(items, self.factors, dtype=cp.float32) - .5) + self.item_factors = random_state.rand(items, self.factors, dtype=cp.float32) - 0.5 self.item_factors /= self.factors log.debug("Initialized factors in %s", time.time() - s) diff --git a/implicit/gpu/bpr.py b/implicit/gpu/bpr.py index 25a41835..ea200242 100644 --- a/implicit/gpu/bpr.py +++ b/implicit/gpu/bpr.py @@ -15,7 +15,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase): - """ Bayesian Personalized Ranking + """Bayesian Personalized Ranking A recommender model that learns a matrix factorization embedding based off minimizing the pairwise ranking loss described in the paper `BPR: Bayesian Personalized Ranking from Implicit @@ -46,16 +46,29 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase): user_factors : ndarray Array of latent factors for each user in the training set """ - def __init__(self, factors=100, learning_rate=0.01, regularization=0.01, dtype=np.float32, - iterations=100, verify_negative_samples=True, random_state=None): + + def __init__( + self, + factors=100, + learning_rate=0.01, + regularization=0.01, + dtype=np.float32, + iterations=100, + verify_negative_samples=True, + random_state=None, + ): super(BayesianPersonalizedRanking, self).__init__() if not implicit.gpu.HAS_CUDA: raise ValueError("No CUDA extension has been built, can't train on GPU.") if (factors + 1) % 32: padding = 32 - (factors + 1) % 32 - log.warning("GPU training requires factor size to be a multiple of 32 - 1." - " Increasing factors from %i to %i.", factors, factors + padding) + log.warning( + "GPU training requires factor size to be a multiple of 32 - 1." 
+ " Increasing factors from %i to %i.", + factors, + factors + padding, + ) factors += padding self.factors = factors @@ -66,7 +79,7 @@ def __init__(self, factors=100, learning_rate=0.01, regularization=0.01, dtype=n self.random_state = random_state def fit(self, item_users, show_progress=True): - """ Factorizes the item_users matrix + """Factorizes the item_users matrix Parameters ---------- @@ -102,7 +115,7 @@ def fit(self, item_users, show_progress=True): # Note: the final dimension is for the item bias term - which is set to a 1 for all users # this simplifies interfacing with approximate nearest neighbours libraries etc if self.item_factors is None: - self.item_factors = rs.rand(items, self.factors + 1, dtype=cp.float32) - .5 + self.item_factors = rs.rand(items, self.factors + 1, dtype=cp.float32) - 0.5 self.item_factors /= self.factors # set factors to all zeros for items without any ratings @@ -110,7 +123,7 @@ def fit(self, item_users, show_progress=True): self.item_factors[item_counts == 0] = cp.zeros(self.factors + 1) if self.user_factors is None: - self.user_factors = rs.rand(users, self.factors + 1, dtype=cp.float32) - .5 + self.user_factors = rs.rand(users, self.factors + 1, dtype=cp.float32) - 0.5 self.user_factors /= self.factors # set factors to all zeros for users without any ratings @@ -130,14 +143,23 @@ def fit(self, item_users, show_progress=True): log.debug("Running %i BPR training epochs", self.iterations) with tqdm(total=self.iterations, disable=not show_progress) as progress: for epoch in range(self.iterations): - correct, skipped = implicit.gpu.cu_bpr_update(userids, itemids, indptr, - X, Y, self.learning_rate, - self.regularization, - rs.randint(2**31), - self.verify_negative_samples) + correct, skipped = implicit.gpu.cu_bpr_update( + userids, + itemids, + indptr, + X, + Y, + self.learning_rate, + self.regularization, + rs.randint(2 ** 31), + self.verify_negative_samples, + ) progress.update(1) total = len(user_items.data) if total != 0 and total != skipped: progress.set_postfix( - {"correct": "%.2f%%" % (100.0 * correct / (total - skipped)), - "skipped": "%.2f%%" % (100.0 * skipped / total)}) + { + "correct": "%.2f%%" % (100.0 * correct / (total - skipped)), + "skipped": "%.2f%%" % (100.0 * skipped / total), + } + ) diff --git a/implicit/gpu/matrix_factorization_base.py b/implicit/gpu/matrix_factorization_base.py index 6dfcc41f..0f8718e1 100644 --- a/implicit/gpu/matrix_factorization_base.py +++ b/implicit/gpu/matrix_factorization_base.py @@ -11,7 +11,7 @@ class MatrixFactorizationBase(RecommenderBase): - """ Base class for MF models running on the GPU. + """Base class for MF models running on the GPU. This adds support for inference to run on the GPU as well as training. Factors are stored as cupy arrays. @@ -30,8 +30,15 @@ def __init__(self): self._item_norms = None self._user_norms = None - def recommend(self, userid, user_items, - N=10, filter_already_liked_items=True, filter_items=None, recalculate_user=False): + def recommend( + self, + userid, + user_items, + N=10, + filter_already_liked_items=True, + filter_items=None, + recalculate_user=False, + ): if recalculate_user: raise NotImplementedError("recalculate_user isn't support on GPU yet") @@ -130,7 +137,7 @@ def check_random_state(random_state): if isinstance(random_state, np.random.RandomState): # we need to convert from numpy random state to cupy random state. 
- return cp.random.RandomState(random_state.randint(2**63)) + return cp.random.RandomState(random_state.randint(2 ** 63)) # otherwise try to initialize a new one, and let it fail through # on the numpy side if it doesn't work diff --git a/implicit/nearest_neighbours.py b/implicit/nearest_neighbours.py index 97a1bfeb..1aa95ad0 100644 --- a/implicit/nearest_neighbours.py +++ b/implicit/nearest_neighbours.py @@ -10,7 +10,7 @@ class ItemItemRecommender(RecommenderBase): - """ Base class for Item-Item Nearest Neighbour recommender models + """Base class for Item-Item Nearest Neighbour recommender models here. Parameters @@ -22,6 +22,7 @@ class ItemItemRecommender(RecommenderBase): The number of threads to use for fitting the model. Specifying 0 means to default to the number of cores on the machine. """ + def __init__(self, K=20, num_threads=0): self.similarity = None self.K = K @@ -30,13 +31,20 @@ def __init__(self, K=20, num_threads=0): def fit(self, weighted, show_progress=True): """ Computes and stores the similarity matrix """ - self.similarity = all_pairs_knn(weighted, self.K, - show_progress=show_progress, - num_threads=self.num_threads).tocsr() + self.similarity = all_pairs_knn( + weighted, self.K, show_progress=show_progress, num_threads=self.num_threads + ).tocsr() self.scorer = NearestNeighboursScorer(self.similarity) - def recommend(self, userid, user_items, - N=10, filter_already_liked_items=True, filter_items=None, recalculate_user=False): + def recommend( + self, + userid, + user_items, + N=10, + filter_already_liked_items=True, + filter_items=None, + recalculate_user=False, + ): """ returns the best N recommendations for a user given its id""" if userid >= user_items.shape[0]: raise ValueError("userid is out of bounds of the user_items matrix") @@ -46,9 +54,14 @@ def recommend(self, userid, user_items, if filter_items: items += len(filter_items) - indices, data = self.scorer.recommend(userid, user_items.indptr, user_items.indices, - user_items.data, K=items, - remove_own_likes=filter_already_liked_items) + indices, data = self.scorer.recommend( + userid, + user_items.indptr, + user_items.indices, + user_items.data, + K=items, + remove_own_likes=filter_already_liked_items, + ) best = sorted(zip(indices, data), key=lambda x: -x[1]) if not filter_items: @@ -90,7 +103,7 @@ def similar_items(self, itemid, N=10): def __getstate__(self): state = self.__dict__.copy() # scorer isn't picklable - del state['scorer'] + del state["scorer"] return state def __setstate__(self, state): @@ -102,8 +115,9 @@ def __setstate__(self, state): def save(self, filename): m = self.similarity - numpy.savez(filename, data=m.data, indptr=m.indptr, indices=m.indices, shape=m.shape, - K=self.K) + numpy.savez( + filename, data=m.data, indptr=m.indptr, indices=m.indices, shape=m.shape, K=self.K + ) @classmethod def load(cls, filename): @@ -112,17 +126,18 @@ def load(cls, filename): filename = filename + ".npz" m = numpy.load(filename) - similarity = csr_matrix((m['data'], m['indices'], m['indptr']), shape=m['shape']) + similarity = csr_matrix((m["data"], m["indices"], m["indptr"]), shape=m["shape"]) ret = cls() ret.similarity = similarity ret.scorer = NearestNeighboursScorer(similarity) - ret.K = m['K'] + ret.K = m["K"] return ret class CosineRecommender(ItemItemRecommender): """ An Item-Item Recommender on Cosine distances between items """ + def fit(self, counts, show_progress=True): # cosine distance is just the dot-product of a normalized matrix ItemItemRecommender.fit(self, normalize(counts), show_progress) 
@@ -130,6 +145,7 @@ def fit(self, counts, show_progress=True): class TFIDFRecommender(ItemItemRecommender): """ An Item-Item Recommender on TF-IDF distances between items """ + def fit(self, counts, show_progress=True): weighted = normalize(tfidf_weight(counts)) ItemItemRecommender.fit(self, weighted, show_progress) @@ -137,7 +153,8 @@ def fit(self, counts, show_progress=True): class BM25Recommender(ItemItemRecommender): """ An Item-Item Recommender on BM25 distance between items """ - def __init__(self, K=20, K1=1.2, B=.75, num_threads=0): + + def __init__(self, K=20, K1=1.2, B=0.75, num_threads=0): super(BM25Recommender, self).__init__(K, num_threads) self.K1 = K1 self.B = B @@ -161,8 +178,8 @@ def tfidf_weight(X): def normalize(X): - """ equivalent to scipy.preprocessing.normalize on sparse matrices - , but lets avoid another depedency just for a small utility function """ + """equivalent to scipy.preprocessing.normalize on sparse matrices + , but lets avoid another depedency just for a small utility function""" X = coo_matrix(X) X.data = X.data / sqrt(bincount(X.row, X.data ** 2))[X.row] return X diff --git a/implicit/utils.py b/implicit/utils.py index 5587571e..ec9e69af 100644 --- a/implicit/utils.py +++ b/implicit/utils.py @@ -6,7 +6,7 @@ def nonzeros(m, row): """ returns the non zeroes of a row in csr_matrix """ - for index in range(m.indptr[row], m.indptr[row+1]): + for index in range(m.indptr[row], m.indptr[row + 1]): yield m.indices[index], m.data[index] @@ -14,21 +14,25 @@ def nonzeros(m, row): def check_blas_config(): - """ checks to see if using OpenBlas/Intel MKL. If so, warn if the number of threads isn't set - to 1 (causes severe perf issues when training - can be 10x slower) """ + """checks to see if using OpenBlas/Intel MKL. If so, warn if the number of threads isn't set + to 1 (causes severe perf issues when training - can be 10x slower)""" # don't warn repeatedly global _checked_blas_config if _checked_blas_config: return _checked_blas_config = True - if np.__config__.get_info('openblas_info') and os.environ.get('OPENBLAS_NUM_THREADS') != '1': - logging.warning("OpenBLAS detected. Its highly recommend to set the environment variable " - "'export OPENBLAS_NUM_THREADS=1' to disable its internal multithreading") - if np.__config__.get_info('blas_mkl_info') and os.environ.get('MKL_NUM_THREADS') != '1': - logging.warning("Intel MKL BLAS detected. Its highly recommend to set the environment " - "variable 'export MKL_NUM_THREADS=1' to disable its internal " - "multithreading") + if np.__config__.get_info("openblas_info") and os.environ.get("OPENBLAS_NUM_THREADS") != "1": + logging.warning( + "OpenBLAS detected. Its highly recommend to set the environment variable " + "'export OPENBLAS_NUM_THREADS=1' to disable its internal multithreading" + ) + if np.__config__.get_info("blas_mkl_info") and os.environ.get("MKL_NUM_THREADS") != "1": + logging.warning( + "Intel MKL BLAS detected. 
Its highly recommend to set the environment " + "variable 'export MKL_NUM_THREADS=1' to disable its internal " + "multithreading" + ) def check_random_state(random_state): diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..aa4949aa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 100 diff --git a/setup.cfg b/setup.cfg index e94bcbc5..b6f6e242 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,11 +11,14 @@ max-line-length = 100 exclude = build,.eggs,.tox [isort] +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +use_parentheses = True +ensure_newline_before_comments = True known_first_party = implicit known_third_party = scipy,annoy,numpy,cython,pandas line_length = 100 -balanced_wrapping = True -indent = ' ' skip = build,.eggs,.tox [bumpversion:file:implicit/__init__.py] diff --git a/setup.py b/setup.py index ec4a62f3..f4ef2f21 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ from cuda_setup import CUDA, build_ext -NAME = 'implicit' +NAME = "implicit" VERSION = "0.4.4" @@ -21,17 +21,17 @@ def define_extensions(): if sys.platform.startswith("win"): # compile args from # https://msdn.microsoft.com/en-us/library/fwkeyyhe.aspx - compile_args = ['/O2', '/openmp'] + compile_args = ["/O2", "/openmp"] link_args = [] else: gcc = extract_gcc_binaries() if gcc is not None: - rpath = '/usr/local/opt/gcc/lib/gcc/' + gcc[-1] + '/' - link_args = ['-Wl,-rpath,' + rpath] + rpath = "/usr/local/opt/gcc/lib/gcc/" + gcc[-1] + "/" + link_args = ["-Wl,-rpath," + rpath] else: link_args = [] - compile_args = ['-Wno-unused-function', '-Wno-maybe-uninitialized', '-O3', '-ffast-math'] + compile_args = ["-Wno-unused-function", "-Wno-maybe-uninitialized", "-O3", "-ffast-math"] if use_openmp: compile_args.append("-fopenmp") link_args.append("-fopenmp") @@ -45,38 +45,60 @@ def define_extensions(): # except ImportError: # raise ValueError("numpy is required to build from source") - src_ext = '.pyx' - modules = [Extension("implicit." + name, - [os.path.join("implicit", name + src_ext)], - language='c++', - extra_compile_args=compile_args, - extra_link_args=link_args) - for name in ['_nearest_neighbours', 'lmf', 'evaluation']] - modules.extend([Extension("implicit.cpu." + name, - [os.path.join("implicit", "cpu", name + src_ext)], - language='c++', - extra_compile_args=compile_args, - extra_link_args=link_args) - for name in ['_als', 'bpr']]) - modules.append(Extension("implicit." + 'recommender_base', - [os.path.join("implicit", 'recommender_base' + src_ext), - os.path.join("implicit", 'topnc.cpp')], - language='c++', - extra_compile_args=compile_args, - extra_link_args=link_args)) + src_ext = ".pyx" + modules = [ + Extension( + "implicit." + name, + [os.path.join("implicit", name + src_ext)], + language="c++", + extra_compile_args=compile_args, + extra_link_args=link_args, + ) + for name in ["_nearest_neighbours", "lmf", "evaluation"] + ] + modules.extend( + [ + Extension( + "implicit.cpu." + name, + [os.path.join("implicit", "cpu", name + src_ext)], + language="c++", + extra_compile_args=compile_args, + extra_link_args=link_args, + ) + for name in ["_als", "bpr"] + ] + ) + modules.append( + Extension( + "implicit." 
+ "recommender_base", + [ + os.path.join("implicit", "recommender_base" + src_ext), + os.path.join("implicit", "topnc.cpp"), + ], + language="c++", + extra_compile_args=compile_args, + extra_link_args=link_args, + ) + ) if CUDA: - modules.append(Extension("implicit.gpu._cuda", - [os.path.join("implicit", "gpu", "_cuda" + src_ext), - os.path.join("implicit", "gpu", "als.cu"), - os.path.join("implicit", "gpu", "bpr.cu"), - os.path.join("implicit", "gpu", "matrix.cu")], - language="c++", - extra_compile_args=compile_args, - extra_link_args=link_args, - library_dirs=[CUDA['lib64']], - libraries=['cudart', 'cublas', 'curand'], - include_dirs=[CUDA['include'], '.'])) + modules.append( + Extension( + "implicit.gpu._cuda", + [ + os.path.join("implicit", "gpu", "_cuda" + src_ext), + os.path.join("implicit", "gpu", "als.cu"), + os.path.join("implicit", "gpu", "bpr.cu"), + os.path.join("implicit", "gpu", "matrix.cu"), + ], + language="c++", + extra_compile_args=compile_args, + extra_link_args=link_args, + library_dirs=[CUDA["lib64"]], + libraries=["cudart", "cublas", "curand"], + include_dirs=[CUDA["include"], "."], + ) + ) else: print("Failed to find CUDA toolkit. Building without GPU acceleration.") @@ -86,13 +108,16 @@ def define_extensions(): # set_gcc copied from glove-python project # https://github.com/maciejkula/glove-python + def extract_gcc_binaries(): """Try to find GCC on OSX for OpenMP support.""" - patterns = ['/opt/local/bin/g++-mp-[0-9]*.[0-9]*', - '/opt/local/bin/g++-mp-[0-9]*', - '/usr/local/bin/g++-[0-9]*.[0-9]*', - '/usr/local/bin/g++-[0-9]*'] - if platform.system() == 'Darwin': + patterns = [ + "/opt/local/bin/g++-mp-[0-9]*.[0-9]*", + "/opt/local/bin/g++-mp-[0-9]*", + "/usr/local/bin/g++-[0-9]*.[0-9]*", + "/usr/local/bin/g++-[0-9]*", + ] + if platform.system() == "Darwin": gcc_binaries = [] for pattern in patterns: gcc_binaries += glob.glob(pattern) @@ -109,7 +134,7 @@ def extract_gcc_binaries(): def set_gcc(): """Try to use GCC on OSX for OpenMP support.""" # For macports and homebrew - if platform.system() == 'Darwin': + if platform.system() == "Darwin": gcc = extract_gcc_binaries() if gcc is not None: @@ -119,8 +144,9 @@ def set_gcc(): else: global use_openmp use_openmp = False - logging.warning('No GCC available. Install gcc from Homebrew ' - 'using brew install gcc.') + logging.warning( + "No GCC available. Install gcc from Homebrew " "using brew install gcc." 
+ ) set_gcc() @@ -136,32 +162,31 @@ def read(file_name): setup( name=NAME, version=VERSION, - description='Collaborative Filtering for Implicit Feedback Datasets', + description="Collaborative Filtering for Implicit Feedback Datasets", long_description=read("README.md"), long_description_content_type="text/markdown", - url='http://github.com/benfred/implicit/', - author='Ben Frederickson', - author_email='ben@benfrederickson.com', - license='MIT', + url="http://github.com/benfred/implicit/", + author="Ben Frederickson", + author_email="ben@benfrederickson.com", + license="MIT", classifiers=[ - 'Development Status :: 4 - Beta', - 'Natural Language :: English', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3', - 'Programming Language :: Cython', - 'Operating System :: OS Independent', - 'Topic :: Software Development :: Libraries :: Python Modules'], - - keywords='Matrix Factorization, Implicit Alternating Least Squares, ' - 'Collaborative Filtering, Recommender Systems', - + "Development Status :: 4 - Beta", + "Natural Language :: English", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 3", + "Programming Language :: Cython", + "Operating System :: OS Independent", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + keywords="Matrix Factorization, Implicit Alternating Least Squares, " + "Collaborative Filtering, Recommender Systems", packages=find_packages(), - install_requires=['numpy', 'scipy>=0.16', 'tqdm>=4.27'], + install_requires=["numpy", "scipy>=0.16", "tqdm>=4.27"], setup_requires=["Cython>=0.24"], ext_modules=define_extensions(), - cmdclass={'build_ext': build_ext}, + cmdclass={"build_ext": build_ext}, test_suite="tests", ) diff --git a/tests/als_test.py b/tests/als_test.py index 0292752b..fda1c628 100644 --- a/tests/als_test.py +++ b/tests/als_test.py @@ -12,35 +12,37 @@ class ALSTest(unittest.TestCase, TestRecommenderBaseMixin): - def _get_model(self): - return AlternatingLeastSquares(factors=3, regularization=0, use_gpu=False, - random_state=23) + return AlternatingLeastSquares(factors=3, regularization=0, use_gpu=False, random_state=23) def test_cg_nan(self): # test issue with CG code that was causing NaN values in output: # https://github.com/benfred/implicit/issues/19#issuecomment-283164905 - raw = [[0.0, 2.0, 1.5, 1.33333333, 1.25, 1.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25, 1.2], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] + raw = [ + [0.0, 2.0, 1.5, 1.33333333, 1.25, 1.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + 
[0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25, 1.2], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ] counts = csr_matrix(raw, dtype=np.float64) for use_native in (True, False): - model = AlternatingLeastSquares(factors=3, - regularization=0.01, - dtype=np.float64, - use_native=use_native, - use_cg=True, - use_gpu=False, - random_state=23) + model = AlternatingLeastSquares( + factors=3, + regularization=0.01, + dtype=np.float64, + use_native=use_native, + use_cg=True, + use_gpu=False, + random_state=23, + ) model.fit(counts, show_progress=False) rows, cols = model.item_factors, model.user_factors @@ -49,38 +51,57 @@ def test_cg_nan(self): def test_cg_nan2(self): # test out Nan appearing in CG code (from https://github.com/benfred/implicit/issues/106) - Ciu = random(m=100, n=100, density=0.0005, format='coo', dtype=np.float32, - random_state=42, data_rvs=None).T.tocsr() - - configs = [{'use_native': True, 'use_gpu': False}, {'use_native': False, 'use_gpu': False}] + Ciu = random( + m=100, + n=100, + density=0.0005, + format="coo", + dtype=np.float32, + random_state=42, + data_rvs=None, + ).T.tocsr() + + configs = [{"use_native": True, "use_gpu": False}, {"use_native": False, "use_gpu": False}] if HAS_CUDA: - configs.append({'use_gpu': True}) + configs.append({"use_gpu": True}) for options in configs: - model = AlternatingLeastSquares(factors=32, regularization=10, iterations=10, - dtype=np.float32, random_state=23, - **options) + model = AlternatingLeastSquares( + factors=32, + regularization=10, + iterations=10, + dtype=np.float32, + random_state=23, + **options + ) model.fit(Ciu, show_progress=False) self.assertTrue(np.isfinite(model.item_factors).all()) self.assertTrue(np.isfinite(model.user_factors).all()) def test_factorize(self): - counts = csr_matrix([[1, 1, 0, 1, 0, 0], - [0, 1, 1, 1, 0, 0], - [1, 0, 1, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [0, 0, 1, 1, 0, 1], - [0, 1, 0, 0, 0, 1], - [0, 0, 0, 0, 1, 1]], dtype=np.float64) + counts = csr_matrix( + [ + [1, 1, 0, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [1, 0, 1, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 1], + [0, 1, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 1], + ], + dtype=np.float64, + ) user_items = counts * 2 # try all 8 variants of native/python, cg/cholesky, and # 64 vs 32 bit factors - options = [(dtype, cg, native, False) - for dtype in (np.float32, np.float64) - for cg in (False, True) - for native in (False, True)] + options = [ + (dtype, cg, native, False) + for dtype in (np.float32, np.float64) + for cg in (False, True) + for native in (False, True) + ] # also try out GPU support if available if HAS_CUDA: @@ -88,49 +109,61 @@ def test_factorize(self): for dtype, use_cg, use_native, use_gpu in options: try: - model = AlternatingLeastSquares(factors=6, - regularization=0, - dtype=dtype, - use_native=use_native, - use_cg=use_cg, - use_gpu=use_gpu, - random_state=23) + model = AlternatingLeastSquares( + factors=6, + regularization=0, + dtype=dtype, + use_native=use_native, + 
use_cg=use_cg, + use_gpu=use_gpu, + random_state=23, + ) model.fit(user_items, show_progress=False) rows, cols = model.item_factors, model.user_factors except Exception as e: - self.fail(msg="failed to factorize matrix. Error=%s" - " dtype=%s, cg=%s, native=%s gpu=%s" - % (e, dtype, use_cg, use_native, use_gpu)) + self.fail( + msg="failed to factorize matrix. Error=%s" + " dtype=%s, cg=%s, native=%s gpu=%s" % (e, dtype, use_cg, use_native, use_gpu) + ) reconstructed = rows.dot(cols.T) for i in range(counts.shape[0]): for j in range(counts.shape[1]): - self.assertAlmostEqual(counts[i, j], reconstructed[i, j], - delta=0.0001, - msg="failed to reconstruct row=%s, col=%s," - " value=%.5f, dtype=%s, cg=%s, native=%s gpu=%s" - % (i, j, reconstructed[i, j], dtype, use_cg, - use_native, use_gpu)) + self.assertAlmostEqual( + counts[i, j], + reconstructed[i, j], + delta=0.0001, + msg="failed to reconstruct row=%s, col=%s," + " value=%.5f, dtype=%s, cg=%s, native=%s gpu=%s" + % (i, j, reconstructed[i, j], dtype, use_cg, use_native, use_gpu), + ) def test_explain(self): - counts = csr_matrix([[1, 1, 0, 1, 0, 0], - [0, 1, 1, 1, 0, 0], - [1, 4, 1, 0, 7, 0], - [1, 1, 0, 0, 0, 0], - [9, 0, 4, 1, 0, 1], - [0, 1, 0, 0, 0, 1], - [0, 0, 2, 0, 1, 1]], dtype=np.float64) + counts = csr_matrix( + [ + [1, 1, 0, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [1, 4, 1, 0, 7, 0], + [1, 1, 0, 0, 0, 0], + [9, 0, 4, 1, 0, 1], + [0, 1, 0, 0, 0, 1], + [0, 0, 2, 0, 1, 1], + ], + dtype=np.float64, + ) user_items = counts * 2 item_users = user_items.T - model = AlternatingLeastSquares(factors=4, - regularization=20, - use_native=False, - use_cg=False, - use_gpu=False, - iterations=100, - random_state=23) + model = AlternatingLeastSquares( + factors=4, + regularization=20, + use_native=False, + use_cg=False, + use_gpu=False, + iterations=100, + random_state=23, + ) model.fit(user_items, show_progress=False) userid = 0 @@ -154,7 +187,8 @@ def test_explain(self): # Assert explanation with precomputed user weights is correct top_score_explained, top_contributions, W = model.explain( - userid, item_users, itemid=top_rec, user_weights=W, N=2) + userid, item_users, itemid=top_rec, user_weights=W, N=2 + ) top_scores = [s for _, s in top_contributions] top_items = [i for i, _ in top_contributions] self.assertEqual(2, len(top_contributions)) @@ -180,20 +214,22 @@ def test_recommend_all(self): offset = 2 recs = model.recommend_all( - user_items[[2, 3, 4]], - N=1, - show_progress=False, - users_items_offset=offset) + user_items[[2, 3, 4]], N=1, show_progress=False, users_items_offset=offset + ) for userid in range(2, 5): - self.assertEqual(len(recs[userid-offset]), 1) - self.assertEqual(recs[userid-offset][0], userid) + self.assertEqual(len(recs[userid - offset]), 1) + self.assertEqual(recs[userid - offset][0], userid) # try asking for more items than possible self.assertRaises(ValueError, model.recommend_all, user_items, N=10000, show_progress=False) self.assertRaises( - ValueError, model.recommend_all, user_items, filter_items=list(range(10000)), - show_progress=False) + ValueError, + model.recommend_all, + user_items, + filter_items=list(range(10000)), + show_progress=False, + ) # filter recommended items using an additional filter list recs = model.recommend_all(user_items, N=1, filter_items=[0], show_progress=False) @@ -201,10 +237,11 @@ def test_recommend_all(self): if HAS_CUDA: + class GPUALSTest(unittest.TestCase, TestRecommenderBaseMixin): def _get_model(self): - return AlternatingLeastSquares(factors=32, regularization=0, - 
random_state=23) + return AlternatingLeastSquares(factors=32, regularization=0, random_state=23) + if __name__ == "__main__": unittest.main() diff --git a/tests/approximate_als_test.py b/tests/approximate_als_test.py index 90a2d391..09b48981 100644 --- a/tests/approximate_als_test.py +++ b/tests/approximate_als_test.py @@ -2,8 +2,11 @@ import unittest -from implicit.approximate_als import (AnnoyAlternatingLeastSquares, FaissAlternatingLeastSquares, - NMSLibAlternatingLeastSquares) +from implicit.approximate_als import ( + AnnoyAlternatingLeastSquares, + FaissAlternatingLeastSquares, + NMSLibAlternatingLeastSquares, +) from implicit.gpu import HAS_CUDA from .recommender_base_test import TestRecommenderBaseMixin @@ -14,13 +17,13 @@ class AnnoyALSTest(unittest.TestCase, TestRecommenderBaseMixin): def _get_model(self): - return AnnoyAlternatingLeastSquares(factors=2, regularization=0, - random_state=23) + return AnnoyAlternatingLeastSquares(factors=2, regularization=0, random_state=23) def test_pickle(self): # pickle isn't supported on annoy indices pass + except ImportError: pass @@ -29,14 +32,15 @@ def test_pickle(self): class NMSLibALSTest(unittest.TestCase, TestRecommenderBaseMixin): def _get_model(self): - return NMSLibAlternatingLeastSquares(factors=2, regularization=0, - index_params={'post': 2}, - random_state=23) + return NMSLibAlternatingLeastSquares( + factors=2, regularization=0, index_params={"post": 2}, random_state=23 + ) def test_pickle(self): # pickle isn't supported on nmslib indices pass + except ImportError: pass @@ -45,21 +49,28 @@ def test_pickle(self): class FaissALSTest(unittest.TestCase, TestRecommenderBaseMixin): def _get_model(self): - return FaissAlternatingLeastSquares(nlist=1, nprobe=1, factors=2, regularization=0, - use_gpu=False, random_state=23) + return FaissAlternatingLeastSquares( + nlist=1, nprobe=1, factors=2, regularization=0, use_gpu=False, random_state=23 + ) def test_pickle(self): # pickle isn't supported on faiss indices pass if HAS_CUDA: + class FaissALSGPUTest(unittest.TestCase, TestRecommenderBaseMixin): __regularization = 0 def _get_model(self): - return FaissAlternatingLeastSquares(nlist=1, nprobe=1, factors=32, - regularization=self.__regularization, - use_gpu=True, random_state=23) + return FaissAlternatingLeastSquares( + nlist=1, + nprobe=1, + factors=32, + regularization=self.__regularization, + use_gpu=True, + random_state=23, + ) def test_similar_items(self): # For the GPU version, we currently have to have factors be a multiple of 32 @@ -89,6 +100,7 @@ def test_pickle(self): # pickle isn't supported on faiss indices pass + except ImportError: pass diff --git a/tests/bpr_test.py b/tests/bpr_test.py index 82835706..7c4db32c 100644 --- a/tests/bpr_test.py +++ b/tests/bpr_test.py @@ -9,10 +9,10 @@ class BPRTest(unittest.TestCase, TestRecommenderBaseMixin): - def _get_model(self): - return BayesianPersonalizedRanking(factors=3, regularization=0, use_gpu=False, - random_state=42) + return BayesianPersonalizedRanking( + factors=3, regularization=0, use_gpu=False, random_state=42 + ) # Test issue #264 causing crashes on empty matrices def test_fit_empty_matrix(self): @@ -26,11 +26,13 @@ def test_fit_almost_empty_matrix(self): if HAS_CUDA: - class BPRGPUTest(unittest.TestCase, TestRecommenderBaseMixin): + class BPRGPUTest(unittest.TestCase, TestRecommenderBaseMixin): def _get_model(self): - return BayesianPersonalizedRanking(factors=31, regularization=0, use_gpu=True, - learning_rate=0.02, random_state=42) + return BayesianPersonalizedRanking( + 
factors=31, regularization=0, use_gpu=True, learning_rate=0.02, random_state=42 + ) + if __name__ == "__main__": unittest.main() diff --git a/tests/knn_test.py b/tests/knn_test.py index 07448b0c..97fa7e9e 100644 --- a/tests/knn_test.py +++ b/tests/knn_test.py @@ -27,13 +27,18 @@ def _get_model(self): class NearestNeighboursTest(unittest.TestCase): def test_all_pairs_knn(self): - counts = csr_matrix([[5, 1, 0, 9, 0, 0], - [0, 2, 1, 1, 0, 0], - [7, 0, 3, 0, 0, 0], - [1, 8, 0, 0, 0, 0], - [0, 0, 4, 4, 0, 0], - [0, 3, 0, 0, 0, 2], - [0, 0, 0, 0, 6, 0]], dtype=np.float64) + counts = csr_matrix( + [ + [5, 1, 0, 9, 0, 0], + [0, 2, 1, 1, 0, 0], + [7, 0, 3, 0, 0, 0], + [1, 8, 0, 0, 0, 0], + [0, 0, 4, 4, 0, 0], + [0, 3, 0, 0, 0, 2], + [0, 0, 0, 0, 6, 0], + ], + dtype=np.float64, + ) counts = implicit.nearest_neighbours.tfidf_weight(counts).tocsr() # compute all neighbours using matrix dot product @@ -48,9 +53,13 @@ def test_all_pairs_knn(self): # make sure top K selected row = all_neighbours[rowid] - self.assertEqual(set(knn[rowid].indices), - set(colid for colid, _ in - sorted(zip(row.indices, row.data), key=lambda x: -x[1])[:K])) + self.assertEqual( + set(knn[rowid].indices), + set( + colid + for colid, _ in sorted(zip(row.indices, row.data), key=lambda x: -x[1])[:K] + ), + ) if __name__ == "__main__": diff --git a/tests/lmf_test.py b/tests/lmf_test.py index d9983a86..26a952aa 100644 --- a/tests/lmf_test.py +++ b/tests/lmf_test.py @@ -7,8 +7,9 @@ class LMFTest(unittest.TestCase, TestRecommenderBaseMixin): def _get_model(self): - return LogisticMatrixFactorization(factors=3, regularization=0, use_gpu=False, - random_state=43) + return LogisticMatrixFactorization( + factors=3, regularization=0, use_gpu=False, random_state=43 + ) if __name__ == "__main__": diff --git a/tests/recommender_base_test.py b/tests/recommender_base_test.py index 7f1bf815..8900fa79 100644 --- a/tests/recommender_base_test.py +++ b/tests/recommender_base_test.py @@ -12,8 +12,8 @@ class TestRecommenderBaseMixin(object): - """ Mixin to test a bunch of common functionality in models - deriving from RecommenderBase """ + """Mixin to test a bunch of common functionality in models + deriving from RecommenderBase""" def _get_model(self): raise NotImplementedError() @@ -59,8 +59,9 @@ def test_recalculate_user(self): # we should get the same item if we recalculate_user try: - recs_from_liked = model.recommend(userid=0, user_items=user_vector, - N=1, recalculate_user=True) + recs_from_liked = model.recommend( + userid=0, user_items=user_vector, N=1, recalculate_user=True + ) self.assertEqual(recs[0][0], recs_from_liked[0][0]) # TODO: if we set regularization for the model to be sufficiently high, the @@ -80,8 +81,9 @@ def test_evaluation(self): # we've withheld the diagnoal for testing, and have verified that in test_recommend # it is returned for each user. So p@1 should be 1.0 - p = precision_at_k(model, user_items.tocsr(), csr_matrix(np.eye(50)), K=1, - show_progress=False) + p = precision_at_k( + model, user_items.tocsr(), csr_matrix(np.eye(50)), K=1, show_progress=False + ) self.assertEqual(p, 1) def test_similar_users(self): @@ -168,9 +170,9 @@ def test_pickle(self): pickle.loads(pickled) def get_checker_board(self, X): - """ Returns a 'checkerboard' matrix: where every even userid has liked + """Returns a 'checkerboard' matrix: where every even userid has liked every even itemid and every odd userid has liked every odd itemid. 
- The diagonal is withheld for testing recommend methods """ + The diagonal is withheld for testing recommend methods""" ret = np.zeros((X, X)) for i in range(X): for j in range(i % 2, X, 2):