Skip to content

Commit

Permalink
Run black/isort (#407)
Browse files: browse the repository at this point in the history
Run black/isort on all the files. Also run them as part of CI to ensure consistent formatting.
  • Loading branch information
benfred committed Nov 15, 2020
1 parent c2cd6c3 commit 80f0e4d
Show file tree
Hide file tree
Showing 35 changed files with 1,083 additions and 700 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/build.yml
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 isort cpplint
pip install flake8 isort cpplint black
pip install -r requirements.txt
- name: Lint with flake8
run: |
Expand All @@ -36,6 +36,9 @@ jobs:
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=11 --max-line-length=127 --statistics
flake8 --filename='*.pyx,*.px*' --ignore E901,E225,E226,E227,E402,E999
- name: Lint with black
run: |
black --check .
- name: Lint with isort
run: |
isort -c .
Expand Down
140 changes: 84 additions & 56 deletions benchmarks/benchmark_als.py
Expand Up @@ -17,6 +17,7 @@

try:
import implicit.gpu # noqa

has_cuda = True
except ImportError:
has_cuda = False
Expand All @@ -30,25 +31,29 @@ def inner(iteration, elapsed):
loss = calculate_loss(plays, model.item_factors, model.user_factors, 0)
print("model %s iteration %i loss %.5f" % (name, iteration, loss))
output[name].append(loss)

return inner

for steps in [2, 3, 4]:
model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=True, regularization=0,
iterations=25)
model = AlternatingLeastSquares(
factors=100, use_native=True, use_cg=True, regularization=0, iterations=25
)
model.cg_steps = steps
model.fit_callback = store_loss(model, 'cg%i' % steps)
model.fit_callback = store_loss(model, "cg%i" % steps)
model.fit(plays)

if has_cuda:
model = AlternatingLeastSquares(factors=100, use_native=True, use_gpu=True,
regularization=0, iterations=25)
model.fit_callback = store_loss(model, 'gpu')
model = AlternatingLeastSquares(
factors=100, use_native=True, use_gpu=True, regularization=0, iterations=25
)
model.fit_callback = store_loss(model, "gpu")
model.use_gpu = True
model.fit(plays)

model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=False, regularization=0,
iterations=25)
model.fit_callback = store_loss(model, 'cholesky')
model = AlternatingLeastSquares(
factors=100, use_native=True, use_cg=False, regularization=0, iterations=25
)
model.fit_callback = store_loss(model, "cholesky")
model.fit(plays)

return output
Expand All @@ -61,99 +66,122 @@ def store_time(model, name):
def inner(iteration, elapsed):
print(name, model.factors, iteration, elapsed)
times[name][model.factors].append(elapsed)

return inner

output = defaultdict(list)
for factors in range(32, 257, 32):
for steps in [2, 3, 4]:
model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=True,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'cg%i' % steps)
model = AlternatingLeastSquares(
factors=factors,
use_native=True,
use_cg=True,
regularization=0,
iterations=iterations,
)
model.fit_callback = store_time(model, "cg%i" % steps)
model.cg_steps = steps
model.fit(plays)

model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=False,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'cholesky')
model = AlternatingLeastSquares(
factors=factors, use_native=True, use_cg=False, regularization=0, iterations=iterations
)
model.fit_callback = store_time(model, "cholesky")
model.fit(plays)

if has_cuda:
model = AlternatingLeastSquares(factors=factors, use_native=True, use_gpu=True,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'gpu')
model = AlternatingLeastSquares(
factors=factors,
use_native=True,
use_gpu=True,
regularization=0,
iterations=iterations,
)
model.fit_callback = store_time(model, "gpu")
model.fit(plays)

# take the min time for the output
output['factors'].append(factors)
output["factors"].append(factors)
for name, stats in times.items():
output[name].append(min(stats[factors]))

return output


LABELS = {'cg2': 'CG (2 Steps/Iteration)',
'cg3': 'CG (3 Steps/Iteration)',
'cg4': 'CG (4 Steps/Iteration)',
'gpu': 'GPU',
'cholesky': 'Cholesky'}

COLOURS = {'cg2': "#2ca02c",
'cg3': "#ff7f0e",
'cg4': "#c5b0d5",
'gpu': "#1f77b4",
'cholesky': "#d62728"}


def generate_speed_graph(data, filename="als_speed.png", keys=['gpu', 'cg2', 'cg3', 'cholesky'],
labels=None, colours=None):
LABELS = {
"cg2": "CG (2 Steps/Iteration)",
"cg3": "CG (3 Steps/Iteration)",
"cg4": "CG (4 Steps/Iteration)",
"gpu": "GPU",
"cholesky": "Cholesky",
}

COLOURS = {
"cg2": "#2ca02c",
"cg3": "#ff7f0e",
"cg4": "#c5b0d5",
"gpu": "#1f77b4",
"cholesky": "#d62728",
}


def generate_speed_graph(
data,
filename="als_speed.png",
keys=["gpu", "cg2", "cg3", "cholesky"],
labels=None,
colours=None,
):
labels = labels or {}
colours = colours or {}

seaborn.set()
fig, ax = plt.subplots()

factors = data['factors']
factors = data["factors"]
for key in keys:
ax.plot(factors, data[key],
color=colours.get(key, COLOURS.get(key)),
marker='o', markersize=6)
ax.plot(
factors, data[key], color=colours.get(key, COLOURS.get(key)), marker="o", markersize=6
)

ax.text(factors[-1] + 5, data[key][-1], labels.get(key, LABELS[key]), fontsize=10)

ax.set_ylabel("Seconds per Iteration")
ax.set_xlabel("Factors")
plt.savefig(filename, bbox_inches='tight', dpi=300)
plt.savefig(filename, bbox_inches="tight", dpi=300)


def generate_loss_graph(data, filename="als_speed.png", keys=['gpu', 'cg2', 'cg3', 'cholesky']):
def generate_loss_graph(data, filename="als_speed.png", keys=["gpu", "cg2", "cg3", "cholesky"]):
seaborn.set()

fig, ax = plt.subplots()

iterations = range(1, len(data['cholesky']) + 1)
iterations = range(1, len(data["cholesky"]) + 1)
for key in keys:
ax.plot(iterations, data[key],
color=COLOURS[key],
marker='o', markersize=6)
ax.plot(iterations, data[key], color=COLOURS[key], marker="o", markersize=6)
ax.text(iterations[-1] + 1, data[key][-1], LABELS[key], fontsize=10)

ax.set_ylabel("Mean Squared Error")
ax.set_xlabel("Iteration")
plt.savefig(filename, bbox_inches='tight', dpi=300)
plt.savefig(filename, bbox_inches="tight", dpi=300)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Benchmark CG version against Cholesky",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('--input', type=str, required=True,
dest='inputfile', help='dataset file in matrix market format')
parser.add_argument('--graph', help='generates graphs',
action="store_true")
parser.add_argument('--loss', help='test training loss',
action="store_true")
parser.add_argument('--speed', help='test training speed',
action="store_true")
parser = argparse.ArgumentParser(
description="Benchmark CG version against Cholesky",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument(
"--input",
type=str,
required=True,
dest="inputfile",
help="dataset file in matrix market format",
)
parser.add_argument("--graph", help="generates graphs", action="store_true")
parser.add_argument("--loss", help="test training loss", action="store_true")
parser.add_argument("--speed", help="test training speed", action="store_true")

args = parser.parse_args()
if not (args.speed or args.loss):
Expand Down
60 changes: 38 additions & 22 deletions benchmarks/benchmark_qmf.py
Expand Up @@ -32,15 +32,24 @@ def benchmark_implicit(matrix, factors, reg, iterations):
def benchmark_qmf(qmfpath, matrix, factors, reg, iterations):
matrix = matrix.tocoo()
datafile = "qmf_data.txt"
open(datafile, "w").write("\n".join("%s %s %s" % vals
for vals in zip(matrix.row, matrix.col, matrix.data)))
open(datafile, "w").write(
"\n".join("%s %s %s" % vals for vals in zip(matrix.row, matrix.col, matrix.data))
)

def get_qmf_command(nepochs):
return [qmfpath, "--train_dataset", datafile,
"--nfactors", str(factors),
"--confidence_weight", "1",
"--nepochs", str(nepochs),
"--regularization_lambda", str(reg)]
return [
qmfpath,
"--train_dataset",
datafile,
"--nfactors",
str(factors),
"--confidence_weight",
"1",
"--nepochs",
str(nepochs),
"--regularization_lambda",
str(reg),
]

# ok, so QMF needs to read the data in - and including
# that in the timing isn't fair. So run it once with no iterations
Expand All @@ -58,8 +67,9 @@ def get_qmf_command(nepochs):
def run_benchmark(args):
plays = bm25_weight(scipy.io.mmread(args.inputfile))

qmf_time = benchmark_qmf(args.qmfpath, plays, args.factors, args.regularization,
args.iterations)
qmf_time = benchmark_qmf(
args.qmfpath, plays, args.factors, args.regularization, args.iterations
)

implicit_time = benchmark_implicit(plays, args.factors, args.regularization, args.iterations)

Expand All @@ -69,19 +79,25 @@ def run_benchmark(args):


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generates Benchmark",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('--input', type=str,
dest='inputfile', help='dataset file in matrix market format')
parser.add_argument('--qmfpath', type=str,
dest='qmfpath', help='full path to qmf wals.bin file', required=True)
parser.add_argument('--factors', type=int, default=50, dest='factors',
help='Number of factors to calculate')
parser.add_argument('--reg', type=float, default=0.8, dest='regularization',
help='regularization weight')
parser.add_argument('--iter', type=int, default=15, dest='iterations',
help='Number of ALS iterations')
parser = argparse.ArgumentParser(
description="Generates Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

parser.add_argument(
"--input", type=str, dest="inputfile", help="dataset file in matrix market format"
)
parser.add_argument(
"--qmfpath", type=str, dest="qmfpath", help="full path to qmf wals.bin file", required=True
)
parser.add_argument(
"--factors", type=int, default=50, dest="factors", help="Number of factors to calculate"
)
parser.add_argument(
"--reg", type=float, default=0.8, dest="regularization", help="regularization weight"
)
parser.add_argument(
"--iter", type=int, default=15, dest="iterations", help="Number of ALS iterations"
)
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)
Expand Down

0 comments on commit 80f0e4d

Please sign in to comment.