Skip to content

Commit

Permalink
fixes to get benchmark to work (#276)
Browse files (browse the repository at this point in the history)
* fixes to get benchmark to work

* use dask tmpdir
  • Loading branch information
Kyle Lahnakoski authored and martindurant committed Jan 17, 2018
1 parent b565f8b commit 90e31d6
Showing 1 changed file with 46 additions and 14 deletions.
60 changes: 46 additions & 14 deletions fastparquet/benchmarks/columns.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from contextlib import contextmanager
import os
import numpy as np
import os
import pandas as pd
import shutil
import sys
import tempfile
import time

from fastparquet.util import join_path
from contextlib import contextmanager

from fastparquet import write, ParquetFile
from dask.utils import tmpdir
from fastparquet.util import join_path


@contextmanager
Expand All @@ -23,7 +24,7 @@ def time_column():
result = {}
fn = join_path(tempdir, 'temp.parq')
n = 10000000
r = np.random.randint(-1e10, 1e10, n)
r = np.random.randint(-1e10, 1e10, n, dtype='int64')
d = pd.DataFrame({'w': pd.Categorical(np.random.choice(
['hi', 'you', 'people'], size=n)),
'x': r.view('timedelta64[ns]'),
Expand Down Expand Up @@ -110,14 +111,6 @@ def time_text():
return result


if __name__ == '__main__':
    # Script entry point: run each benchmark and merge the timings it
    # returns into a single mapping.
    result = {}
    for f in [time_column, time_text]:
        result.update(f())
    # Print in sorted key order so successive runs are easy to compare.
    for k in sorted(result):
        print(k, result[k])


def time_find_nulls(N=10000000):
x = np.random.random(N)
df = pd.DataFrame({'x': x})
Expand Down Expand Up @@ -165,3 +158,42 @@ def run_find_nulls(df, res):
with measure((df.x.dtype.kind, nvalid, 'count'), res):
df.x.count()



# from https://github.com/dask/dask/blob/6cbcf0813af48597a427a1fe6c71cce2a79086b0/dask/utils.py#L78
@contextmanager
def ignoring(*exceptions):
    """Context manager that silently swallows the given exception types.

    Parameters
    ----------
    *exceptions : exception classes
        Exception types to suppress if they are raised inside the ``with``
        body.  Any other exception propagates unchanged.  With no
        arguments nothing is suppressed.
    """
    try:
        yield
    except exceptions:
        # Deliberate best-effort: the caller asked for these to be ignored.
        pass

# from https://github.com/dask/dask/blob/6cbcf0813af48597a427a1fe6c71cce2a79086b0/dask/utils.py#L116
@contextmanager
def tmpdir(dir=None):
    """Create a temporary directory and remove it when the block exits.

    Parameters
    ----------
    dir : str or None
        Passed through to ``tempfile.mkdtemp`` as its ``dir`` argument.

    Yields
    ------
    str
        Path of the newly created directory.
    """
    dirname = tempfile.mkdtemp(dir=dir)

    try:
        yield dirname
    finally:
        # Cleanup is best-effort: an OSError while deleting is ignored so it
        # cannot mask an exception raised inside the ``with`` body.
        with ignoring(OSError):
            if os.path.isdir(dirname):
                shutil.rmtree(dirname)
            elif os.path.exists(dirname):
                # The path was replaced by a non-directory while in use.
                os.remove(dirname)


if __name__ == '__main__':
    # Script entry point: run each benchmark and merge the timings it
    # returns into a single mapping.
    result = {}

    # Report the interpreter and platform so the numbers can be attributed
    # to the environment that produced them.
    print("sys.version = " + sys.version)
    print("sys.platform = " + sys.platform)

    for f in [time_column, time_text]:
        result.update(f())
    # Print in sorted key order so successive runs are easy to compare.
    for k in sorted(result):
        print(k, result[k])


0 comments on commit 90e31d6

Please sign in to comment.