Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase testing parallelism #3620

Merged
merged 17 commits into from May 25, 2018
132 changes: 36 additions & 96 deletions build.gradle
Expand Up @@ -101,13 +101,6 @@ task generateBuildInfo(type: Exec) {
outputs.upToDateWhen { false }
}

task generateDistLinks(type: Exec) {
executable 'sh'
args(['generate-dist-links.sh'])
args(hailVersion)
args(deployedSparkVersions)
}

compileScala {
dependsOn generateBuildInfo

Expand Down Expand Up @@ -183,6 +176,8 @@ task(checkSettings) << {
System.setProperty("check.count", checkCount)
}

String parallelism = System.getProperty("test.parallelism", "2")

test {
useTestNG {}

Expand All @@ -199,25 +194,34 @@ test {
beforeTest { descriptor ->
logger.lifecycle("Running test: " + descriptor)
}

maxParallelForks parallelism.toInteger()
}

test.dependsOn(checkSettings)

task testPython(type: Exec, dependsOn: shadowJar) {
commandLine 'python3',
'-m',
'nose',
'-c',
'python/hail/setup.cfg',
'python/hail/tests'
environment SPARK_HOME: sparkHome
environment PYTHONPATH: '' + projectDir + '/python:' + sparkHome + '/python:' + sparkHome + '/python/lib/py4j-' + py4jVersion + '-src.zip'
environment PYSPARK_SUBMIT_ARGS: '--conf spark.driver.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar --conf spark.executor.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar pyspark-shell'
environment PYSPARK_PYTHON: 'python3'
environment NOSE_NOLOGCAPTURE: '1'
commandLine 'pytest',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I figured why pytest was faster than nose -- we aren't running pytest with coverage enabled

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I always get error messages abt coverage not being available. I think this was on the cloud too. I can double check.

'-n',
parallelism,
'--dist=loadscope',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't find the docs for this option, can you point me to them?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Disregard I see that this is an option on pytest-xdist rather than pytest.

'--noconftest',
'python/hail/tests'
environment SPARK_HOME: sparkHome
environment PYTHONPATH: '' + projectDir + '/python:' + sparkHome + '/python:' + sparkHome + '/python/lib/py4j-' + py4jVersion + '-src.zip'
environment PYSPARK_SUBMIT_ARGS: '--conf spark.driver.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar --conf spark.executor.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar pyspark-shell'
environment PYSPARK_PYTHON: 'python3'
}

task testAll(dependsOn: ['testPython', 'test'])
task doctest(type: Exec, dependsOn: shadowJar) {
commandLine 'sh', 'python/hail/docs/doctest.sh', '-n', parallelism
environment SPARK_HOME: sparkHome
environment PYTHONPATH: '' + projectDir + '/python:' + sparkHome + '/python:' + sparkHome + '/python/lib/py4j-' + py4jVersion + '-src.zip'
environment PYSPARK_SUBMIT_ARGS: '--conf spark.driver.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar --conf spark.executor.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar pyspark-shell'
environment PYSPARK_PYTHON: 'python3'
}

task testAll(dependsOn: ['testPython', 'test', 'doctest'])

tasks.withType(ShadowJar) {
manifest {
Expand Down Expand Up @@ -371,102 +375,38 @@ task wrapper(type: Wrapper) {
gradleVersion = '2.14.1'
}

task cleanHailDocs(type: Exec) {
task cleanDocs(type: Exec) {
commandLine 'rm', '-rf', 'build/www/', 'build/tmp/python', 'build/tmp/docs'
}

task setupDocsDirs(type: Exec, dependsOn: cleanHailDocs) {
commandLine 'mkdir', '-p', 'build/www/', 'build/tmp/python/', 'build/tmp/docs', 'build/www/docs'
}

task copyPDF(type: Copy, dependsOn: setupDocsDirs) {
from 'docs/LeveneHaldane.pdf'
into 'build/www'
}

task copyWebsiteContent(type: Copy, dependsOn: setupDocsDirs) {
from 'www/'
include '*.js'
include '*.css'
include '*.css.map'
include '*.html'
include '*.png'
include 'annotationdb/*'
into 'build/www'
}

task readmeToHtml(type: Exec, dependsOn: setupDocsDirs) {
commandLine 'pandoc', '-s', 'README.md', '-f', 'markdown', '-t', 'html', '--mathjax',
'--highlight-style=pygments', '--columns', '10000', '-o', 'build/tmp/README.html'
}

task jobsToHtml(type: Exec, dependsOn: setupDocsDirs) {
commandLine 'pandoc', '-s', 'www/jobs.md', '-f', 'markdown', '-t', 'html', '--mathjax',
'--highlight-style=pygments', '--columns', '10000', '-o', 'build/tmp/jobs.html'
}

task aboutToHtml(type: Exec, dependsOn: setupDocsDirs) {
commandLine 'pandoc', '-s', 'www/about.md', '-f', 'markdown', '-t', 'html', '--mathjax',
'--highlight-style=pygments', '--columns', '10000', '-o', 'build/tmp/about.html'
}

task buildIndex(type: Exec, dependsOn: ['readmeToHtml']) {
args('--html', '-o', 'build/www/index.html', 'www/readme-to-index.xslt', 'build/tmp/README.html')
executable 'xsltproc'
}

task buildJobs(type: Exec, dependsOn: ['jobsToHtml']) {
args('--html', '-o', 'build/www/jobs.html', 'www/jobs.xslt', 'build/tmp/jobs.html')
executable 'xsltproc'
}

task buildAbout(type: Exec, dependsOn: ['aboutToHtml']) {
args('--html', '-o', 'build/www/about.html', 'www/about.xslt', 'build/tmp/about.html')
executable 'xsltproc'
}

task copyPythonDirToTmp(type: Copy, dependsOn: setupDocsDirs) {
from 'python'
into 'build/tmp/python'
task generateDistLinks(type: Exec, dependsOn: ['cleanDocs']) {
executable 'sh'
args(['generate-dist-links.sh'])
args(hailVersion)
args(deployedSparkVersions)
}

task makeHailDocs(type: Exec, dependsOn: ['shadowJar', 'generateDistLinks', 'copyPythonDirToTmp']) {
workingDir 'build/tmp/python/hail/docs'
commandLine 'make', 'SPHINXOPTS="-tchecktutorial"', 'clean', 'html', 'doctest'
task makeDocs(type: Exec, dependsOn: ['shadowJar', 'generateDistLinks']) {
commandLine 'sh', 'python/hail/docs/makeDocs.sh'
environment SPARK_HOME: sparkHome
environment PYSPARK_SUBMIT_ARGS: '--conf spark.driver.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar --conf spark.executor.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar pyspark-shell'
environment PYTHONPATH: '' + projectDir + '/python:' + sparkHome + '/python:' + sparkHome + '/python/lib/py4j-' + py4jVersion + '-src.zip'
environment HAIL_VERSION: hailVersion
environment HAIL_RELEASE: hailVersion + '-' + gitHash
environment SPHINXOPTS: '-tchecktutorial'
}

task makeHailDocsNoTest(type: Exec, dependsOn: ['shadowJar', 'generateDistLinks', 'copyPythonDirToTmp']) {
workingDir 'build/tmp/python/hail/docs'
commandLine 'make', 'clean', 'html'
task makeDocsNoTest(type: Exec, dependsOn: ['shadowJar', 'generateDistLinks']) {
commandLine 'sh', 'python/hail/docs/makeDocs.sh'
environment SPARK_HOME: sparkHome
environment PYSPARK_SUBMIT_ARGS: '--conf spark.driver.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar --conf spark.executor.extraClassPath=' + projectDir + '/build/libs/hail-all-spark.jar pyspark-shell'
environment PYTHONPATH: '' + projectDir + '/python:' + sparkHome + '/python:' + sparkHome + '/python/lib/py4j-' + py4jVersion + '-src.zip'
environment HAIL_VERSION: hailVersion
environment HAIL_RELEASE: hailVersion + '-' + gitHash
environment SPHINXOPTS: ''
}

task copyHailDocs(type: Exec, dependsOn: ['makeHailDocs', 'setupDocsDirs']) {
commandLine 'mv', 'build/tmp/python/hail/docs/_build/html', 'build/www/docs/' + hailVersion
}

task copyHailDocsNoTest(type: Exec, dependsOn: ['makeHailDocsNoTest', 'setupDocsDirs']) {
commandLine 'mv', 'build/tmp/python/hail/docs/_build/html', 'build/www/docs/' + hailVersion
}

task createWebsite(dependsOn: ['copyPDF', 'copyWebsiteContent', 'buildIndex', 'buildJobs', 'buildAbout', 'copyHailDocs'])

task createDocs(dependsOn: createWebsite)

task createDocsNoTest(dependsOn: ['copyPDF', 'copyWebsiteContent', 'buildIndex', 'buildJobs', 'buildAbout', 'copyHailDocsNoTest', 'copyPythonDirToTmp'])

task testDocs(dependsOn: ['copyPDF', 'copyWebsiteContent', 'buildIndex', 'buildJobs', 'buildAbout'])

task assemblePackage(type: Copy, dependsOn: ['createDocs' , 'shadowJar']) {
task assemblePackage(type: Copy, dependsOn: ['makeDocs' , 'shadowJar']) {
from('python') {
into 'python'
}
Expand Down
4 changes: 4 additions & 0 deletions generate-dist-links.sh
@@ -1,5 +1,9 @@
#!/bin/sh

set -e

mkdir -p build/tmp/python/hail/docs

TARGET=build/tmp/python/hail/docs/distLinks.rst
rm -f $TARGET

Expand Down
101 changes: 101 additions & 0 deletions python/hail/conftest.py
@@ -0,0 +1,101 @@
import pytest
import os
import shutil
import hail as hl
import hail.expr.aggregators as agg


@pytest.fixture(autouse=True)
def always_true(monkeypatch):
# FIXME: remove once test output matches docs
monkeypatch.setattr('doctest.OutputChecker.check_output', lambda a, b, c, d: True)
yield
monkeypatch.undo()


@pytest.fixture(scope="session", autouse=True)
def init(doctest_namespace):
# This gets run once per process -- must avoid race conditions
print("setting up doctest...")

olddir = os.getcwd()
os.chdir("docs/")

doctest_namespace['hl'] = hl
doctest_namespace['agg'] = agg

if not os.path.isdir("output/"):
try:
os.mkdir("output/")
except OSError:
pass

files = ["sample.vds", "sample.qc.vds", "sample.filtered.vds"]
for f in files:
if os.path.isdir(f):
shutil.rmtree(f)

ds = hl.read_matrix_table('data/example.vds')
doctest_namespace['ds'] = ds
doctest_namespace['dataset'] = ds
doctest_namespace['dataset2'] = ds.annotate_globals(global_field=5)
doctest_namespace['dataset_to_union_1'] = ds
doctest_namespace['dataset_to_union_2'] = ds

v_metadata = ds.rows().annotate_globals(global_field=5).annotate(consequence='SYN')
doctest_namespace['v_metadata'] = v_metadata

s_metadata = ds.cols().annotate(pop='AMR', is_case=False, sex='F')
doctest_namespace['s_metadata'] = s_metadata

# Table
table1 = hl.import_table('data/kt_example1.tsv', impute=True, key='ID')
table1 = table1.annotate_globals(global_field_1=5, global_field_2=10)
doctest_namespace['table1'] = table1
doctest_namespace['other_table'] = table1

table2 = hl.import_table('data/kt_example2.tsv', impute=True, key='ID')
doctest_namespace['table2'] = table2

table4 = hl.import_table('data/kt_example4.tsv', impute=True,
types={'B': hl.tstruct(B0=hl.tbool, B1=hl.tstr),
'D': hl.tstruct(cat=hl.tint32, dog=hl.tint32),
'E': hl.tstruct(A=hl.tint32, B=hl.tint32)})
doctest_namespace['table4'] = table4

people_table = hl.import_table('data/explode_example.tsv', delimiter='\\s+',
types={'Age': hl.tint32, 'Children': hl.tarray(hl.tstr)})
doctest_namespace['people_table'] = people_table

# TDT
doctest_namespace['tdt_dataset'] = hl.import_vcf('data/tdt_tiny.vcf')

ds2 = hl.variant_qc(ds)
doctest_namespace['ds2'] = ds2.select_rows(AF = ds2.variant_qc.AF)

# Expressions
doctest_namespace['names'] = hl.literal(['Alice', 'Bob', 'Charlie'])
doctest_namespace['a1'] = hl.literal([0, 1, 2, 3, 4, 5])
doctest_namespace['a2'] = hl.literal([1, -1, 1, -1, 1, -1])
doctest_namespace['t'] = hl.literal(True)
doctest_namespace['f'] = hl.literal(False)
doctest_namespace['na'] = hl.null(hl.tbool)
doctest_namespace['call'] = hl.call(0, 1, phased=False)
doctest_namespace['a'] = hl.literal([1, 2, 3, 4, 5])
doctest_namespace['d'] = hl.literal({'Alice': 43, 'Bob': 33, 'Charles': 44})
doctest_namespace['interval'] = hl.interval(3, 11)
doctest_namespace['locus_interval'] = hl.parse_locus_interval("1:53242-90543")
doctest_namespace['locus'] = hl.locus('1', 1034245)
doctest_namespace['x'] = hl.literal(3)
doctest_namespace['y'] = hl.literal(4.5)
doctest_namespace['s1'] = hl.literal({1, 2, 3})
doctest_namespace['s2'] = hl.literal({1, 3, 5})
doctest_namespace['s3'] = hl.literal({'Alice', 'Bob', 'Charlie'})
doctest_namespace['struct'] = hl.struct(a=5, b='Foo')
doctest_namespace['tup'] = hl.literal(("a", 1, [1, 2, 3]))
doctest_namespace['s'] = hl.literal('The quick brown fox')
doctest_namespace['interval2'] = hl.Interval(3, 6)

print("finished setting up doctest...")
yield
os.chdir(olddir)
3 changes: 2 additions & 1 deletion python/hail/dev-environment.yml
@@ -1,7 +1,6 @@
name: hail
dependencies:
- python=3.6
- nose
- Sphinx=1.7
- sphinxcontrib
- conda-forge::nbsphinx
Expand All @@ -13,6 +12,8 @@ dependencies:
- bokeh
- jupyter
- pip
- pytest
- pip:
- parsimonious
- ipykernel
- pytest-xdist
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need pytest in the dependencies as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can remove nose also

1 change: 0 additions & 1 deletion python/hail/docs/Makefile
Expand Up @@ -2,7 +2,6 @@
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = python3 -msphinx -T
PAPER =
BUILDDIR = _build
Expand Down