# Every target here is a command, not a file to build, so all of them are
# declared phony (including `verbose`, which would otherwise be skipped if a
# file or directory with that name ever appeared in the repository root).
.PHONY: all clean coverage test verbose

# Default target: only removes generated files.
all: clean

# Delete compiled extension modules, bytecode files and .pyx.md5 files.
clean:
	find . -name "*.so" -o -name "*.pyc" -o -name "*.pyx.md5" | xargs rm -f

# Run both test suites and report coverage for the `data` and `utils` packages.
coverage:
	nosetests code/utils data --with-coverage --cover-package=data --cover-package=utils

# Run both test suites.
test:
	nosetests code/utils data

# Same as `test`, with one output line per test.
verbose:
	nosetests -v code/utils data
def pearson_1d(x, y):
    """ Pearson product-moment correlation of vectors `x` and `y`

    Parameters
    ----------
    x : array-like shape (N,)
        One-dimensional sequence to correlate with `y`.
    y : array-like shape (N,)
        One-dimensional sequence to correlate with `x`.

    Returns
    -------
    r_xy : scalar
        Pearson product-moment correlation of `x` and `y`.  If either input
        has zero variance the denominator is zero and the result is NaN.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # Remove the mean so the dot products below are sums of squared / crossed
    # deviations rather than raw moments.
    mc_x = x - x.mean()
    mc_y = y - y.mean()
    cross = mc_x.dot(mc_y)
    ss_x = mc_x.dot(mc_x)
    ss_y = mc_y.dot(mc_y)
    return cross / (np.sqrt(ss_x) * np.sqrt(ss_y))


def pearson_2d(x, Y):
    """ Pearson product-moment correlation of vector `x` and columns of `Y`

    Parameters
    ----------
    x : array-like shape (N,)
        One-dimensional sequence to correlate with every column of `Y`.
    Y : array-like shape (N, P)
        2D array; each of its P columns is correlated with `x`.

    Returns
    -------
    r_xy : array shape (P,)
        Pearson product-moment correlation of `x` with each column of `Y`,
        one value per column.  Columns with zero variance give NaN.

    Raises
    ------
    ValueError
        If the number of rows in `Y` does not match the length of `x`.
    """
    x = np.asarray(x)
    Y = np.asarray(Y)
    mc_x = x - x.mean()
    # Broadcasting subtracts each column's mean from every row of that column.
    # Unlike tiling the means to ``len(x)`` rows, this never changes the row
    # count of Y, so a length mismatch is caught by the dot product below
    # instead of being papered over.
    mc_Y = Y - Y.mean(axis=0)
    cross = mc_x.dot(mc_Y)
    ss_x = mc_x.dot(mc_x)
    ss_Y = np.sum(mc_Y ** 2, axis=0)
    return cross / (np.sqrt(ss_x) * np.sqrt(ss_Y))
# Expected MD5 hex digest of each data file, keyed by the file name that is
# opened relative to the current working directory.
d = {'ds107_sub001_highres.nii': "fd733636ae8abe8f0ffbfadedd23896c"}


def generate_file_md5(filename, blocksize=2**20):
    """ Return the MD5 hex digest of the file `filename`

    Parameters
    ----------
    filename : str
        Path of the file to hash; it is opened in binary mode.
    blocksize : int, optional
        Number of bytes passed to each ``read`` call, so the file is hashed
        piecewise rather than loaded in one go.

    Returns
    -------
    digest : str
        Hexadecimal MD5 digest of the file contents.
    """
    md5 = hashlib.md5()
    with open(filename, "rb") as fobj:
        # The two-argument form of iter() keeps calling read() until it
        # returns the empty bytes sentinel, i.e. end of file.
        for chunk in iter(lambda: fobj.read(blocksize), b""):
            md5.update(chunk)
    return md5.hexdigest()


def check_hashes(d):
    """ Compare the MD5 digest of every file in `d` with its expected value

    One line is printed per file saying whether its hash matched.

    Parameters
    ----------
    d : dict
        Maps file paths to their expected MD5 hex digests.

    Returns
    -------
    all_good : bool
        True if every file matched (also for an empty `d`), False if at least
        one did not.  Errors from opening or reading a file are not caught
        here and propagate to the caller.
    """
    all_good = True
    for filename, expected in d.items():
        if generate_file_md5(filename) == expected:
            print("The file {0} has the correct hash.".format(filename))
        else:
            print("ERROR: The file {0} has the WRONG hash!".format(filename))
            all_good = False
    return all_good


if __name__ == "__main__":
    # Turn the result into the exit status so that callers such as
    # `make validate` actually fail when a file does not match.
    raise SystemExit(0 if check_hashes(d) else 1)
# Tools for working with travis-ci
export WHEELHOUSE="http://travis-wheels.scikit-image.org/"

# retry CMD [ARGS...]
# Run CMD up to 5 times, pausing one second between attempts.
# Returns 0 as soon as one attempt succeeds; otherwise prints a message to
# stderr and returns 1.
retry () {
    # https://gist.github.com/fungusakafungus/1026804
    local retry_max=5
    local attempt=1
    while true; do
        "$@" && return 0
        # No point in sleeping once the last attempt has already failed.
        [ "$attempt" -ge "$retry_max" ] && break
        attempt=$((attempt + 1))
        sleep 1
    done

    # "$*" joins the arguments into one word; "$@" inside a larger string
    # would split the message into several words.
    echo "Retry failed [$retry_max]: $*" >&2
    return 1
}


# wheelhouse_pip_install REQUIREMENTS_FILE
# Install pip requirements via travis wheelhouse (no access to the package
# index: only wheels found under $WHEELHOUSE are used).
wheelhouse_pip_install() {
    # Quoted so that a URL or path containing spaces or glob characters is
    # passed to pip unchanged.
    retry pip install --timeout=60 --no-index --find-links "$WHEELHOUSE" -r "$@"
}