
Commit e1afa6c

Merge aed6a03 into a85202d
2 parents a85202d + aed6a03
ejolly committed Jul 24, 2021
Showing 9 changed files with 210 additions and 65 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/auto_deploy_pypi_onrelease.yml
@@ -0,0 +1,29 @@
name: (Auto-On-Release) PyPI Deploy

on: [release, workflow_dispatch]

jobs:
  deploy:
    name: Build & deploy package
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Code
        uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.8"

      - name: Pypi build
        run: |
          python3 -m pip install build --user

      - name: Wheel and source build
        run: |
          python3 -m build --sdist --wheel --outdir dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@master
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
70 changes: 70 additions & 0 deletions .github/workflows/auto_tests.yml
@@ -0,0 +1,70 @@
name: Tests (Auto-Weekly)

on:
  schedule:
    - cron: "0 0 * * 0"

jobs:
  # Job (1): Run testing in parallel against multiple OSs and Python versions
  test:
    if: "!contains(github.event.head_commit.message, 'skip ci')"
    name: Test
    runs-on: ${{ matrix.os }}
    # Determines whether the entire workflow should pass/fail based on parallel jobs
    continue-on-error: ${{ matrix.ok-fail }}
    defaults:
      # This ensures each step gets a properly configured bash shell for conda commands to work
      run:
        shell: bash -l {0}
    strategy:
      fail-fast: false
      matrix:
        # OSs to test
        os: [ubuntu-latest, macos-latest, windows-latest]
        # Python versions to test
        python-version: [3.7, 3.8]
        # By default everything should pass for the workflow to pass
        ok-fail: [false]
        include:
          # Rather than include 3.9 in the python versions, do it here so we can ignore failures on mac and windows with 3.9 (they have install issues)
          - os: ubuntu-latest
            python-version: 3.9
            ok-fail: true
          - os: macos-latest
            python-version: 3.9
            ok-fail: true
          - os: windows-latest
            python-version: 3.9
            ok-fail: true
    steps:
      # Set up miniconda
      - name: Download and setup Miniconda
        uses: conda-incubator/setup-miniconda@059455a698430d8b68fa317268fa2e3da3492a98
        with:
          miniconda-version: "latest"
          python-version: ${{ matrix.python-version }}

      # Check out latest code on GitHub
      - name: Checkout Code
        uses: actions/checkout@v2

      # Install common scientific Python packages via conda as well as testing packages and requirements
      - name: Install Dependencies
        run: |
          conda activate test
          conda env list
          conda install -y numpy numba pandas scipy seaborn
          conda install -y -c conda-forge pytest pytest-cov pytest-xdist pytest-sugar coveralls black
          pip install . -r requirements.txt

      # Check code formatting
      - name: Check code formatting
        run: |
          black . --check --diff

      # Actually run the tests with coverage
      - name: Run Tests
        run: |
          conda activate test
          conda env list
          pytest --cov=emotioncf -rs -n auto
@@ -1,4 +1,4 @@
name: EmotionCF
name: Tests & Docs

on:
  push:
@@ -147,15 +147,3 @@ jobs:
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site

  # Job (4): Build package and upload to conda/pypi
  deploy:
    if: "!contains(github.event.head_commit.message, 'skip ci')"
    name: Build & deploy package
    runs-on: ubuntu-latest
    needs: test
    steps:
      - name: Say Hi
        shell: bash
        run: |
          echo "hello world. I havent been configured for package deployment yet!"
74 changes: 34 additions & 40 deletions README.md
@@ -1,61 +1,55 @@
# EmotionCF
# Emotion CF
[![Build Status](https://github.com/cosanlab/emotionCF/workflows/EmotionCF/badge.svg)](https://github.com/cosanlab/emotionCF/actions?query=workflow%3AEmotionCF)
[![Coverage Status](https://coveralls.io/repos/github/cosanlab/emotionCF/badge.svg?branch=master)](https://coveralls.io/github/cosanlab/emotionCF?branch=master)
![Python Versions](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue)
![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20osx%20%7C%20win-blue)

A python package to perform collaborative filtering on emotion datasets. Compatible with Python 3 only.
**A Python package for collaborative filtering on social datasets**

## [Documentation Site](https://cosanlab.github.io/emotionCF)
## Installation

### Installation
1. Pip (official releases): `pip install emotioncf`
2. Github (bleeding edge): `pip install git+https://github.com/cosanlab/emotionCF.git`

```
pip install git+https://github.com/cosanlab/emotionCF.git
```

---

## Development

To develop this package or its documentation locally you will need to install a few extra dependencies.

### Installation
## Getting started

`pip install -r requirements-dev.txt`
The best way to learn how to use the package is by checking out the 3 usage tutorials on working with [dense](examples/dense-data), [sparse](examples/sparse-data), and [time-series](examples/timeseries-data) data. For more detailed usage of specific function arguments and model parameters, check out the API reference on the left.

### Testing
### Quick Demo Usage

`pytest -rs -n auto`
```python
from emotioncf.models import NNMF_sgd
from emotioncf.utils import create_user_item_matrix, estimate_performance

New tests can be added in `emotioncf/tests/`.
# Assuming data is a 3-column pandas df with 'User', 'Item', 'Rating'
# convert it to a (possibly sparse) user x item matrix
mat = create_user_item_matrix(df)

### Formatting
# Initialize a model
model = NNMF_sgd(mat)

Please format your code using black. If you've installed the development dependencies, then you can configure `git` to tell you if any new changes are not formatted by setting up a **pre-commit hook:**
# Fit
model.fit()

- `cd .git/hooks`
- Create a new file called `pre-commit` with the following contents:
# If data are time-series, optionally fit the model using dilation
# to leverage auto-correlation and improve performance
model.fit(dilate_by_nsamples=60)

```
#!/bin/sh
black --check .
```
- Make sure the file is executable `chmod 775 pre-commit`
# Visualize results
model.plot_predictions()

Now anytime you try to commit new changes, git will automatically run black before the commit and warn you if certain files need to be formatted.

### Documentation

Documentation is built with [mkdocs](https://www.mkdocs.org/) using the [mkdocs material theme](https://squidfunk.github.io/mkdocs-material/), [mkdocstrings](https://pawamoy.github.io/mkdocstrings/) extension, and [mkdocs-jupyter](https://github.com/danielfrg/mkdocs-jupyter) plugins.


#### Live server
# Estimate algorithm performance using
# repeated refitting with random masking (dense data)
# or cross-validation (sparse data)
group_results, user_results = estimate_performance(NNMF_sgd, mat)
```
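
The snippet above assumes a long-format dataframe `df` already exists. As a minimal sketch (toy, randomly generated ratings purely for illustration), one way to construct such a dataframe with pandas:

```python
import numpy as np
import pandas as pd

# Toy long-format data: 10 users x 20 items with random 1-5 ratings
# (values are illustrative only)
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(
    {
        "User": np.repeat([f"user_{i}" for i in range(10)], 20),
        "Item": np.tile([f"item_{j}" for j in range(20)], 10),
        "Rating": rng.integers(low=1, high=6, size=10 * 20),
    }
)
```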

After installation above, simply run `mkdocs serve` from the project root to start a hot-reloading server of the documentation at `http://localhost:8000`.

To alter the layout of the docs site, adjust settings in `mkdocs.yml`. To add or edit pages, simply create markdown files within the `docs/` folder.
## Algorithms

#### Deploying
Currently supported algorithms include:

You can use the `mkdocs gh-deploy` command to build and push the documentation site to the [github-pages branch](https://github.com/cosanlab/emotionCF/tree/gh-pages) of this repo.
- `Mean` - a baseline model
- `KNN` - k-nearest neighbors
- `NNMF_mult` - non-negative matrix factorization trained via multiplicative updating
- `NNMF_sgd` - non-negative matrix factorization trained via stochastic gradient descent
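
All four estimators are used the same way as in the demo above. A minimal sketch of swapping between them, assuming each class is importable from `emotioncf.models` and shares the constructor and `.fit()` interface shown for `NNMF_sgd` (only `NNMF_sgd` is confirmed by the snippets here):

```python
from emotioncf.models import Mean, KNN, NNMF_mult, NNMF_sgd

# Assumption: all four classes accept a user x item matrix and expose .fit(),
# mirroring the NNMF_sgd usage in the Quick Demo above
for Algorithm in (Mean, KNN, NNMF_mult, NNMF_sgd):
    model = Algorithm(mat)  # mat from create_user_item_matrix(df)
    model.fit()
    print(f"{Algorithm.__name__} fit complete")
```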
39 changes: 32 additions & 7 deletions docs/index.md
@@ -4,21 +4,46 @@
![Python Versions](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue)
![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20osx%20%7C%20win-blue)

## **A Python package for collaborative filtering on emotion datasets**
**A Python package for collaborative filtering on social datasets**

## Installation

```bash
pip install git+https://github.com/cosanlab/emotionCF.git
```
1. Pip (official releases): `pip install emotioncf`
2. Github (bleeding edge): `pip install git+https://github.com/cosanlab/emotionCF.git`

## Getting started

Checkout the [quick overview](examples/overview) for examples to help you get started.
The best way to learn how to use the package is by checking out the 3 usage tutorials on working with [dense](examples/dense-data), [sparse](examples/sparse-data), and [time-series](examples/timeseries-data) data. For more detailed usage of specific function arguments and model parameters, check out the API reference on the left.

Or check out the API reference on the left to explore the details of specific models.
### Quick Demo Usage

```python
from emotioncf.models import NNMF_sgd
from emotioncf.utils import create_user_item_matrix, estimate_performance

# Assuming data is a 3-column pandas df with 'User', 'Item', 'Rating'
# convert it to a (possibly sparse) user x item matrix
mat = create_user_item_matrix(df)

# Initialize a model
model = NNMF_sgd(mat)

# Fit
model.fit()

# If data are time-series, optionally fit the model using dilation
# to leverage auto-correlation and improve performance
model.fit(dilate_by_nsamples=60)

# Visualize results
model.plot_predictions()

# Estimate algorithm performance using
# repeated refitting with random masking (dense data)
# or cross-validation (sparse data)
group_results, user_results = estimate_performance(NNMF_sgd, mat)
```

A unique feature of this toolbox is its support for [working with time-series data](timeseries.md).

## Algorithms

7 changes: 6 additions & 1 deletion docs/releasenotes.md
@@ -1,15 +1,20 @@
# Release Notes

## 0.1.0
- **Official pypi public release**
- Package rename

## 0.0.4
- standardize codebase with `black`
- complete API rewrite
- **complete API rewrite**
- new `estimate_performance` function
- all new tests with `pytest` fixtures
- new docs site with `mkdocs`

## 0.0.3
- Fix dilation and convolution issues
- Update tests
- Drop support for Python 2

## 0.0.2
- Fixed pandas `.apply` bug
35 changes: 34 additions & 1 deletion emotioncf/tests/conftest.py
@@ -1,5 +1,38 @@
"""
Define pytest fixtures, i.e. reusable test initializations or parameters that can be used to automatically generated a grid of tests by test functions.
Define pytest fixtures, i.e. reusable test initializations or parameters that can be used to automatically generate a grid of tests by test functions. A brief explanation of how they work and how to write new fixtures + tests:

Each function below is passed in as an argument to a test function in one of the test_*.py files. The value of that argument == whatever the function definition returns in this file. For example:

# From test_models.py
test_init_and_dilate(init, mask, n_mask_items) <- These arguments are functions defined in *this* file.
init() <- returns an initialized model or skips a test

So within test_init_and_dilate(), init == model instance.

At the same time, fixtures in this file can make use of *other* fixtures passed in as arguments, e.g.

mask(request, simulate_wide_data) <- simulate_wide_data() is defined below and returns a dataframe available inside of mask()

Arguments to fixtures that are not other fixtures, such as `request` in mask(), are special arguments that can create test grids based on an iterable of parameter values. These values are defined using the @fixture decorator. So `request` in mask() is defined with 2 parameter values: None and "masked". This means that wherever mask() is invoked in other tests, it will be called 2x: once with None and once with "masked".

This is used, for example, in init(), where for some tests a model instance is created with masking and for other tests without masking. Then whatever tests make use of init(), such as test_init_and_dilate(), will be run at least twice because mask() takes a `request` with 2 parameter values.

While a bit complicated at first, this makes it easy to create testing grids based on the dependencies between fixtures. Here's a simplified example dependency graph (ignoring other fixtures):

Test 1:
simulate_wide_data() -> returns df -> mask()
request.param == None -> mask()
mask() -> init()
init() -> test_init_and_dilate() -> tests non-masked model initialization

Test 2:
simulate_wide_data() -> returns df -> mask()
request.param == "masked" -> mask()
mask() -> init()
init() -> test_init_and_dilate() -> tests masked model initialization

In reality there is an entire grid of tests because init() accepts several other fixtures which also have their own parameterizations, e.g. n_mask_items() creates 4 tests with None, 0.1, 0.5, 0.9. Combining this with the 2 parameterizations for mask() results in *8 unique tests*.

Hopefully this provides a relatively clear example of how to do exhaustive testing by defining parameter grids that are automatically created by pytest based on these definitions.
"""

import pytest
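
As a self-contained sketch of the fixture pattern the conftest.py docstring above describes (generic toy names, not the package's actual fixtures):

```python
import pandas as pd
import pytest


@pytest.fixture(params=[None, "masked"])
def mask_mode(request):
    # Parameterized fixture: every test using it runs once per param value
    return request.param


@pytest.fixture(params=[None, 0.1, 0.5, 0.9])
def mask_fraction(request):
    # Four param values -> four variants per combination
    return request.param


@pytest.fixture
def wide_df():
    # Fixtures can also return plain data for other fixtures/tests to consume
    return pd.DataFrame({"item_1": [1, 2], "item_2": [3, 4]})


def test_grid(mask_mode, mask_fraction, wide_df):
    # pytest generates 2 x 4 = 8 tests from the parameterizations above
    assert wide_df.shape == (2, 2)
```

Running `pytest` against this file yields 8 generated tests, mirroring the 2 × 4 grid described in the docstring.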
2 changes: 1 addition & 1 deletion emotioncf/version.py
@@ -1 +1 @@
__version__ = "0.0.4"
__version__ = "0.1.0"
5 changes: 3 additions & 2 deletions mkdocs.yml
@@ -9,6 +9,8 @@ markdown_extensions:
  - admonition
  - codehilite
  - pymdownx.highlight
  - pymdownx.superfences
  - pymdownx.inlinehilite
nav:
  - Home: index.md
  - Tutorials:
@@ -18,13 +20,12 @@ nav:
  - Contributing to Development: development.md
  - Release Notes: releasenotes.md
  - API Reference:
      - emotioncf.utils: api/utils.md
      - emotioncf.base: api/base.md
      - emotioncf.models:
          - Mean: api/mean.md
          - KNN: api/knn.md
          - NNMF_mult: api/nmf_m.md
          - NNMF_sgd: api/nmf_s.md
      - emotioncf.utils: api/utils.md
plugins:
  - search
  - mkdocs-jupyter
