Skip to content

Commit

Permalink
rename fscLVM to slalom
Browse files Browse the repository at this point in the history
  • Loading branch information
flophys committed Oct 24, 2017
1 parent b085a5e commit 974e968
Show file tree
Hide file tree
Showing 21 changed files with 120 additions and 119 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Expand Up @@ -33,7 +33,7 @@ script:
- python setup.py sdist
- pip install dist/`ls dist | grep -i -E '\.(gz)$' | head -1`
- pushd /
- python -c "import fscLVM; print(fscLVM.__version__)"
- python -c "import slalom; print(slalom.__version__)"
- popd
notifications:
email: false
Expand All @@ -47,4 +47,4 @@ deploy:
secure: buXXT5VTU6j0ArBCXLvkLAfEP/qIppuc4SRXQTSf6nkxok5fwmVmvP9CSJQrpgpJG6soL6rZhcowACjboN/YZBvlYLR9KoBu371dqL1AvIw2XFRNUa7EDOCdQpYlt3NviPcVnqF5ZEIfULxTGDlKnhRBU1olZCM/JJCexGcT4NSPGuZLXjMsjFd3uR8WqgvpCfJPXZzSaOqVIslnkIb0ZvTB9Z5sYMDe8xgOXnO5C1+aMeyIQnthxmXPXEQUtntg8YCUenuPfyuUf2gNXrx1W8uqIsmX3ivhR2N5i+s5JniBiTqenfVUrvgDiOolRF7vPA8jU+6ZCZCy0hTdPI2QuAChkb0Uw9CdcPZyL1hS61A3KxJ3AKRAkqCiBlbSH2v8gqjcVGtGL4CfbS9Im0BoUoGvhWqLREzk5Mjnw6Ed2w8QhmwU2cTEY8qVsmZpz3hci1YIHVHEW6W0moV7rj3uW/dfceTbWtA4+yh5IOOu0CIVkgjKHL+3QBSDynqL+EuJQhafn7tI+ow9R3msEVYMway7d1XcJAabIv/0SVQu97NpHYd7KoD2kk5HdyvJPsIu4ugUi0zcVe5EnxG7qVqEAdvcG6MyMRkRLXq/8vjqg7BDVlZeORc/jkTZFsc/6qH/eYX3G/MoIJTDv609SM4mb8pyVbt0hAeJrQqPE8SyNvM=
on:
distributions: sdist bdist_wheel
repo: PMBio/f-scLVM
repo: bioFAM/slalom
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -1,4 +1,4 @@
include LICENSE
include README.md
include setup.cfg
recursive-include fscLVM *.py
recursive-include slalom *.py
8 changes: 4 additions & 4 deletions doc/Makefile
Expand Up @@ -91,9 +91,9 @@ qthelp:
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/fscLVM.qhcp"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/slalom.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/fscLVM.qhc"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/slalom.qhc"

.PHONY: applehelp
applehelp:
Expand All @@ -110,8 +110,8 @@ devhelp:
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/fscLVM"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/fscLVM"
@echo "# mkdir -p $$HOME/.local/share/devhelp/slalom"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/slalom"
@echo "# devhelp"

.PHONY: epub
Expand Down
18 changes: 9 additions & 9 deletions doc/conf.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# fscLVM documentation build configuration file, created by
# slalom documentation build configuration file, created by
# sphinx-quickstart on Tue Nov 1 14:32:47 2016.
#
# This file is execfile()d with the current directory set to its
Expand All @@ -20,7 +20,7 @@
import sys
sys.path.insert(0, os.path.abspath('py/'))

from fscLVM import __version__
from slalom import __version__

# -- General configuration ------------------------------------------------

Expand Down Expand Up @@ -60,7 +60,7 @@
master_doc = 'index'

# General information about the project.
project = u'fscLVM'
project = u'slalom'
copyright = u'2016-2017, Florian Buettner and Oliver Stegle'
author = u'Florian Buettner'

Expand Down Expand Up @@ -146,7 +146,7 @@
# The name for this set of Sphinx documents.
# "<project> v<release> documentation" by default.
#
# html_title = u'fscLVM v1.0.0'
# html_title = u'slalom v1.0.0'

# A shorter title for the navigation bar. Default is the same as html_title.
#
Expand Down Expand Up @@ -246,7 +246,7 @@
# html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = 'fscLVMdoc'
htmlhelp_basename = 'slalomdoc'

# -- Options for LaTeX output ---------------------------------------------

Expand All @@ -272,7 +272,7 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'fscLVM.tex', u'fscLVM Documentation',
(master_doc, 'slalom.tex', u'slalom Documentation',
u'Florian Buettner', 'manual'),
]

Expand Down Expand Up @@ -314,7 +314,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'fsclvm', u'fscLVM Documentation',
(master_doc, 'slalom', u'slalom Documentation',
[author], 1)
]

Expand All @@ -329,8 +329,8 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'fscLVM', u'fscLVM Documentation',
author, 'fscLVM', 'One line description of project.',
(master_doc, 'slalom', u'slalom Documentation',
author, 'slalom', 'One line description of project.',
'Miscellaneous'),
]

Expand Down
44 changes: 22 additions & 22 deletions doc/index.rst
@@ -1,9 +1,9 @@
fscLVM documentation
slalom documentation
======================

f-scLVM is a scalable modelling framework for single-cell RNA-seq data that can be used to dissect and model single-cell transcriptome heterogeneity, thereby allowing to identify biological drivers of cell-to-cell variability and model confounding factors.
slalom is a scalable modelling framework for single-cell RNA-seq data that can be used to dissect and model single-cell transcriptome heterogeneity, thereby allowing the identification of biological drivers of cell-to-cell variability and the modelling of confounding factors.

Software by Florian Buettner and Oliver Stegle. f-scLVM is explained in detail in the accompanying publication [1].
Software by Florian Buettner and Oliver Stegle. slalom is explained in detail in the accompanying publication [1].


[1] Buettner, F., Pratanwanich, N., Marioni, J., Stegle, O. Scalable latent-factor models applied to single-cell RNA-seq data separate biological drivers from confounding effects. Submitted
Expand All @@ -12,11 +12,11 @@ Software by Florian Buettner and Oliver Stegle. f-scLVM is explained in detail i
Installation
------------

f-scLVM requires Python 2.7 or newer with
slalom requires Python 2.7 or newer with

* scipy, h5py, numpy, matplotlib, scikit-learn, re

f-scLVM can be installed via pip with `pip install fscLVM`.
slalom can be installed via pip with `pip install slalom`.
For best results, we recommend the Anaconda python distribution (https://anaconda.org).

Quickstart
Expand All @@ -25,18 +25,18 @@ Quickstart
******
Input
******
f-scLVM requires two input files, a gene expression file and an annotation file. The gene expression file is a text file containing the normalised, log-transformed gene expression matrix, with every row corresponding to a cell. Column names should be gene identifiers matching those in the annotation file (i.e. if gene symbols are used in the annotation file, column names should also be gene symbols). Row names are optional and can eg be a known covariate which is used for plotting.
slalom requires two input files, a gene expression file and an annotation file. The gene expression file is a text file containing the normalised, log-transformed gene expression matrix, with every row corresponding to a cell. Column names should be gene identifiers matching those in the annotation file (i.e. if gene symbols are used in the annotation file, column names should also be gene symbols). Row names are optional and can, e.g., be a known covariate which is used for plotting.
The annotation file is a text file with every row containing the name of a gene set, followed by the gene identifiers annotated to that gene set. We recommend using annotations such as those published in the REACTOME database or the Molecular signature database (MSigDB) and provide an annotation file containing annotations from the REACTOME database. The license of MSigDB does not permit redistribution of the raw annotation files. To use MSigDB annotations, please register at http://software.broadinstitute.org/gsea/msigdb, download the hallmark gene sets (gene symbols) and place the file in the data folder.
Both text files can then be loaded using the ``load_text`` function.

NB: f-scLVM works best on a subset of highly variable genes; these can be identified using a variance filter based on a mean-variance trend fitted using spike-in transcripts or endogenous genes. A step-by-step workflow on low-level processing of scRNA-seq data (including gene filtering) can be found here: https://f1000research.com/articles/5-2122/v2
NB: slalom works best on a subset of highly variable genes; these can be identified using a variance filter based on a mean-variance trend fitted using spike-in transcripts or endogenous genes. A step-by-step workflow on low-level processing of scRNA-seq data (including gene filtering) can be found here: https://f1000research.com/articles/5-2122/v2


********************************
Model initialisation and fitting
********************************

An f-scLVM model can be initialised using the ``initFA`` convenience function. Arguments can be used to specify options, incuding number of unannotated factors (`nHidden`), minimum number of genes in a pathway (`minGenes`), whether to use the fast option by pruning genes (`pruneGenes`), noise model (`noise`) and the data directory (`data_dir`). Once a model is initialised, it can be fit using the ``train`` method.
A slalom model can be initialised using the ``initFA`` convenience function. Arguments can be used to specify options, including the number of unannotated factors (`nHidden`), minimum number of genes in a pathway (`minGenes`), whether to use the fast option by pruning genes (`pruneGenes`), noise model (`noise`) and the data directory (`data_dir`). Once a model is initialised, it can be fit using the ``train`` method.

********************************
Diagnostics, plotting and saving.
Expand All @@ -47,12 +47,12 @@ The ``printDiagnostics`` function can be used to print diagnositcs based on the
Tutorial
--------

All steps required to run f-scLVM are illustrated in a jupyter notebook that can be viewed `interactively <http://nbviewer.jupyter.org/github/pmbio/f-scLVM/blob/master/ipynb/f-scLVM.ipynb>`_.
All steps required to run slalom are illustrated in a jupyter notebook that can be viewed `interactively <http://nbviewer.jupyter.org/github/pmbio/slalom/blob/master/ipynb/slalom.ipynb>`_.


The factorial single-cell latent variable model (f-scLVM)
The factorial single-cell latent variable model (slalom)
---------------------------------------------------------
A detailed statistical description of the f-scLVM model can be found in teh accompanyin publicaiton [1]. Here, a brief summary is given. f-scLVM is based on a variant of matrix factorization, decomposing the observed gene matrix into a sum of sum of contributions from A annotated factors, whose inference is guided by pathway gene sets, and H additional unannotated factors:
A detailed statistical description of the slalom model can be found in the accompanying publication [1]. Here, a brief summary is given. slalom is based on a variant of matrix factorization, decomposing the observed gene matrix into a sum of contributions from A annotated factors, whose inference is guided by pathway gene sets, and H additional unannotated factors:

.. math::
Expand All @@ -65,31 +65,31 @@ For the statistical derivation in the accompanying publication and the implement
\mathbf{Y} = \mathbf{X}\mathbf{W}^T +\mathbf{\psi} .
We employ two levels of regularization on the parts of the weight matrix :math:`\mathbf{W}` corresponding to annotated factors. First, gene sets are used to guide a spike-and-slab prior on the rows of :math:`\mathbf{W}` thereby confining the inferred weights to the set of genes annotated in the pathway database. To this end :math:`\mathbf{W}` is modelled as elementwise product of a Bernoulli random variable :math:`\mathbf{Z}`, indicating whether a gene is active for a given factor and a Gaussian random variable :math:`\widetilde{\mathbf{W}}`, quantifying the corresponding effect size (for details see [1]). A second level of regularization is then used to achieve sparseness on the level of factors, allowing the model to deactivate factors that are not needed to explain variation in the data; this is achieved using an automatic relevance determination (ARD) prior (i.e. factor-specific Gamma prior on the precision of the weights). The inverse of this ARD prior (:math:`1/\alpha_k`) can be interpreted as a measure of the regulatory impact of factor :math:`k` and corresponds to the expected variance explained by this factor, for the subset of genes with a regulatory effect. It is therefore also referred to as relevance parameter. The fscLVM software implements an efficent deterministic approximate Bayesian inference scheme based on variational methods, allowing for the inference of :math:`\mathbf{X}`, :math:`\mathbf{Z}`, :math:`\widetilde{\mathbf{W}}`, :math:`\mathbf{\alpha}`, :math:`\mathbf{\psi}` and other parameters.
We employ two levels of regularization on the parts of the weight matrix :math:`\mathbf{W}` corresponding to annotated factors. First, gene sets are used to guide a spike-and-slab prior on the rows of :math:`\mathbf{W}` thereby confining the inferred weights to the set of genes annotated in the pathway database. To this end :math:`\mathbf{W}` is modelled as elementwise product of a Bernoulli random variable :math:`\mathbf{Z}`, indicating whether a gene is active for a given factor and a Gaussian random variable :math:`\widetilde{\mathbf{W}}`, quantifying the corresponding effect size (for details see [1]). A second level of regularization is then used to achieve sparseness on the level of factors, allowing the model to deactivate factors that are not needed to explain variation in the data; this is achieved using an automatic relevance determination (ARD) prior (i.e. factor-specific Gamma prior on the precision of the weights). The inverse of this ARD prior (:math:`1/\alpha_k`) can be interpreted as a measure of the regulatory impact of factor :math:`k` and corresponds to the expected variance explained by this factor, for the subset of genes with a regulatory effect. It is therefore also referred to as relevance parameter. The slalom software implements an efficient deterministic approximate Bayesian inference scheme based on variational methods, allowing for the inference of :math:`\mathbf{X}`, :math:`\mathbf{Z}`, :math:`\widetilde{\mathbf{W}}`, :math:`\mathbf{\alpha}`, :math:`\mathbf{\psi}` and other parameters.
Loading data and model initialisation
-------------------------------------

.. autofunction:: fscLVM.load_txt
.. autofunction:: fscLVM.load_hdf5
.. autofunction:: fscLVM.initFA
.. autofunction:: fscLVM.preTrain
.. autofunction:: slalom.load_txt
.. autofunction:: slalom.load_hdf5
.. autofunction:: slalom.initFA
.. autofunction:: slalom.preTrain



Model fitting
-------------------------------------
.. autoclass:: fscLVM.core.CSparseFA
.. autoclass:: slalom.core.CSparseFA
:members:


Plotting and saving results
----------------------------
.. autofunction:: fscLVM.plotTerms
.. autofunction:: fscLVM.plotFactors
.. autofunction:: fscLVM.plotRelevance
.. autofunction:: fscLVM.saveFA
.. autofunction:: fscLVM.dumpFA
.. autofunction:: slalom.plotTerms
.. autofunction:: slalom.plotFactors
.. autofunction:: slalom.plotRelevance
.. autofunction:: slalom.saveFA
.. autofunction:: slalom.dumpFA

Contents:

Expand Down
4 changes: 2 additions & 2 deletions doc/make.bat
Expand Up @@ -129,9 +129,9 @@ if "%1" == "qthelp" (
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\fscLVM.qhcp
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\slalom.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\fscLVM.ghc
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\slalom.ghc
goto end
)

Expand Down
26 changes: 13 additions & 13 deletions ipynb/f-scLVM.ipynb
Expand Up @@ -4,14 +4,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# fscLVM tutorial "
"# slalom tutorial "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook we illustrate how f-scLVM can be used to identify biological drivers on the mESC cell cycle staged dataset."
"In this notebook we illustrate how slalom can be used to identify biological drivers on the mESC cell cycle staged dataset."
]
},
{
Expand All @@ -38,9 +38,9 @@
],
"source": [
"import os\n",
"import fscLVM\n",
"import slalom\n",
"import pdb\n",
"from fscLVM import plotFactors, plotRelevance, plotLoadings, saveFA, dumpFA\n",
"from slalom import plotFactors, plotRelevance, plotLoadings, saveFA, dumpFA\n",
"%pylab inline\n",
"\n",
"#specify where the hdf5 file is\n",
Expand All @@ -53,7 +53,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"f-scLVM expects an expression file, typically with log transformed gene expression values as well as a gene set annotation. These data can be provided as single hdf5 file, which can be generated using separate R scripts (in the R folder). Alternatively, the expression matrix and the annotation can be loaded as text files in python. \n",
"slalom expects an expression file, typically with log transformed gene expression values as well as a gene set annotation. These data can be provided as single hdf5 file, which can be generated using separate R scripts (in the R folder). Alternatively, the expression matrix and the annotation can be loaded as text files in python. \n",
"\n",
"\n",
"\n",
Expand Down Expand Up @@ -94,11 +94,11 @@
"####\n",
"#Option 1: load a pre-defined hdf5 file\n",
"#We provide an (optional) hdf file with the required data - this was generated using\n",
"#the R script write_fscLVM.R in the R folder\n",
"#the R script write_slalom.R in the R folder\n",
"if 0:\n",
" annoDB = 'MSigDB'\n",
" dFile = os.path.join(data_dir,'Buettneretal2015.hdf5')\n",
" data = fscLVM.load_hdf5(dFile, anno=annoDB)\n",
" data = slalom.load_hdf5(dFile, anno=annoDB)\n",
" \n",
"####\n",
"\n",
Expand All @@ -116,12 +116,12 @@
"\n",
" #dataFile: csv file with log expresison values\n",
" dataFile = os.path.join(data_dir,'Buettneretal.csv.gz') # note that the first column (row names) contains the cell cycle stage in numeric form\n",
" data = fscLVM.utils.load_txt(dataFile=dataFile,annoFiles=annoFile,annoDBs=annoDB)\n",
" data = slalom.utils.load_txt(dataFile=dataFile,annoFiles=annoFile,annoDBs=annoDB)\n",
"####\n",
"\n",
"###alternatively the data can be loaded from the provided hdf5 file\n",
"#dFile = 'Buettneretal2015.hdf5'\n",
"#data = fscLVM.load_hdf5(dFile, data_dir=data_dir)\n",
"#data = slalom.load_hdf5(dFile, data_dir=data_dir)\n",
"\n",
"\n",
"#print statistics for the loaded dataset\n",
Expand All @@ -135,7 +135,7 @@
"collapsed": false
},
"source": [
"# Initializing the f-scLVM model"
"# Initializing the slalom model"
]
},
{
Expand All @@ -149,9 +149,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/flo/projects/Auto_Bionf/scLVM2/fscLVM/core.py:392: RuntimeWarning: divide by zero encountered in true_divide\n",
"/Users/flo/projects/Auto_Bionf/scLVM2/slalom/core.py:392: RuntimeWarning: divide by zero encountered in true_divide\n",
" logPi = SP.log(self.Pi[:,m]/(1-self.Pi[:,m]))\n",
"/Users/flo/projects/Auto_Bionf/scLVM2/fscLVM/core.py:394: RuntimeWarning: divide by zero encountered in true_divide\n",
"/Users/flo/projects/Auto_Bionf/scLVM2/slalom/core.py:394: RuntimeWarning: divide by zero encountered in true_divide\n",
" logPi = SP.log(self.Pi[:,m]/(1-self.Pi[:,m]))\n"
]
}
Expand All @@ -168,7 +168,7 @@
"gene_ids = data['genes']\n",
"\n",
"#initialize FA instance, here using a Gaussian noise model and fitting 3 dense hidden factors\n",
"FA = fscLVM.initFA(Y, terms,I, gene_ids=gene_ids, noise='gauss', nHidden=3, minGenes=15)"
"FA = slalom.initFA(Y, terms,I, gene_ids=gene_ids, noise='gauss', nHidden=3, minGenes=15)"
]
},
{
Expand Down

0 comments on commit 974e968

Please sign in to comment.