From 2421de8819ac33eaff52a27fe5387fa7c2ca7cbf Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Mon, 20 Jul 2015 23:06:30 +0300 Subject: [PATCH] Add Sphinx based documentation to the project. --- .gitignore | 1 + docs/Makefile | 192 +++++++++++++++++++ docs/conf.py | 120 ++++++++++++ docs/datasources.rst | 245 ++++++++++++++++++++++++ docs/dev.rst | 11 ++ docs/dev/query_execution.rst | 94 +++++++++ docs/dev/results_format.rst | 30 +++ docs/dev/vagrant.rst | 46 +++++ docs/index.rst | 68 +++++++ docs/misc.rst | 10 + docs/misc/google_developers_project.rst | 50 +++++ docs/misc/ssl.rst | 59 ++++++ docs/requirements.txt | 3 + docs/setup.rst | 159 +++++++++++++++ docs/upgrade.rst | 34 ++++ docs/usage.rst | 12 ++ docs/usage/elasticsearch_querying.rst | 48 +++++ docs/usage/maintenance.rst | 94 +++++++++ docs/usage/mongodb_querying.rst | 74 +++++++ docs/usage/users.rst | 39 ++++ 20 files changed, 1389 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/datasources.rst create mode 100644 docs/dev.rst create mode 100644 docs/dev/query_execution.rst create mode 100644 docs/dev/results_format.rst create mode 100644 docs/dev/vagrant.rst create mode 100644 docs/index.rst create mode 100644 docs/misc.rst create mode 100644 docs/misc/google_developers_project.rst create mode 100644 docs/misc/ssl.rst create mode 100644 docs/requirements.txt create mode 100644 docs/setup.rst create mode 100644 docs/upgrade.rst create mode 100644 docs/usage.rst create mode 100644 docs/usage/elasticsearch_querying.rst create mode 100644 docs/usage/maintenance.rst create mode 100644 docs/usage/mongodb_querying.rst create mode 100644 docs/usage/users.rst diff --git a/.gitignore b/.gitignore index af25d29b571..fda51661a6a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ celerybeat-schedule* .#* \#*# *~ +_build # Vagrant related .vagrant diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000000..ea58f760fc7 --- /dev/null 
+++ b/docs/Makefile
@@ -0,0 +1,201 @@
+# Makefile for Sphinx documentation
+#
+# Each target runs one Sphinx builder and writes its output below $(BUILDDIR).
+# Run `make help' for the list of targets.
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = _build
+
+# User-friendly check for sphinx-build
+# (evaluated while the Makefile is parsed, so it applies to every target).
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+# None of the targets below is a real file; every rule in this Makefile is
+# listed here so a file or directory of the same name cannot mask it.
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest coverage xml pseudoxml
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  applehelp  to make an Apple Help Book"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  xml        to make Docutils-native XML files"
+	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+	@echo "  coverage   to run coverage check of the documentation (if enabled)"
+
+# Guard against an empty BUILDDIR (for example `make clean BUILDDIR='): the
+# recipe would otherwise expand to `rm -rf /*'.
+clean:
+	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run rm -rf /*))
+	rm -rf $(BUILDDIR)/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/redash.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/redash.qhc"
+
+applehelp:
+	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
+	@echo
+	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
+	@echo "N.B. You won't be able to view it unless you put it in" \
+	      "~/Library/Documentation/Help or install it in your application" \
+	      "bundle."
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/redash"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/redash"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through platex and dvipdfmx..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+# Use $(MAKE), not a literal `make', so -j/-n and command-line variables reach the sub-make.
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+coverage:
+	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
+	@echo "Testing of coverage in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/coverage/python.txt."
+
+xml:
+	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+	@echo
+	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+	@echo
+	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000000..df0b17e6a54 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +# +# re:dash documentation build configuration file, created by +# sphinx-quickstart on Mon Jul 20 22:40:24 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +import shlex # NOTE(review): sys, os and shlex are imported but never referenced in this conf.py + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u're:dash' +copyright = u'2015, EverythingMe' +author = u'EverythingMe' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.6.4' +# The full version, including alpha/beta/rc tags. +release = '0.6.4' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +exclude_patterns = ['_build'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. 
+todo_include_todos = False + +# -- Options for HTML output ---------------------------------------------- +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +import sphinx_rtd_theme +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] # NOTE(review): this patch adds no docs/_static directory -- confirm it exists, otherwise Sphinx will likely warn about the missing path + +# If true, links to the reST sources are added to the pages. +html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +html_show_sphinx = False + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +html_show_copyright = False + +# Output file base name for HTML help builder. +htmlhelp_basename = 'redashdoc' + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). 
+man_pages = [ + (master_doc, 'redash', u're:dash Documentation', + [author], 1) +] + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'redash', u're:dash Documentation', + author, 'redash', 'One line description of project.', # NOTE(review): looks like the sphinx-quickstart placeholder description -- confirm and replace + 'Miscellaneous'), +] + diff --git a/docs/datasources.rst b/docs/datasources.rst new file mode 100644 index 00000000000..01cc9db748d --- /dev/null +++ b/docs/datasources.rst @@ -0,0 +1,245 @@ +Supported Data Sources +###################### + +re:dash supports several types of data sources (see below the full list) +and their management is done with the CLI (``manage.py``): + +Create new data source +====================== + +.. code:: bash + + $ cd /opt/redash/current + $ sudo -u redash bin/run ./manage.py ds new -n {name} -t {type} -o {options} + +If you omit any of the options (-n, -t, -o) it will show a prompt asking +for it. Options is a JSON string with the connection parameters. Unless +you're doing some sort of automation, it's probably easier to leave it +empty and fill out the prompt. + +See below for the different supported data sources types and the +relevant options string format. + +Listing existing data sources +============================= + +.. code:: bash + + $ sudo -u redash bin/run ./manage.py ds list + +Supported data sources +====================== + +PostgreSQL / Redshift +--------------------- + +- **Type**: pg +- **Options**: + + - User (user) + - Password (password) + - Host (host) + - Port (port) + - Database name (dbname) (mandatory) + +- **Options string format (for v0.5 and older)**: "user= password= + host= port=5439 dbname=" + +MySQL +----- + +- **Type**: mysql +- **Options**: + + - User (user) + - Password (passwd) + - Host (host) + - Port (port) + - Database name (db) (mandatory) + +- **Options string format (for v0.5 and older)**: + "Server=localhost;User=;Pwd=;Database=" + +Note that you need to install the MySQLDb package as it is not included +in the ``requirements.txt`` file. 
+ +Graphite +-------- + +- **Type**: graphite +- **Options**: + + - Url (url) (mandatory) + - User (username) + - Password (password) + - Verify SSL certificate (verify) + +- **Options string format**: '{"url": + "https://graphite.yourcompany.com", "auth": ["user", "password"], + "verify": true}' + +Google BigQuery +--------------- + +- **Type**: bigquery +- **Options**: + + - Service Account (serviceAccount) (mandatory) + - Project ID (projectId) (mandatory) + - Private Key filename (privateKey) (mandatory) + +- **Options string format (for v0.5 and older)**: {"serviceAccount" : + "43242343247-fjdfakljr3r2@developer.gserviceaccount.com", + "privateKey" : "/somewhere/23fjkfjdsfj21312-privatekey.p12", + "projectId" : "myproject-123" } + +Notes: + +1. To obtain BigQuery credentials follow the guidelines at: + https://developers.google.com/bigquery/authorization#service-accounts +2. You need to install the ``google-api-python-client``, + ``oauth2client`` and ``pyopenssl`` packages (PyOpenSSL requires + ``libffi-dev`` and ``libssl-dev`` packages), as they are not included + in the ``requirements.txt`` file. + +Google Spreadsheets +------------------- + +(supported from v0.6.4) + +- **Type**: google\_spreadsheets +- **Options**: + + - Credentials filename (credentialsFilePath) (mandatory) + +Notes: + +1. To obtain Google ServiceAccount credentials follow the guidelines at: + https://developers.google.com/console/help/new/#serviceaccounts (save + the JSON version of the credentials file) +2. To be able to load the spreadsheet in re:dash - share it with + your ServiceAccount's email (it can be found in the credentials json + file, for example + 43242343247-fjdfakljr3r2@developer.gserviceaccount.com) Note: all the + service account details can be seen inside the json file you should + obtain following step #1 +3. The query format is "DOC\_UUID\|SHEET\_NUM" (for example + "kjsdfhkjh4rsEFSDFEWR232jkddsfh\|0") +4. 
You (might) need to install the ``gspread``, ``oauth2client`` and + ``dateutil`` packages as they are not included in the + ``requirements.txt`` file. + +MongoDB +------- + +- **Type**: mongo +- **Options**: + + - Connection String (connectionString) (mandatory) + - Database name (dbName) + - Replica set name (replicaSetName) + +- **Options string format (for v0.5 and older)**: { "connectionString" + : "mongodb://user:password@localhost:27017/mydb", "dbName" : "mydb" } + +For ReplicaSet databases use the following connection string: \* +**Options string format**: { "connectionString" : +"mongodb://user:password@server1:27017,server2:27017/mydb", "dbName" : +"mydb", "replicaSetName" : "myreplicaSet" } + +Notes: + +1. You need to install ``pymongo``, as it is not included in the + ``requirements.txt`` file. + +URL +--- + +A URL based data source which requests URLs that conform to the +supported :doc:`results JSON +format <dev/results_format>`. + +Very useful in situations where you want to expose the data without +connecting directly to the database. + +The query itself inside re:dash will simply contain the URL to be +executed (i.e. http://myserver/path/myquery) + +- **Type**: url +- **Options**: + + - Url (url) + +- **Options string format (optional) (for v0.5 and older)**: + http://myserver/path/ + +Notes: + +1. All URLs must return the supported :doc:`results JSON + format <dev/results_format>`. +2. If the Options string is set, only URLs that are part of the supplied + path can be executed using this data source. Not setting the options + path allows any URL to be executed as long as it returns the + supported :doc:`results JSON + format <dev/results_format>`. + +Script +------ + +Allows executing any executable script residing on the server as long as +its standard output conforms to the supported :doc:`results JSON +format <dev/results_format>`. + +This integration is useful in situations where you need more than just a +query and requires some processing to happen. 
+ +Once the path to scripts is configured in the datasource the query needs +to contain the file name of the script as well as any command line +parameters the script requires (i.e. myscript.py param1 param2 +--param3=value) + +- **Type**: script +- **Options**: + + - Scripts Path (path) (mandatory) + +- **Options string format (for v0.5 and older)**: /path/to/scripts/ + +Notes: + +1. You MUST set a path to execute the scripts, otherwise the data source + will not work. +2. All scripts must be executable, otherwise results won't return +3. The script data source does not allow relative paths in the form of + "../". You may use a relative sub path such as "./mydir/myscript". +4. All scripts must output to the standard output the supported :doc:`results + JSON format <dev/results_format>` and + only that, otherwise the data source will not be able to load the + data. + +Python +------ + +Execute other queries, manipulate and compute with Python code +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Python data source allows running Python code in a secure and safe +environment. It won't allow writing files to disk, importing modules +that were not pre-approved in the configuration etc. + +One of the benefits of using the Python data source is its ability to +execute queries (or saved queries) which you can store in a variable and +then manipulate/transform/merge with other data and queries. + +You can import data analysis libraries such as Pandas, NumPy and SciPy. + +This saves the trouble of having outside scripts do the synthesis of +data from multiple sources to create a single data set that can then be +used in dashboards. + +- **Type**: Python +- **Options**: + + - Allowed Modules in a comma separated list (optional). 
**NOTE:** + You MUST make sure these modules are installed on the machine + running the Celery workers diff --git a/docs/dev.rst b/docs/dev.rst new file mode 100644 index 00000000000..a05d1578822 --- /dev/null +++ b/docs/dev.rst @@ -0,0 +1,11 @@ +Developer Information +===================== + +.. toctree:: + :maxdepth: 2 + :glob: + + dev/vagrant + dev/* + + diff --git a/docs/dev/query_execution.rst b/docs/dev/query_execution.rst new file mode 100644 index 00000000000..25c65c5d1b9 --- /dev/null +++ b/docs/dev/query_execution.rst @@ -0,0 +1,94 @@ +Query Execution Model +##################### + +Introduction +============ + +The first datasource which was used with re:dash was Redshift. Because +we had billions of records in Redshift, and some queries were costly to +re-run, from the get go there was the idea of caching query results in +re:dash. + +This was to relieve stress from the Redshift cluster and also to improve +user experience. + +How queries get executed and cached in re:dash? +=============================================== + +Server +------ + +To make sure each query is executed only once at any given time, we +translate the query to a ``query hash``, using the following code: + +.. code:: python + + COMMENTS_REGEX = re.compile("/\*.*?\*/") + + def gen_query_hash(sql): + sql = COMMENTS_REGEX.sub("", sql) + sql = "".join(sql.split()).lower() + return hashlib.md5(sql.encode('utf-8')).hexdigest() + +When query execution is done, the result gets stored to +``query_results`` table. Also we check for all queries in the +``queries`` table that have the same query hash and update their +reference to the query result we just saved +(`code `__). + +Client +------ + +The client (UI) will execute queries in two scenarios: + +1. (automatically) When opening a query page of a query that doesn't + have a result yet. +2. (manually) When the user clicks on "Execute". 
+ +In each case the client does a POST request to ``/api/query_results`` +with the following parameters: ``query`` (the query text), +``data_source_id`` (data source to execute the query with) and ``ttl``. + +When loading a cached result, ``ttl`` will be the one set to the query +(if it was set). This is a relic from previous versions, and I'm not +sure if it's really used anymore, as usually we will fetch query result +using its id. + +When loading a non cached result, ``ttl`` will be 0 which will "force" +the server to execute the query. + +As a response to ``/api/query_results`` the server will send either the +query results (in case of a cached query) or the job id of the currently +executing query. When a job id is received, the client will start polling on +this id, until a query result is received (this is encapsulated in +``Query`` and ``QueryResult`` services). + +Ideas on how to implement query parameters +========================================== + +Client side only implementation +------------------------------- + +(This was actually implemented. See pull request `#363 <https://github.com/EverythingMe/redash/pull/363>`__ for details.) + +The basic idea of how to implement parameterized queries is to treat the +query as a template and merge it with parameters taken from query string +or UI (or both). + +When the caching facility isn't required (with queries that return in a +reasonable time frame) the implementation can be completely client side +and the backend can be "blind" to the parameters - it just receives the +final query to execute and returns result. + +As one improvement over this, we can let the UI/user specify the TTL +value when making the request to ``/api/query_results``, in which case +caching will be available too, while not having to make the server aware +of the parameters. + +Hybrid +------ + +Another option will be to store the list of possible parameters for a +query, with their default/optional values. 
In such a case, the server can +prefetch all the options and cache them to provide faster results to the +client. diff --git a/docs/dev/results_format.rst b/docs/dev/results_format.rst new file mode 100644 index 00000000000..73b51fd32dc --- /dev/null +++ b/docs/dev/results_format.rst @@ -0,0 +1,30 @@ +Data Source Results Format +========================== + +All data sources in re:dash return the following results in JSON format: + +.. code:: javascript + + { + "columns" : [ + { + // Required: a unique identifier of the column name in this result + "name" : "COLUMN_NAME", + // Required: friendly name of the column that will appear in the results + "friendly_name" : "FRIENDLY_NAME", + // Optional: If not specified sort might not work well. + // Supported types: integer, float, boolean, string (default), datetime (ISO-8601 text format) + "type" : "VALUE_TYPE" + }, + ... + ], + "rows" : [ + { + // name is the column name as it appears in the columns above. + // VALUE is a valid JSON value. For dates it's an ISO-8601 string. + "name" : VALUE, + "name2" : VALUE2 + }, + ... + ] + } diff --git a/docs/dev/vagrant.rst b/docs/dev/vagrant.rst new file mode 100644 index 00000000000..39282f7f745 --- /dev/null +++ b/docs/dev/vagrant.rst @@ -0,0 +1,46 @@ +Setting up development environment (using Vagrant) +================================================== + +To simplify contribution there is a `Vagrant +box `__ available with all +the needed software to run re:dash for development (use it only for +development, for demo purposes there is +`redash/demo `__ box and the +AWS/GCE images). + +To get started with this box: + +1. Make sure you have a recent version of + `Vagrant `__ installed. +2. Clone the re:dash repository: + ``git clone https://github.com/EverythingMe/redash.git``. +3. Change dir into the repository (``cd redash``) and run + ``vagrant up``. This might take some time the first time you run it, + as it downloads the Vagrant virtual box. +4. 
Once Vagrant is ready, ssh into the instance (``vagrant ssh``), and + change dir to ``/opt/redash/current`` -- this is where your local + repository copy is synced to. +5. Copy ``.env`` file into this directory (``cp ../.env ./``). +6. From ``/opt/redash/current/rd_ui`` run ``bower install`` to install + frontend packages. This can be done from your host machine as well, + if you have bower installed. +7. Go back to ``/opt/redash/current`` and install python dependencies + ``sudo pip install -r requirements.txt`` +8. Apply migrations + + :: + + PYTHONPATH=. bin/run python migrations/0001_allow_delete_query.py + PYTHONPATH=. bin/run python migrations/0002_fix_timestamp_fields.py + PYTHONPATH=. bin/run python migrations/0003_update_data_source_config.py + PYTHONPATH=. bin/run python migrations/0004_allow_null_in_event_user.py + PYTHONPATH=. bin/run python migrations/0005_add_updated_at.py + PYTHONPATH=. bin/run python migrations/0006_queries_last_edit_by.py + PYTHONPATH=. bin/run python migrations/0007_add_schedule_to_queries.py + PYTHONPATH=. bin/run python migrations/0008_make_ds_name_unique.py + PYTHONPATH=. bin/run python migrations/0009_add_api_key_to_user.py + +9. Start the server and background workers with + ``bin/run honcho start -f Procfile.dev``. +10. Now the server should be available on your host on port 9001 and you + can log in with username admin and password admin. diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000000..8712c0f9379 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,68 @@ +.. image:: http://redash.io/static/img/redash_logo.png + :width: 200px + +Open Source Data Collaboration and Visualization Platform +========================================================= + +**re:dash** is our take on freeing the data within our company in a way that will better fit our culture and usage patterns. + +Prior to **re:dash**, we tried to use traditional BI suites and discovered a set of bloated, technically challenged and slow tools/flows. 
What we were looking for was a more hacker'ish way to look at data, so we built one. + +**re:dash** was built to allow fast and easy access to billions of records, that we process and collect using Amazon Redshift ("petabyte scale data warehouse" that "speaks" PostgreSQL). +Today **re:dash** has support for querying multiple databases, including: Redshift, Google BigQuery, Google Spreadsheets, PostgreSQL, MySQL, Graphite and custom scripts. + +Features +######## + +1. **Query Editor**: think of `JS Fiddle`_ for SQL queries. It's your way to share data in the organization in an open way, by sharing both the dataset and the query that generated it. This way everyone can peer review not only the resulting dataset but also the process that generated it. +2. **Visualizations**: once you have a dataset, you can create different visualizations out of it. Currently it supports charts, pivot table and cohorts. +3. **Dashboards**: combine several visualizations into a single dashboard. + +Demo +#### + +.. figure:: https://raw.github.com/EverythingMe/redash/screenshots/screenshots.gif + :alt: Screenshots + +You can try out the demo instance: `http://demo.redash.io`_ (login with any Google account). + +.. _`http://demo.redash.io`: http://demo.redash.io +.. _JS Fiddle: http://jsfiddle.net + +Getting Started +############### + +:doc:`Setting up re:dash instance <setup>` (includes links to ready made AWS/GCE images). + +Getting Help +############ + +* Source: https://github.com/everythingme/redash +* Issues: https://github.com/everythingme/redash/issues +* Mailing List: https://groups.google.com/forum/#!forum/redash-users +* Gitter (chat): https://gitter.im/EverythingMe/redash +* Contact Arik, the maintainer, directly: arik@everything.me. + +Contributing +############ +**re:dash** has a growing community and contributions are always welcome (particularly documentation). To contribute, fork the project on GitHub and send a pull request. + +.. 
toctree:: + :maxdepth: 2 + + setup + upgrade + datasources + usage + dev + misc + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/misc.rst b/docs/misc.rst new file mode 100644 index 00000000000..415b0184115 --- /dev/null +++ b/docs/misc.rst @@ -0,0 +1,10 @@ +Miscellaneous +============= + +.. toctree:: + :maxdepth: 2 + :glob: + + misc/* + + diff --git a/docs/misc/google_developers_project.rst b/docs/misc/google_developers_project.rst new file mode 100644 index 00000000000..86668914eb4 --- /dev/null +++ b/docs/misc/google_developers_project.rst @@ -0,0 +1,50 @@ +How To: Create a Google Developers Project +========================================== + +1. Go to the `Google Developers + Console `__. +2. Select a project, or create a new one by clicking Create Project: + + 1. In the Project name field, type in a name for your project. + 2. In the Project ID field, optionally type in a project ID for your + project or use the one that the console has created for you. This + ID must be unique world-wide. + 3. Click the **Create** button and wait for the project to be + created. + 4. Click on the new project name in the list to start editing the + project. + +3. In the left sidebar, select the **APIs** item below "APIs & auth". A + list of Google web services appears. +4. Find the **Google+ API** service and set its status to **ON**—notice + that this action moves the service to the top of the list. +5. In the sidebar under "APIs & auth", select **Consent screen**. + + - Choose an Email Address and specify a Product Name. + +6. In the sidebar under "APIs & auth", select **Credentials**. +7. Click **Create a new Client ID** — a dialog box appears. + + - In the **Application type** section of the dialog, select **Web + application**. + - In the **Authorized JavaScript origins** field, enter the origin + for your app. You can enter multiple origins to use with multiple + re:dash instances. 
Wildcards are not allowed. In the example below, + we assume your re:dash instance address is *redash.example.com*: + + :: + + http://redash.example.com + https://redash.example.com + + - In the Authorized redirect URI field, enter the redirect URI + callback: + + :: + + http://redash.example.com/oauth/google_callback + + - Click the ``Create Client ID`` button. + +8. In the resulting **Client ID for web application** section, copy the + **Client ID** and **Client secret** to your ``.env`` file. diff --git a/docs/misc/ssl.rst b/docs/misc/ssl.rst new file mode 100644 index 00000000000..6f189663779 --- /dev/null +++ b/docs/misc/ssl.rst @@ -0,0 +1,59 @@ +SSL (HTTPS) Setup +================= + +If you used the provided images or the bootstrap script, to start using +SSL with your instance you need to: + +1. Update the nginx config file (``/etc/nginx/sites-available/redash``) + with SSL configuration (see the example below). Make sure to upload + the certificate to the server, and set the paths correctly in the new + config. + +2. Open port 443 in your security group (if using AWS or GCE). + +.. code:: nginx + + upstream redash_servers { + server 127.0.0.1:5000; + } + + server { + listen 80; + + # Allow accessing /ping without https. Useful when placing behind load balancer. + location /ping { + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_pass http://redash_servers; + } + + location / { + # Enforce SSL. + return 301 https://$host$request_uri; + } + } + + server { + listen 443 ssl; + + # Make sure to set paths to your certificate .pem and .key files. 
+ ssl on; + ssl_certificate /path-to/cert.pem; # or crt + ssl_certificate_key /path-to/cert.key; + + access_log /var/log/nginx/redash.access.log; + + gzip on; + gzip_types *; + gzip_proxied any; + + location / { + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://redash_servers; + proxy_redirect off; + } + } diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000000..04c857701aa --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx +sphinx-autobuild +sphinx_rtd_theme diff --git a/docs/setup.rst b/docs/setup.rst new file mode 100644 index 00000000000..8bb119bdd1b --- /dev/null +++ b/docs/setup.rst @@ -0,0 +1,159 @@ +Setting up re:dash instance +########################### + +The `provisioning +script `__ +works on Ubuntu 12.04, Ubuntu 14.04 and Debian Wheezy. This script +installs all needed dependencies and creates basic setup. + +To ease the process, there are also images for AWS and Google Compute +Cloud. These images are created with the same provision script using Packer. + +Create an instance +================== + +Google Compute Engine +--------------------- + +First, you need to add the images to your account: + +.. code:: bash + + $ gcloud compute images add redash-063-b906 gs://redash-images/redash.0.6.3.b906.tar.gz + +Next you need to launch an instance using this image (n1-standard-1 +instance type is recommended). If you plan to use re:dash with BigQuery, +you can use a dedicated image which comes with BigQuery preconfigured +(using instance permissions): + +.. code:: bash + + $ gcloud compute images add redash-063-b906-bq gs://redash-images/redash.0.6.3.b906-bq.tar.gz + +Note that you need to launch this instance with BigQuery access: + +.. 
code:: bash + + $ gcloud compute instances create <your_instance_name> --image redash-063-b906-bq --scopes storage-ro bigquery + +(the same can be done from the web interface, just make sure to enable +BigQuery access) + +Now proceed to `"Setup" <#setup>`__. + +AWS +--- + +Launch the instance from the pre-baked AMI (for small deployments +t2.micro should be enough): + +- us-east-1: + `ami-47b4612c `__ +- us-west-1: + `ami-a72edde3 `__ +- us-west-2: + `ami-f9d6d5c9 `__ +- eu-central-1: + `ami-72eed46f `__ +- eu-west-1: + `ami-5a135c2d `__ +- sa-east-1: + `ami-2b78f436 `__ +- ap-northeast-1: + `ami-0a55fd0a `__ +- ap-southeast-2: + `ami-9f793ea5 `__ +- ap-southeast-1: + `ami-12545740 `__ + +Now proceed to `"Setup" <#setup>`__. + +Other +----- + +Download the provision script and run it on your machine. Note that: + +1. You need to run the script as root. +2. It was tested only on Ubuntu 12.04, Ubuntu 14.04 and Debian Wheezy. + +Setup +===== + +Once you created the instance with either the image or the script, you +should have a running re:dash instance with everything you need to get +started. You can even log in to it with the user "admin" (password: +"admin"). But to make it useful, there are a few more steps that you +need to manually do to complete the setup: + +First ssh to your instance and change directory to ``/opt/redash``. If +you're using the GCE image, switch to root (``sudo su``). + +Users & Google Authentication setup +----------------------------------- + +Most of the settings you need to edit are in the ``/opt/redash/.env`` +file. + +1. Update the cookie secret (important! otherwise anyone can sign new + cookies and impersonate users): change "veryverysecret" in the line: + ``export REDASH_COOKIE_SECRET=veryverysecret`` to something else (you + can use ``pwgen 32 -1`` to generate random string). + +2. By default we create an admin user with the password "admin". 
You + need to change the password: + + - ``cd /opt/redash/current`` + - ``sudo -u redash bin/run ./manage.py users password admin {new password}`` + +3. If you want to use Google OAuth to authenticate users, you need to + create a Google Developers project (see :doc:`instructions <misc/google_developers_project>`) + and then add the needed configuration in the ``.env`` file: + +.. code:: + + export REDASH_GOOGLE_CLIENT_ID="" + export REDASH_GOOGLE_CLIENT_SECRET="" + export REDASH_GOOGLE_APPS_DOMAIN="" + + + +``REDASH_GOOGLE_CLIENT_ID`` and ``REDASH_GOOGLE_CLIENT_SECRET`` are the values you get after registering with Google. ``REDASH_GOOGLE_APPS_DOMAIN`` is used in case you want to limit access to a single Google Apps domain (*if you leave it empty anyone with a Google account can access your instance*). + +4. Restart the web server to apply the configuration changes: + ``sudo supervisorctl restart redash_server``. + +5. Once you have Google OAuth enabled, you can login using your Google + Apps account. If you want to grant admin permissions to some users, + you can do it with the ``users grant_admin`` command: + ``sudo -u redash bin/run ./manage.py users grant_admin {email}``. + +6. If you don't use Google OAuth or just need username/password logins, + you can create additional users using the CLI (see :doc:`documentation <usage/users>`). + +Datasources +----------- + +To make re:dash truly useful, you need to setup your data sources in it. +Currently all data sources management is done with the CLI. + +See +:doc:`documentation <datasources>` +for the different options. Your instance comes ready with dependencies +needed to setup supported sources. + +Follow issue +`#193 <https://github.com/everythingme/redash/issues/193>`__ to know +when a UI for managing data sources is implemented. + +How to upgrade? +--------------- + +It's recommended to upgrade your re:dash instance once in a while to +benefit from bug fixes and new features. See :doc:`here <upgrade>` for full upgrade +instructions (including Fabric script). 
+ +Notes +===== + +- If this is a production setup, you should enforce HTTPS and make sure + you set the cookie secret (see :doc:`instructions <misc/ssl>`). diff --git a/docs/upgrade.rst b/docs/upgrade.rst new file mode 100644 index 00000000000..5df1b2e718b --- /dev/null +++ b/docs/upgrade.rst @@ -0,0 +1,34 @@ +How to Upgrade +############## + +It's recommended to upgrade your re:dash instance once there are new +releases, to benefit from new features and bug fixes. The upgrade +process is relatively simple, and assuming you used one of the base +images we provide, you can just use the +`Fabric <http://www.fabfile.org/>`__ script provided here: +https://gist.github.com/arikfr/440d1403b4aeb76ebaf8. + +How to run the Fabric script +============================ + +1. Install Fabric: ``pip install fabric requests`` (needed only once) +2. Download the ``fabfile.py`` from the gist. +3. Run the script: + ``fab -H{your re:dash host} -u{the ssh user for this host} deploy_latest_release`` + +What the Fabric script does +=========================== + +Even if you didn't use the image, it's very likely you can reuse most of +this script with small modifications. What this script does is: + +1. Find the URL of the latest release tarball (from `GitHub releases + page <https://github.com/everythingme/redash/releases>`__). +2. Download it. +3. Create new directory for this version (for example: + ``/opt/redash/redash.0.5.0.b685``). +4. Unpack that (``tar -C {dir} -xvf {tarball path}``). +5. Link ``/opt/redash/.env`` file into this directory. +6. Apply any new migrations. +7. Link ``/opt/redash/current`` to new version. +8. Restart web server and celery workers. diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 00000000000..5aac023acee --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,12 @@ +Usage +===== + +.. 
toctree:: + :maxdepth: 2 + :glob: + + usage/maintenance.rst + usage/users.rst + usage/* + + diff --git a/docs/usage/elasticsearch_querying.rst b/docs/usage/elasticsearch_querying.rst new file mode 100644 index 00000000000..437e5b293d9 --- /dev/null +++ b/docs/usage/elasticsearch_querying.rst @@ -0,0 +1,48 @@ +ElasticSearch: Querying +####################### + +ElasticSearch currently supports only simple Lucene style queries (like +Kibana but without the aggregation). + +Full blown JSON based ElasticSearch queries (including aggregations) +will be added later. + +Simple query example: +===================== + +- Query the index named "twitter" +- Filter by "user:kimchy" +- Return the fields: "@timestamp", "tweet" and "user" +- Return up to 15 results +- Sort by @timestamp ascending + +.. code:: json + + { + "index" : "twitter", + "query" : "user:kimchy", + "fields" : ["@timestamp", "tweet", "user"], + "size" : 15, + "sort" : "@timestamp:asc" + } + +Simple query on a logstash ElasticSearch instance: +================================================== + +- Query the index named "logstash-2015.04.\*" (in this case it's all of + April 2015) +- Filter by type:events AND eventName:UserUpgrade AND channel:selfserve +- Return fields: "@timestamp", "userId", "channel", "utm\_source", + "utm\_medium", "utm\_campaign", "utm\_content" +- Return up to 250 results +- Sort by @timestamp ascending + +.. 
code:: json + + { + "index" : "logstash-2015.04.*", + "query" : "type:events AND eventName:UserUpgrade AND channel:selfserve", + "fields" : ["@timestamp", "userId", "channel", "utm_source", "utm_medium", "utm_campaign", "utm_content"], + "size" : 250, + "sort" : "@timestamp:asc" + } diff --git a/docs/usage/maintenance.rst b/docs/usage/maintenance.rst new file mode 100644 index 00000000000..fad18d06b27 --- /dev/null +++ b/docs/usage/maintenance.rst @@ -0,0 +1,94 @@ +Ongoing Maintenance and Basic Operations +######################################## + +Configuration and logs +====================== + +The supervisor config can be found in +``/opt/redash/supervisord/supervisord.conf``. + +There you can see the names of its programs (``redash_celery``, +``redash_server``) and the location of their logs. + +Restart +======= + +Restarting the Web Server +------------------------- + +``sudo supervisorctl restart redash_server`` + +Restarting Celery Workers +------------------------- + +``sudo supervisorctl restart redash_celery`` + +Restarting Celery Workers & the Queries Queue +--------------------------------------------- + +In case you are handling a problem, and you need to stop the currently +running queries and reset the queue, follow the steps below. + +1. Stop celery: ``sudo supervisorctl stop redash_celery`` (celery might + take some time to stop, if it's in the middle of running a query) + +2. Flush redis: ``redis-cli flushdb`` + +3. Start celery: ``sudo supervisorctl start redash_celery`` + +Changing the Number of Workers +============================== + +By default, Celery will start a worker per CPU core. Because most of +re:dash's tasks are IO bound, the real limit for number of workers you +can use depends on the amount of memory your machine has. It's +recommended to increase number of workers, to support more concurrent +queries. + +1. Open the supervisord configuration file: + ``/opt/redash/supervisord/supervisord.conf`` + +2. 
Edit the ``[program:redash_celery]`` section and add to the *command* + value, the param "-c" with the number of concurrent workers you need. + +3. Restart supervisord to apply new configuration: + ``sudo /etc/init.d/redash_supervisord restart``. + +DB +== + +Show the Currently Configured Data Source +----------------------------------------- + +This varies based on the redash version and personal preferences. You +can do one of the following: + +Using the CLI +~~~~~~~~~~~~~ + +In ``/opt/redash/current``, run: +``sudo -u redash bin/run ./manage.py ds list`` + +Using the Admin +~~~~~~~~~~~~~~~ + +(available from version 0.6b797). Browse to ``/admin/datasource`` + +View the Definition Directly in the DB +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Open psql: ``sudo -u redash psql`` + +2. Run the query: ``SELECT * from data_sources;`` + +Backup re:dash's DB: +-------------------- + +``sudo -u redash pg_dump > backup_filename.sql`` + +Version +======= + +See current version: + +``bin/run ./manage.py version`` diff --git a/docs/usage/mongodb_querying.rst b/docs/usage/mongodb_querying.rst new file mode 100644 index 00000000000..fdc623829d7 --- /dev/null +++ b/docs/usage/mongodb_querying.rst @@ -0,0 +1,74 @@ +MongoDB: Querying +################# + +Simple query example: +===================== + +.. code:: json + + { + "collection" : "my_collection", + "query" : { + "date" : { + "$gt" : "ISODate(\"2015-01-15 11:41\")" + }, + "type" : 1 + }, + "fields" : { + "_id" : 1, + "name" : 2 + }, + "sort" : [ + { + "name" : "date", + "direction" : -1 + } + ] + } + +Live example on the demo instance: +http://demo.redash.io/queries/394/source. + +Aggregation +=========== + +Uses a syntax similar to the one used in PyMongo, however to support the +correct order of sorting, it uses a regular list for the "$sort" +operation that converts into a SON (sorted dictionary) object before +execution. + +Aggregation query example: + +.. 
code:: json + + { + "collection" : "things", + "aggregate" : [ + { + "$unwind" : "$tags" + }, + { + "$group" : { + "_id" : "$tags", + "count" : { "$sum" : 1 } + } + }, + { + "$sort" : [ + { + "name" : "count", + "direction" : -1 + }, + { + "name" : "_id", + "direction" : -1 + } + ] + } + ] + } + +Live examples on the demo instance: + +1. http://demo.redash.io/queries/393/source +2. http://demo.redash.io/queries/387/source diff --git a/docs/usage/users.rst b/docs/usage/users.rst new file mode 100644 index 00000000000..b6e0df408b3 --- /dev/null +++ b/docs/usage/users.rst @@ -0,0 +1,39 @@ +Users' Management +################# + +If you use Google OpenID authentication, then each user from the domains +you allowed will automatically be logged in and have the default +permissions. + +If you want to give some user different permissions or you want to +create password based users (make sure you enabled this option in +settings first), you need to use the CLI (``manage.py``). + +Create a new user +================= + +.. code:: bash + + $ bin/run ./manage.py users create --help + usage: users create [-h] [--permissions PERMISSIONS] [--password PASSWORD] + [--google] [--admin] + name email + + positional arguments: + name User's full name + email User's email + + optional arguments: + -h, --help show this help message and exit + --permissions PERMISSIONS + Comma seperated list of permissions (leave blank for + default). + --password PASSWORD Password for users who don't use Google Auth (leave + blank for prompt). + --google user uses Google Auth to login + --admin set user as admin + +Grant admin permissions +======================= + +``sudo -u redash bin/run ./manage.py users grant_admin {email}``