From 4b6d9abd4e3f565205c1d390f7eb59ee8e954e5f Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Sun, 3 Jul 2016 22:14:30 -0700 Subject: [PATCH 1/4] Add documentation --- docs/Makefile | 181 +++++++++++++++++++++ docs/README.md | 23 +++ docs/autoreload.py | 6 + docs/conf.py | 267 +++++++++++++++++++++++++++++++ docs/configuration_options.rst | 2 + docs/elasticsearch_connector.rst | 101 ++++++++++++ docs/index.rst | 12 ++ docs/make.bat | 242 ++++++++++++++++++++++++++++ docs/requirements.txt | 3 + 9 files changed, 837 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/README.md create mode 100755 docs/autoreload.py create mode 100644 docs/conf.py create mode 100644 docs/configuration_options.rst create mode 100644 docs/elasticsearch_connector.rst create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..e0722507e --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,181 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext livehtml + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " livehtml to make standalone HTML files automatically watching for changes" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +livehtml: + python autoreload.py + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/KafkaRESTProxy.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/KafkaRESTProxy.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/KafkaRESTProxy" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/KafkaRESTProxy" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
+ +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..01028472c --- /dev/null +++ b/docs/README.md @@ -0,0 +1,23 @@ +This documentation is built using [Sphinx](http://sphinx-doc.org). It also uses some extensions for theming and REST API +documentation support. + +Start by installing the requirements: + + pip install -r requirements.txt + +Then you can generate the HTML version of the docs: + + make html + +The root of the documentation will be at `_build/html/index.html` + +While editing the documentation, you can get a live preview using python-livepreview. Install the Python library: + + pip install livereload + +Then run the monitoring script in the background: + + python autoreload.py & + +If you install the [browser extensions](http://livereload.com/) then everything should update every time any files are +saved without any manual steps on your part. 
\ No newline at end of file diff --git a/docs/autoreload.py b/docs/autoreload.py new file mode 100755 index 000000000..9f15f5069 --- /dev/null +++ b/docs/autoreload.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python +from livereload import Server, shell + +server = Server() +server.watch('*.rst', shell('make html')) +server.serve() diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..6d974333d --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +# +# Kafka Connect JDBC documentation build configuration file, created by +# sphinx-quickstart on Wed Dec 17 14:17:15 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.ifconfig', 'sphinxcontrib.httpdomain'] + +def setup(app): + app.add_config_value('platform_docs', True, 'env') + +# Even if it has a default, these options need to be specified +platform_docs = False + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Kafka Connect Elasticsearch' +copyright = u'2016, Confluent, Inc.' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '3.0' +# The full version, including alpha/beta/rc tags. +release = '3.1.0-SNAPSHOT' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. 
+#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +import sphinx_rtd_theme + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'KafkaConnectElasticsearchDoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). 
+#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'KafkaConnectElasticserch.tex', u'Kafka Connect Elasticsearch Documentation', + u'Confluent, Inc.', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'kafkaconnectelasticsearch', u'Kafka Connect Elasticsearch', + [u'Confluent, Inc.'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'KafkaConnectElasticsearch', u'Kafka Connect Elasticsearch Documentation', + u'Confluent, Inc.', 'KafkaConnectElasticsearch', + 'Kafka Connector for Elasticsearch sink', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/configuration_options.rst b/docs/configuration_options.rst new file mode 100644 index 000000000..3aab43dee --- /dev/null +++ b/docs/configuration_options.rst @@ -0,0 +1,2 @@ +Configuration Options +--------------------- diff --git a/docs/elasticsearch_connector.rst b/docs/elasticsearch_connector.rst new file mode 100644 index 000000000..033abdef1 --- /dev/null +++ b/docs/elasticsearch_connector.rst @@ -0,0 +1,101 @@ +Elasticsearch Connector +======================== + +The Elasticsearch connector allows to move data out of Kafka to Elasticsearch. + +Quickstart +---------- +In this Quickstart, we use the Elasticsearch connector to export data produced by the Avro console +producer to Elasticsearch. + +Start Zookeeper, Kafka and SchemaRegistry if you haven't done so. The instructions on how to start +these services are available at the Confluent Platform QuickStart. You also need to have +Elasticsearch running locally or remotely and make sure that you know the address to connect to +Elasticsearch. + +This Quickstart assumes that you started the required services with the default configurations and +you should make necessary changes according to the actual configurations used. 
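+
+Before moving on, it can help to verify that Elasticsearch is reachable. As a quick sanity check
+(assuming a local instance listening on the default port), query the root endpoint and confirm
+that it returns the cluster information::
+
+  $ curl http://localhost:9200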
+ +First, start the Avro console producer:: + + $ ./bin/kafka-avro-console-producer --broker-list localhost:9092 --topic test-elasticsearch \ + --property value.schema='{"type":"record","name":"myrecord","fields":[{"name":"f1","type":"string"}]}' + +Then in the console producer, type in:: + + {"f1": "value1"} + {"f1": "value2"} + {"f1": "value3"} + +The three records entered are published to the Kafka topic ``test-elasticsearch`` in Avro format. + +Before starting the connector, please make sure that the configurations in +``etc/kafka-connect-elasticsearch/quickstart-elasticsearch.properties`` are properly set to your +configurations of Elasticsearch, e.g. ``http.address`` points to the correct http address. +Then run the following command to start Kafka Connect with the Elasticsearch connector:: + + $ ./bin/connect-standalone etc/schema-registry/connect-avro-standalone.properties \ + etc/kafka-connect-elasticsearch/quickstart-elasticsearch.properties + +You should see that the process starts up and logs some messages, and then exports data from Kafka +to Elasticsearch. Once the connector finishes ingesting data to Elasticsearch, check that the data +is available in Elasticsearch:: + + $ search + +Features +-------- +The Elasticsearch connector offers a bunch of features: + +* **Exactly Once Delivery**: The connector relies on Elasticsearch's write semantics to ensure + exactly once delivery to Elasticsearch. In case of keys are ignored, the Elasticsearch connector + uses ``topic+partition+offset`` as document id to ensure that the same record in Kafka only creates + one document in Elasticsearch even in case that it is written to Elasticsearch multiple times. + In case of keys are kept , the keys are used as the document ids and the connector ensures that + only one document for the same key exists in Elasticsearch. + +* **Batching and Pipelining**: The connector supports batching and pipelined writing to Elasticsearch. + It accumulates messages in batches and allows concurrent processing of multiple batches. + +* **Delivery Ordering**: When pipelining is turned off, the connector supports ordering of delivery + on a per batch basis. This is useful for cases that ordering of messages is important. + +* **Mapping Inference**: The connector can infer mappings from the Kafka Connect schemas. + When this is enabled, the connector creates a mapping based on the schema from the message. However + , the inference is limited to field types and default values when a field is missing. If more + customizations are needed (e.g. user defined analyzers) for indices, we highly recommend to + manually create mappings. + +* **Schema Evolution**: The connector supports schema evolution can handle backward, forward and + full compatible changes of schemas in Kafka Connect. It can also handle some incompatible schema + changes such as changing a field from integer to string. + +Schema Evolution +---------------- +The Elasticsearch connector writes data in different topics in Kafka to different indices. All +data for a topic will have the same type in Elasticseearch. This allows independent evolution of +schemas for data in different topics. This simplifies the schema evolution as the enforcement for +mappings in Elasticsearch is the following: all fields with the same name in the same index must +have the same mapping. + +The Elasticsearch connector supports schema evolution as mappings in Elasticsearch are more +flexible than the Schema evolution allowed in Kafka Connect. 
New fields can be added as +Elasticsearch can infer the type for each newly added fields. Missing fields will be treated +as the null value defined for those fields in the mapping. Moreover, type changes for fields are +allowed in case that the types can be merged. For example, if a field defined as a string type. +If we change schema for that field to integer type later, the connector can still write the records +with the new schema to Elasticsearch. As the mappings are more flexible, schema compatibility +should be enforced when writing data to Kafka. + +However, some incompatible changes of Kafka Connect can cause the connector to fail to write to +Elasticsearch. For example, the connector is not able to write the data to Elasticsearch if we +change a field in Kafka Connect schema from string to integer. + +Also, there are some changes that are not allowed after a mapping is already defined. Although you +can add new types to an index, or add new fields to a type, you can’t add new analyzers or +make changes to existing fields. If you were to do so, the data that had already been indexed would +be incorrect and your searches would no longer work as expected. It is highly recommended that +to manually define mappings before writing data to Elasticsearch. + + + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..b4ca6cdc2 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,12 @@ +.. _connect_elasticsearch: + +Confluent Elasticsearch Connector +================================= + +Contents: + +.. toctree:: + :maxdepth: 3 + + elasticsearch_connector + configuration_options diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 000000000..680c3febf --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,242 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\KafkaRESTProxy.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\KafkaRESTProxy.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +:end diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..6ff9a8162 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +Sphinx +sphinx_rtd_theme +sphinxcontrib-httpdomain From 03c67871d18a6ab295df5db5c59b541a0c840340 Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Mon, 11 Jul 2016 14:15:09 -0700 Subject: [PATCH 2/4] Doc improvements --- docs/conf.py | 2 +- docs/elasticsearch_connector.rst | 166 ++++++++++++++++++++++--------- 2 files changed, 121 insertions(+), 47 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 6d974333d..40e2fc950 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -57,7 +57,7 @@ def setup(app): # built documents. # # The short X.Y version. -version = '3.0' +version = '3.1' # The full version, including alpha/beta/rc tags. release = '3.1.0-SNAPSHOT' diff --git a/docs/elasticsearch_connector.rst b/docs/elasticsearch_connector.rst index 033abdef1..e3efce97d 100644 --- a/docs/elasticsearch_connector.rst +++ b/docs/elasticsearch_connector.rst @@ -1,20 +1,44 @@ Elasticsearch Connector ======================== - -The Elasticsearch connector allows to move data out of Kafka to Elasticsearch. +The Elasticsearch connector allows moving data from Kafka to Elasticsearch. It writes data from +a topic in Kafka to an `index `_ +in Elasticsearch and all data for a topic have the same +`type `_. + +Elasticsearch is often used for text queries, analytics and as an key-value store +(`use cases `_). The connector covers +both the analytics and key-value store use cases. For the analytics use case, +each message is in Kafka is treated as an event and the connector uses ``topic+partition+offset`` +as unique identifiers for events, which then converted to unique documents in Elasticsearch. +For the key-value store use case, it supports using keys from Kafka messages as document ids in +Elasticsearch and provides configurations ensuring that updates to a key is written to Elasticsearch +in order. For both use cases, Elasticsearch's idempotent write semantics guarantees exactly once +delivery. 
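+
+For example, a minimal sink connector configuration for the key-value store use case might look
+like the following sketch, reusing the property names described in the configuration options
+section (``key.ignore=false`` keeps Kafka message keys as document ids, while ``key.ignore=true``
+switches to ``topic+partition+offset`` ids for the analytics use case)::
+
+  # Hypothetical standalone connector config; adjust names and the Elasticsearch address as needed.
+  name=elasticsearch-sink
+  connector.class=io.confluent.connect.elasticsearch.ElasticsearchSinkConnector
+  tasks.max=1
+  topics=test-elasticsearch-sink
+  key.ignore=false
+  connection.url=http://localhost:9200
+  type.name=kafka-connect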
+ +`Mapping `_ is the +process of defining how a document, and the fields it contains, are stored and indexed. Users can +explicitly define mappings for types in indices. When mapping is not explicitly defined, +Elasticsearch can determine field names and types from data, however, some types such as timestamp +and decimal may not be correctly inferred. To ensure that the types are correctly inferred, the +connector provides a feature to infer mapping from the schemas of Kafka messages. Quickstart ---------- In this Quickstart, we use the Elasticsearch connector to export data produced by the Avro console producer to Elasticsearch. -Start Zookeeper, Kafka and SchemaRegistry if you haven't done so. The instructions on how to start -these services are available at the Confluent Platform QuickStart. You also need to have +Start Zookeeper, Kafka and Schema Registry if you haven't done so. You also need to have Elasticsearch running locally or remotely and make sure that you know the address to connect to Elasticsearch. -This Quickstart assumes that you started the required services with the default configurations and -you should make necessary changes according to the actual configurations used. +.. ifconfig:: platform_docs + + The instructions on how to start these services are available at the + :ref:`Confluent Platform quickstart`. + +This Quickstart assumes that you started the required services with the default configurations. +If you are not using the default settings, you should adjust the subsequent commands to account for +different hostnames and ports. First, start the Avro console producer:: @@ -31,7 +55,7 @@ The three records entered are published to the Kafka topic ``test-elasticsearch` Before starting the connector, please make sure that the configurations in ``etc/kafka-connect-elasticsearch/quickstart-elasticsearch.properties`` are properly set to your -configurations of Elasticsearch, e.g. ``http.address`` points to the correct http address. +configurations of Elasticsearch, e.g. ``connection.url`` points to the correct http address. Then run the following command to start Kafka Connect with the Elasticsearch connector:: $ ./bin/connect-standalone etc/schema-registry/connect-avro-standalone.properties \ @@ -43,59 +67,109 @@ is available in Elasticsearch:: $ search + Features -------- The Elasticsearch connector offers a bunch of features: -* **Exactly Once Delivery**: The connector relies on Elasticsearch's write semantics to ensure - exactly once delivery to Elasticsearch. In case of keys are ignored, the Elasticsearch connector - uses ``topic+partition+offset`` as document id to ensure that the same record in Kafka only creates - one document in Elasticsearch even in case that it is written to Elasticsearch multiple times. - In case of keys are kept , the keys are used as the document ids and the connector ensures that - only one document for the same key exists in Elasticsearch. - -* **Batching and Pipelining**: The connector supports batching and pipelined writing to Elasticsearch. - It accumulates messages in batches and allows concurrent processing of multiple batches. +* **Exactly Once Delivery**: The connector relies on Elasticsearch's idempotent write semantics to + ensure exactly once delivery to Elasticsearch. By setting ids in Elasticsearch documents, the + connector can ensure exactly once delivery. If keys are included in Kafka messages, these keys + are translated to Elasticsearch document ids automatically. 
+  When the keys are not included, or are explicitly ignored, the connector will use
+  ``topic+partition+offset`` as the key, ensuring that each message in Kafka has exactly one
+  corresponding document in Elasticsearch.
 
-* **Delivery Ordering**: When pipelining is turned off, the connector supports ordering of delivery
-  on a per batch basis. This is useful for cases that ordering of messages is important.
+* **Mapping Inference**: The connector can infer mappings from the Kafka Connect schemas. When
+  enabled, the connector creates mappings based on the schemas of Kafka messages. However, the
+  inference is limited to field types and default values when a field is missing. If more
+  customizations are needed (e.g. user defined analyzers), we highly recommend manually creating
+  mappings.
 
-* **Mapping Inference**: The connector can infer mappings from the Kafka Connect schemas.
-  When this is enabled, the connector creates a mapping based on the schema from the message. However
-  , the inference is limited to field types and default values when a field is missing. If more
-  customizations are needed (e.g. user defined analyzers) for indices, we highly recommend to
-  manually create mappings.
-
-* **Schema Evolution**: The connector supports schema evolution can handle backward, forward and
-  full compatible changes of schemas in Kafka Connect. It can also handle some incompatible schema
+* **Schema Evolution**: The connector supports schema evolution and can handle backward, forward and
+  fully compatible changes of schemas in Kafka Connect. It can also handle some incompatible schema
   changes such as changing a field from integer to string.
 
+Delivery Semantics
+------------------
+The connector supports batching and pipelined writes to Elasticsearch to boost throughput. It
+accumulates messages in batches and allows concurrent processing of multiple batches. However,
+when pipelining is enabled, the ordering of batches written to Elasticsearch is not guaranteed:
+a transient failure can cause a batch to be retried, so a batch sent later may be written to
+Elasticsearch first. Pipelining should be turned off when the order of batches is important
+(set ``max.in.flight.requests=1``). Moreover, batching should be turned off when the order of
+individual messages is important (set ``batch.size=1``).
+
+Mapping Management
+------------------
+Before using the connector, you need to think carefully about how the data should be tokenized,
+analyzed and indexed, all of which are determined by the mapping. Some changes are not allowed
+after a mapping is already defined. Although you can add new types to an index, or add new fields
+to a type, you can’t add new analyzers or make changes to existing fields. If you were to do so,
+the data that had already been indexed would be incorrect and your searches would no longer work
+as expected. It is highly recommended to manually define mappings before writing data to
+Elasticsearch.
+
+`Index templates `_
+can be helpful when manually defining mappings. They allow you to define templates that will
+automatically be applied when new indices are created. A template includes both settings and
+mappings, and a simple pattern that controls whether the template should be applied to the new
+index.
+
 Schema Evolution
 ----------------
-The Elasticsearch connector writes data in different topics in Kafka to different indices. All
-data for a topic will have the same type in Elasticseearch. This allows independent evolution of
-schemas for data in different topics. This simplifies the schema evolution as the enforcement for
-mappings in Elasticsearch is the following: all fields with the same name in the same index must
-have the same mapping.
+The Elasticsearch connector writes data from different topics in Kafka to different indices. All
+data for a topic will have the same type in Elasticsearch. This allows independent evolution of
+schemas for data from different topics. This simplifies schema evolution because Elasticsearch has
+one enforcement on mappings: all fields with the same name in the same index must have the same
+mapping.
+
+Elasticsearch supports dynamic mapping: when it encounters a previously unknown field in a document,
+it uses `dynamic mapping `_
+to determine the datatype for the field and automatically adds the new field to the type mapping.
 
-The Elasticsearch connector supports schema evolution as mappings in Elasticsearch are more
-flexible than the Schema evolution allowed in Kafka Connect. New fields can be added as
-Elasticsearch can infer the type for each newly added fields. Missing fields will be treated
-as the null value defined for those fields in the mapping. Moreover, type changes for fields are
-allowed in case that the types can be merged. For example, if a field defined as a string type.
-If we change schema for that field to integer type later, the connector can still write the records
-with the new schema to Elasticsearch. As the mappings are more flexible, schema compatibility
-should be enforced when writing data to Kafka.
+When dynamic mapping is enabled, the Elasticsearch connector supports schema evolution, as mappings
+in Elasticsearch are more flexible than the schema evolution allowed in Kafka Connect when different
+converters are used. For example, when the Avro converter is used, backward, forward and fully
+compatible schema evolutions are allowed.
 
-However, some incompatible changes of Kafka Connect can cause the connector to fail to write to
-Elasticsearch. For example, the connector is not able to write the data to Elasticsearch if we
-change a field in Kafka Connect schema from string to integer.
+When dynamic mapping is enabled, the Elasticsearch connector allows the following schema changes:
 
-Also, there are some changes that are not allowed after a mapping is already defined. Although you
-can add new types to an index, or add new fields to a type, you can’t add new analyzers or
-make changes to existing fields. If you were to do so, the data that had already been indexed would
-be incorrect and your searches would no longer work as expected. It is highly recommended that
-to manually define mappings before writing data to Elasticsearch.
+* **Adding Fields**: Adding one or more fields to Kafka messages. Elasticsearch will add the new
+  fields to the mapping when dynamic mapping is enabled.
+* **Removing Fields**: Removing one or more fields from Kafka messages. Missing fields will be
+  treated as the null value defined for those fields in the mapping.
+* **Changing types that can be merged**: For example, changing a field from integer type to string
+  type, as Elasticsearch can convert integers to strings.
+
+The following change is not allowed:
+
+* **Changing types that can not be merged**: For example, changing a field from string type to
+  integer type.
+
+As mappings are more flexible, schema compatibility should be enforced when writing data to Kafka.
+
+Reindexing
+----------
+In some cases, the way in which a set of documents is indexed may need to be changed: for example,
+the analyzer, the tokenizer, or which fields are indexed.
+As these settings cannot be changed once a mapping is defined, the data has to be reindexed.
+`Index aliases `_
+can be used to achieve reindexing with zero downtime. Here are the steps that need to be performed
+in Elasticsearch:
+
+  1. Create an alias for the index with the old mapping.
+  2. Point the applications that use the index to the alias.
+  3. Create a new index with the updated mapping.
+  4. Move the data from the old index to the new index.
+  5. Atomically move the alias to the new index.
+  6. Delete the old index.
+
+For zero downtime reindexing, write requests keep arriving during the reindexing period, and
+aliases do not allow writing to both the old and the new index at the same time. To solve this,
+the same data needs to be written to both the old and the new index.
+
+When the Elasticsearch connector is used to write data to Elasticsearch, we can use two
+connector jobs to achieve double writes:
+
+  1. The connector job that ingests data into the old indices continues writing to the old indices.
+  2. Create a new connector job that writes to the new indices. This will copy both old data and
+     new data to the new indices, as long as the data is still available in Kafka.
+  4. Once the data in the old indices are moved to the new indices by the reindexing process, we
+     can stop the old connector job.

From 07dd1c4ac63260f15d32e041f82f929a31009d21 Mon Sep 17 00:00:00 2001
From: Liquan Pei
Date: Fri, 15 Jul 2016 14:10:26 -0700
Subject: [PATCH 3/4] Add configuration options

---
 docs/configuration_options.rst   | 97 +++++++++++++++++++
 docs/elasticsearch_connector.rst |  8 +-
 .../ElasticsearchSinkConnectorConfig.java | 8 +-
 3 files changed, 105 insertions(+), 8 deletions(-)

diff --git a/docs/configuration_options.rst b/docs/configuration_options.rst
index 3aab43dee..b07a9951a 100644
--- a/docs/configuration_options.rst
+++ b/docs/configuration_options.rst
@@ -1,2 +1,99 @@
 Configuration Options
 ---------------------
+``connection.url``
+  The URL to connect to Elasticsearch.
+
+  * Type: string
+  * Default: ""
+  * Importance: high
+
+``type.name``
+  The type to use for each index.
+
+  * Type: string
+  * Default: ""
+  * Importance: high
+
+``key.ignore``
+  Whether to ignore the key during indexing. When this is set to true, only the value from the message will be written to Elasticsearch. Note that this is a global config that applies to all topics. Use ``topic.key.ignore`` to configure this for individual topics; the per-topic configuration overrides this value.
+
+  * Type: boolean
+  * Default: false
+  * Importance: high
+
+``batch.size``
+  The number of requests to process as a batch when writing to Elasticsearch.
+
+  * Type: int
+  * Default: 10000
+  * Importance: medium
+
+``max.in.flight.requests``
+  The maximum number of incomplete batches each task will send before blocking. Note that if this is set to be greater than 1 and there are failed sends, there is a risk of message re-ordering due to retries.
+
+  * Type: int
+  * Default: 5
+  * Importance: medium
+
+``flush.timeout.ms``
+  The timeout when flushing data to Elasticsearch.
+
+  * Type: long
+  * Default: 10000
+  * Importance: low
+
+``linger.ms``
+  The task groups together any records that arrive in between request transmissions into a single batched request. Normally this occurs only under load, when records arrive faster than they can be sent out. However, in some circumstances the task may want to reduce the number of requests even under moderate load. This setting accomplishes this by adding a small amount of artificial delay.
+  Rather than immediately sending out a record, the task will wait for up to the given delay to allow other records to be sent so that the sends can be batched together.
+
+  * Type: long
+  * Default: 1
+  * Importance: low
+
+``max.buffered.records``
+  Approximately the max number of records each task will buffer. This config controls the memory usage for each task. When the number of buffered records is larger than this value, the partitions assigned to this task will be paused.
+
+  * Type: long
+  * Default: 100000
+  * Importance: low
+
+``max.retry``
+  The max allowed number of retries. Allowing retries will potentially change the ordering of records.
+
+  * Type: int
+  * Default: 5
+  * Importance: low
+
+``retry.backoff.ms``
+  The amount of time to wait before attempting to retry a failed batch. This avoids repeatedly sending requests in a tight loop under some failure scenarios.
+
+  * Type: long
+  * Default: 100
+  * Importance: low
+
+``schema.ignore``
+  Whether to ignore schemas during indexing. When this is set to true, the schema in ``SinkRecord`` will be ignored and Elasticsearch will infer the mapping from data. Note that this is a global config that applies to all topics. Use ``topic.schema.ignore`` to configure this for individual topics; the per-topic configuration overrides this value.
+
+  * Type: boolean
+  * Default: false
+  * Importance: low
+
+``topic.index.map``
+  The map between Kafka topics and Elasticsearch indices.
+
+  * Type: list
+  * Default: []
+  * Importance: low
+
+``topic.key.ignore``
+  A list of topics for which to ignore the key when indexing. If the key for a topic can be null, you should include the topic in this config in order to generate a valid document id.
+
+  * Type: list
+  * Default: []
+  * Importance: low
+
+``topic.schema.ignore``
+  A list of topics for which to ignore the schema when indexing.
+
+  * Type: list
+  * Default: []
+  * Importance: low
\ No newline at end of file
diff --git a/docs/elasticsearch_connector.rst b/docs/elasticsearch_connector.rst
index e3efce97d..790303704 100644
--- a/docs/elasticsearch_connector.rst
+++ b/docs/elasticsearch_connector.rst
@@ -9,9 +9,9 @@ Elasticsearch is often used for text queries, analytics and as an key-value stor
 (`use cases `_). The connector covers
 both the analytics and key-value store use cases. For the analytics use case,
-each message is in Kafka is treated as an event and the connector uses ``topic+partition+offset``
-as unique identifiers for events, which then converted to unique documents in Elasticsearch.
+each message in Kafka is treated as an event and the connector uses ``topic+partition+offset``
+as a unique identifier for events, which are then converted to unique documents in Elasticsearch.
 For the key-value store use case, it supports using keys from Kafka messages as document ids in
-Elasticsearch and provides configurations ensuring that updates to a key is written to Elasticsearch
+Elasticsearch and provides configurations ensuring that updates to a key are written to Elasticsearch
 in order. For both use cases, Elasticsearch's idempotent write semantics guarantees exactly once
 delivery.
 
@@ -19,7 +19,7 @@ delivery.
 process of defining how a document, and the fields it contains, are stored and indexed. Users can
 explicitly define mappings for types in indices. When mapping is not explicitly defined,
 Elasticsearch can determine field names and types from data, however, some types such as timestamp
-and decimal may not be correctly inferred. To ensure that the types are correctly inferred, the
+and decimal, may not be correctly inferred. To ensure that the types are correctly inferred, the
 connector provides a feature to infer mapping from the schemas of Kafka messages.
 
 Quickstart
@@ -171,5 +171,5 @@ connector jobs to achieve double writes:
   1. The connector job that ingests data into the old indices continues writing to the old indices.
   2. Create a new connector job that writes to the new indices. This will copy both old data and
      new data to the new indices, as long as the data is still available in Kafka.
-  4. Once the data in the old indices are moved to the new indices by the reindexing process, we
+  3. Once the data in the old indices are moved to the new indices by the reindexing process, we
      can stop the old connector job.
diff --git a/src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkConnectorConfig.java b/src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkConnectorConfig.java
index edd232e48..208b84518 100644
--- a/src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkConnectorConfig.java
+++ b/src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkConnectorConfig.java
@@ -41,11 +41,11 @@ public class ElasticsearchSinkConnectorConfig extends AbstractConfig {
   private static final String KEY_IGNORE_DOC =
       "Whether to ignore the key during indexing. When this is set to true, only the value from the message will be written to Elasticsearch."
       + "Note that this is a global config that applies to all topics. If this is set to true, "
-      + "Use `topic.key.ignore` to config for different topics. This value will be overridden by the per topic configuration.";
+      + "Use ``topic.key.ignore`` to config for different topics. This value will be overridden by the per topic configuration.";
   private static final boolean KEY_IGNORE_DEFAULT = false;
   private static final String KEY_IGNORE_DISPLAY = "Ignore Key";
 
-  // TODO: remove thid config when single message transform is in
+  // TODO: remove this config when single message transform is in
   public static final String TOPIC_INDEX_MAP_CONFIG = "topic.index.map";
   private static final String TOPIC_INDEX_MAP_DOC = "The map between Kafka topics and Elasticsearch indices.";
   private static final String TOPIC_INDEX_MAP_DEFAULT = "";
@@ -106,9 +106,9 @@ public class ElasticsearchSinkConnectorConfig extends AbstractConfig {
   public static final String SCHEMA_IGNORE_CONFIG = "schema.ignore";
   private static final String SCHEMA_IGNORE_DOC =
-      "Whether to ignore schemas during indexing. When this is set to true, the schema in `SinkRecord` will be ignored and Elasticsearch will infer the mapping from data. "
+      "Whether to ignore schemas during indexing. When this is set to true, the schema in ``SinkRecord`` will be ignored and Elasticsearch will infer the mapping from data. "
       + "Note that this is a global config that applies to all topics."
-      + "Use `topic.schema.ignore` to config for different topics. This value will be overridden by the per topic configuration.";
+      + "Use ``topic.schema.ignore`` to config for different topics. This value will be overridden by the per topic configuration.";
   private static final boolean SCHEMA_IGNORE_DEFAULT = false;
   private static final String SCHEMA_IGNORE_DISPLAY = "Ignore Schema";

From 146ff61b0a74ff9b0ac0ef183b4c15791de658b1 Mon Sep 17 00:00:00 2001
From: Liquan Pei
Date: Fri, 15 Jul 2016 16:35:31 -0700
Subject: [PATCH 4/4] Improve quickstart

---
 config/quickstart-elasticsearch.properties |  4 +--
 docs/elasticsearch_connector.rst           | 37 ++++++++++++++++++----
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/config/quickstart-elasticsearch.properties b/config/quickstart-elasticsearch.properties
index ba0930f95..226cb7095 100644
--- a/config/quickstart-elasticsearch.properties
+++ b/config/quickstart-elasticsearch.properties
@@ -19,5 +19,5 @@ connector.class=io.confluent.connect.elasticsearch.ElasticsearchSinkConnector
 tasks.max=1
 topics=test-elasticsearch-sink
 key.ignore=true
-connection.url=localhost:9200
-type.name=kafka-connect
\ No newline at end of file
+connection.url=http://localhost:9200
+type.name=kafka-connect
diff --git a/docs/elasticsearch_connector.rst b/docs/elasticsearch_connector.rst
index 790303704..e31c1736c 100644
--- a/docs/elasticsearch_connector.rst
+++ b/docs/elasticsearch_connector.rst
@@ -42,14 +42,13 @@ different hostnames and ports.
 
 First, start the Avro console producer::
 
-  $ ./bin/kafka-avro-console-producer --broker-list localhost:9092 --topic test-elasticsearch \
+  $ ./bin/kafka-avro-console-producer --broker-list localhost:9092 --topic test-elasticsearch-sink \
   --property value.schema='{"type":"record","name":"myrecord","fields":[{"name":"f1","type":"string"}]}'
 
 Then in the console producer, type in::
 
   {"f1": "value1"}
-  {"f1": "value2"}
-  {"f1": "value3"}
+
 
-The three records entered are published to the Kafka topic ``test-elasticsearch`` in Avro format.
+The record entered is published to the Kafka topic ``test-elasticsearch-sink`` in Avro format.
 
@@ -65,9 +64,35 @@ You should see that the process starts up and logs some messages, and then exports data from Kafka
 to Elasticsearch. Once the connector finishes ingesting data to Elasticsearch, check that the data
 is available in Elasticsearch::
 
-  $ search
-
-
+  $ curl -XGET 'http://localhost:9200/test-elasticsearch-sink/_search?pretty'
+  {
+    "took" : 2,
+    "timed_out" : false,
+    "_shards" : {
+      "total" : 5,
+      "successful" : 5,
+      "failed" : 0
+    },
+    "hits" : {
+      "total" : 1,
+      "max_score" : 1.0,
+      "hits" : [ {
+        "_index" : "test-elasticsearch-sink",
+        "_type" : "kafka-connect",
+        "_id" : "test-elasticsearch-sink+0+0",
+        "_score" : 1.0,
+        "_source" : {
+          "_children" : {
+            "f1" : {
+              "_value" : "value1"
+            }
+          },
+          "_nodeFactory" : {
+            "_cfgBigDecimalExact" : false
+          }
+        }
+      }]
+    }
+  }
 
 Features
 --------
 The Elasticsearch connector offers a bunch of features: