From 731fb44b57579167235817459d9f5f81c26e7398 Mon Sep 17 00:00:00 2001 From: msb3399 Date: Wed, 3 May 2017 09:55:29 -0400 Subject: [PATCH] Cleaned up docker. Removed distill source code. Simplfied ELK deployment. Added init process to launch Kibana once Elasticsearch is online. Added LICENSE headers. --- LICENSE | 201 +++++++++++++++ docker/distill/distill.conf | 29 --- docker/distill/distill/__init__.py | 45 ---- docker/distill/distill/algorithms/__init__.py | 14 -- .../distill/algorithms/graphs/__init__.py | 14 -- .../distill/algorithms/graphs/graph.py | 24 -- .../distill/algorithms/stats/__init__.py | 14 -- .../distill/distill/algorithms/stats/hist.py | 183 -------------- .../algorithms/stats/tests/__init__.py | 22 -- .../distill/algorithms/tests/__init__.py | 14 -- docker/distill/distill/app.py | 216 ---------------- docker/distill/distill/config.cfg | 57 ----- docker/distill/distill/models/__init__.py | 14 -- docker/distill/distill/models/brew.py | 235 ------------------ docker/distill/distill/models/stout.py | 149 ----------- .../distill/distill/models/tests/__init__.py | 22 -- docker/distill/distill/models/userale.py | 137 ---------- docker/distill/distill/server.py | 29 --- docker/distill/distill/tests/__init__.py | 21 -- docker/distill/distill/tests/basic_test.py | 24 -- docker/distill/distill/tests/distill_test.py | 43 ---- docker/distill/distill/utils/__init__.py | 0 docker/distill/distill/utils/exceptions.py | 25 -- .../distill/distill/utils/tests/__init__.py | 21 -- docker/distill/distill/utils/validation.py | 39 --- docker/distill/distill/version.py | 22 -- docker/distill/requirements.txt | 22 -- docker/distill/setup.cfg | 30 --- docker/distill/setup.py | 87 ------- docker/docker-compose.yml | 26 +- docker/es/._elasticsearch.yml | Bin 222 -> 0 bytes docker/es/Dockerfile | 5 - docker/es/elasticsearch.yml | 15 ++ docker/kibana/Dockerfile | 7 - docker/kibana/entrypoint.sh | 10 - docker/logstash/Dockerfile | 15 ++ docker/logstash/config/logstash-apache.conf | 82 ------ docker/logstash/config/logstash-userale.conf | 17 +- docker/logstash/templates/apache.json | 59 ----- .../query_builder.py => startup/Dockerfile} | 24 +- .../__init__.py => startup/entrypoint.sh} | 10 +- 41 files changed, 281 insertions(+), 1742 deletions(-) create mode 100644 LICENSE delete mode 100644 docker/distill/distill.conf delete mode 100644 docker/distill/distill/__init__.py delete mode 100644 docker/distill/distill/algorithms/__init__.py delete mode 100644 docker/distill/distill/algorithms/graphs/__init__.py delete mode 100644 docker/distill/distill/algorithms/graphs/graph.py delete mode 100644 docker/distill/distill/algorithms/stats/__init__.py delete mode 100644 docker/distill/distill/algorithms/stats/hist.py delete mode 100644 docker/distill/distill/algorithms/stats/tests/__init__.py delete mode 100644 docker/distill/distill/algorithms/tests/__init__.py delete mode 100644 docker/distill/distill/app.py delete mode 100644 docker/distill/distill/config.cfg delete mode 100644 docker/distill/distill/models/__init__.py delete mode 100644 docker/distill/distill/models/brew.py delete mode 100644 docker/distill/distill/models/stout.py delete mode 100644 docker/distill/distill/models/tests/__init__.py delete mode 100644 docker/distill/distill/models/userale.py delete mode 100644 docker/distill/distill/server.py delete mode 100644 docker/distill/distill/tests/__init__.py delete mode 100644 docker/distill/distill/tests/basic_test.py delete mode 100644 docker/distill/distill/tests/distill_test.py delete mode 100644 docker/distill/distill/utils/__init__.py delete mode 100644 docker/distill/distill/utils/exceptions.py delete mode 100644 docker/distill/distill/utils/tests/__init__.py delete mode 100644 docker/distill/distill/utils/validation.py delete mode 100644 docker/distill/distill/version.py delete mode 100644 docker/distill/requirements.txt delete mode 100644 docker/distill/setup.cfg delete mode 100644 docker/distill/setup.py delete mode 100644 docker/es/._elasticsearch.yml delete mode 100644 docker/es/Dockerfile delete mode 100644 docker/kibana/Dockerfile delete mode 100644 docker/kibana/entrypoint.sh delete mode 100644 docker/logstash/config/logstash-apache.conf delete mode 100644 docker/logstash/templates/apache.json rename docker/{distill/distill/utils/query_builder.py => startup/Dockerfile} (65%) rename docker/{distill/distill/algorithms/graphs/tests/__init__.py => startup/entrypoint.sh} (82%) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c9857e3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2016 The Charles Stark Draper Laboratory, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/docker/distill/distill.conf b/docker/distill/distill.conf deleted file mode 100644 index a113dcb..0000000 --- a/docker/distill/distill.conf +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -description "Gunicorn application server running Apache Distill" - -# Restart process if it ever fails -start on runlevel [2345] -stop on runlevel [!2345] - -respawn -#Setup user and group that Gunicorn should be run as -setuid nobody -setgid www-data - -# Path to run_server -chdir /path/to/distill -exec gunicorn -c "gunicorn.cfg" scripts/run_server:app diff --git a/docker/distill/distill/__init__.py b/docker/distill/distill/__init__.py deleted file mode 100644 index 2b44372..0000000 --- a/docker/distill/distill/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from flask import Flask -from elasticsearch_dsl.connections import connections - -# Initialize Flask instance -app = Flask (__name__) - -# Load Configurations -app.config.from_pyfile('config.cfg') - -# Unpack Elasticsearch configuration and create elasticsearch connection -host = app.config ['ES_HOST'] -port = app.config ['ES_PORT'] -http_auth = app.config ['HTTP_AUTH'] -use_ssl = app.config ['USE_SSL'] -verify_certs = app.config ['VERIFY_CERTS'] -ca_certs = app.config ['CA_CERTS'] -client_cert = app.config ['CLIENT_CERT'] -client_key = app.config ['CLIENT_KEY'] -timeout = app.config ['TIMEOUT'] - -# Initialize Elasticsearch instance -es = connections.create_connection (hosts = [host], - port = port, - http_auth = http_auth, - use_ssl = use_ssl, - verify_certs = verify_certs, - ca_certs = ca_certs, - client_cert = client_cert, - client_key = client_key, - timeout=timeout) \ No newline at end of file diff --git a/docker/distill/distill/algorithms/__init__.py b/docker/distill/distill/algorithms/__init__.py deleted file mode 100644 index 6acb5d1..0000000 --- a/docker/distill/distill/algorithms/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/docker/distill/distill/algorithms/graphs/__init__.py b/docker/distill/distill/algorithms/graphs/__init__.py deleted file mode 100644 index 6acb5d1..0000000 --- a/docker/distill/distill/algorithms/graphs/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/docker/distill/distill/algorithms/graphs/graph.py b/docker/distill/distill/algorithms/graphs/graph.py deleted file mode 100644 index 3c44730..0000000 --- a/docker/distill/distill/algorithms/graphs/graph.py +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class GraphAnalytics (object): - """ - Distill's graph analytics package. Apply graph algorithms to User Ale log data segmented with - Stout. - """ - - @staticmethod - def foo (): - pass \ No newline at end of file diff --git a/docker/distill/distill/algorithms/stats/__init__.py b/docker/distill/distill/algorithms/stats/__init__.py deleted file mode 100644 index 6acb5d1..0000000 --- a/docker/distill/distill/algorithms/stats/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/docker/distill/distill/algorithms/stats/hist.py b/docker/distill/distill/algorithms/stats/hist.py deleted file mode 100644 index b516423..0000000 --- a/docker/distill/distill/algorithms/stats/hist.py +++ /dev/null @@ -1,183 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from distill import es -from distill.utils.query_builder import QueryBuilder -from flask import jsonify -from elasticsearch import Elasticsearch, TransportError - -class Hist (object): - """ - Distill's statistics package. Apply statistical algorithms to User Ale log data segmented with - Stout. Need to query/filter by session or user id. - """ - - def __init__ (self): - # parse out query - pass - - # @staticmethod - # def filter (app, app_type=None, q=''): - - # field = q.get ("field") if q.get ("field") else "" - # size = q.get ("size") if q.get ("size") else 10 - - # query = { "aggs" : { - # "count_by_type" : { - # "filter" : { "term" : { field : }} - # "terms" : { - # "field" : field, - # "size" : 100 - # } - # } - # } - # } - - # d = {} - # # try: - # response = es.search (index=app, doc_type=app_type, body=query) - # # for tag in response['aggregations']['count_by_type']['buckets']: - # # d [tag ['key']] = tag ['doc_count'] - # # except TransportError as e: - # # d ['error'] = e.info - # # except Exception as e: - # # d ['error'] = str (e) - # # return jsonify (d) - # return jsonify (response) - - @staticmethod - def terms (app, app_type=None, q=''): - """ - Group by field (find all elements ) - """ - field = q.get ("field") if q.get ("field") else "" - segment = q.get ("seg") if q.get ("seg") else "*" - size = q.get ("size") if q.get ("size") else 10000 - numhits = q.get ("numhits") if q.get ("numhits") else 10 - - query = { "aggs" : { - "count_by_type" : { - "terms" : { - "field" : field, - "size" : size # maximum number of keys (unique fields) - }, - "aggs" : { - "top" : { # arbitrary name - "top_hits" : { - "size" : numhits, # number of logs in subgroup - "_source" : { # segment on fields - return only subgroup based on field - "include" : [ - segment - ] - } - } - } - } - } - } - } - - d = {} - # try: - response = es.search (index=app, doc_type=app_type, body=query) - # for tag in response['aggregations']['count_by_type']['buckets']: - # d [tag ['key']] = tag ['doc_count'] - # except TransportError as e: - # d ['error'] = e.info - # except Exception as e: - # d ['error'] = str (e) - # return jsonify (d) - return jsonify (response) - - @staticmethod - def unique_terms (app, app_type=None, q=""): - """ - Aggregate the number of unique terms in a field. Missing values are counted and marked as "N/A". - - .. todo:: - - Need to incorporate QueryBuilder library instead of manually generating queries. - - :param app: [string] application name - :param app_type: [string] application type - :param field: [string] field to search against for unique values - :param size: [int] the top size terms returned in the result. Default value is 10. - :param min_hits: [int] return tags which have been found in min_hits or more. Default value is 1. - :return: [dict] dictionary of results - """ - - field = q.get ("field") if q.get ("field") else "" - size = q.get ("size") if q.get ("size") else 10000 - min_hits = q.get ("min_hits") if q.get ("min_hits") else 0 - - print field - query = { "aggs" : { - "terms_agg" : { - "terms" : { - "field" : field, - "size" : size, - "min_doc_count" : min_hits, - "missing" : "N/A" - } - } - } - } - - d = {} - try: - response = es.search (index=app, doc_type=app_type, body=query) - for tag in response['aggregations']['terms_agg']['buckets']: - d [tag ['key']] = tag ['doc_count'] - except TransportError as e: - d ['error'] = e.info - except Exception as e: - d ['error'] = str (e) - return jsonify (d) - - @staticmethod - def histogram (app, app_type=None, q=""): - """ - Only works on numerical data. - """ - field = q.get ("field") if q.get ("field") else "" - - interval = 50 - query = { "aggs" : { - "hist_agg" : { - "histogram" : { - "field" : field, - "interval" : interval - } - } - } - } - - d = {} - try: - response = es.search (index=app, doc_type=app_type, body=query) - for tag in response['aggregations']['hist_agg']['buckets']: - d [tag ['key']] = tag ['doc_count'] - except TransportError as e: - d ['error'] = e.info - except Exception as e: - d ['error'] = str (e) - return jsonify (d) - - def get_value (): - return 0 - - def _parse_msg (query): - # should have form ?measure=name&field=f1, f2&event=a,b - pass diff --git a/docker/distill/distill/algorithms/stats/tests/__init__.py b/docker/distill/distill/algorithms/stats/tests/__init__.py deleted file mode 100644 index f6f6899..0000000 --- a/docker/distill/distill/algorithms/stats/tests/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -''' -distill: tests module. - -Meant for use with py.test. -Organize tests into files, each named xxx_test.py -Read more here: http://pytest.org/ -''' \ No newline at end of file diff --git a/docker/distill/distill/algorithms/tests/__init__.py b/docker/distill/distill/algorithms/tests/__init__.py deleted file mode 100644 index 6acb5d1..0000000 --- a/docker/distill/distill/algorithms/tests/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/docker/distill/distill/app.py b/docker/distill/distill/app.py deleted file mode 100644 index 58434a1..0000000 --- a/docker/distill/distill/app.py +++ /dev/null @@ -1,216 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from flask import Flask, request, jsonify -from distill import app -from distill.models.brew import Brew -from distill.models.userale import UserAle -from distill.models.stout import Stout -from distill.algorithms.stats.hist import Hist - -@app.route ('/', methods=['GET']) -def index (): - """ - Show Distill version information, connection status, and all registered applications. - - .. code-block:: bash - - $ curl -XGET https://localhost:8090 - - { - "author" : "Michelle Beard", - "email" : "mbeard@draper.com", - "name": "Distill", - "status" : true, - "version" : "1.0", - "applications" : { - "xdata_v3" : { - testing: 205, - parsed: 500, - }, - "test_app" : { - logs: 500, - parsed: 100, - } - } - } - - :return: Distill's status information as JSON blob - """ - return jsonify (name="Distill", version="1.0 alpha", author="Michelle Beard", email="mbeard@draper.com", status=Brew.get_status (), applications=Brew.get_applications ()) - -@app.route ('/create/', methods=['POST', 'PUT']) -def create (app_id): - """ - Registers an application in Distill. - - .. code-block:: bash - - $ curl -XPOST https://localhost:8090/xdata_v3 - - :param app_id: Application name - :return: Newly created application's status as JSON blob - """ - return Brew.create (app_id) - -@app.route ('/status/', defaults={"app_type" : None}, methods=['GET']) -@app.route ('/status//', methods=['GET']) -def status (app_id, app_type): - """ - Presents meta information about an registered application, including field names and document types. - - .. code-block:: bash - - $ curl -XGET https://localhost:8090/status/xdata_v3 - - { - "application": "xdata_v3", - "health": "green", - "num_docs": "433", - "status": "open" - } - - :param app_id: Application name - :return: Registered applications meta data as JSON blob - """ - return Brew.read (app_id, app_type=app_type) - -@app.route ('/update/', methods=['POST', 'PUT']) -def update (app_id): - """ - Renames a specific application - - .. code-block:: bash - - $ curl -XPOST https://localhost:8090/update/xdata_v3?name="xdata_v4" - - :param app_id: Application name - :return: Boolean response message as JSON blob - """ - return Brew.update (app_id) - -@app.route ('/delete/', methods=['DELETE']) -def delete (app_id): - """ - Deletes an application permentantly from Distill - - .. code-block:: bash - - $ curl -XDELETE https://localhost:8090/xdata_v3 - - :param app_id: Application name - :return: Boolean response message as JSON blob - """ - return Brew.delete (app_id) - -@app.route ('/search/', defaults={"app_type" : None}, methods=['GET']) -@app.route ('/search//', methods=['GET']) -def segment (app_id, app_type): - """ - Search against an application on various fields. - - .. code-block:: bash - - $ curl -XGET https://[hostname]:[port]/search/xdata_v3?q=session_id:A1234&size=100&scroll=false&fl=param1,param2 - - :param app_id: Application name - :param app_type: Optional document type to filter against - :param q: Main search query. To return all documents, pass in q=*:* - :param size: Maximum number of documents to return in request - :param scroll: Scroll id if the number of documents exceeds 10,000 - :param fl: List of fields to restrict the result set - :return: JSON blob of result set - """ - q = request.args - return UserAle.segment (app_id, app_type=app_type, params=q) - -@app.route ('/stat/', defaults={"app_type" : None}, methods=['GET']) -@app.route ('/stat//', methods=['GET']) -def stat (app_id, app_type): - """ - Generic histogram counts for a single registered application filtered optionally by document type. - View the Statistics document page for method definitions and arguments - - .. code-block:: bash - - $ curl -XGET https://localhost:8090/stat/xdata_v3/testing/?stat=terms&elem=signup&event=click - - :param app_id: Application name - :param app_type: Application type - :return: JSON blob of result set - """ - stat = request.args.get ('stat') - q = request.args - - hist_cls = Hist () - method = None - try: - method = getattr (hist_cls, stat) - return method (app_id, app_type, q=q) - except AttributeError: - msg = "Class `{}` does not implement `{}`".format(hist_cls.__class__.__name__, stat) - return jsonify (error=msg) - -@app.route ('/denoise/', methods=['GET']) -def denoise (app_id): - """ - Bootstrap script to cleanup the raw logs. A document type called "parsed" - will be stored with new log created unless specified in the request. Have option to save - parsed results back to data store. These parsed logs can be intergrated with STOUT results - by running the stout bootstrap script. - - .. code-block:: bash - - $ curl -XGET https://localhost:8090/denoise/xdata_v3?save=true&type=parsed - - :param app_id: Application name - :return: [dict] - """ - doc_type = 'parsed' - save = False - q = request.args - if 'save' in q: - save = str2bool (q.get ('save')) - if 'type' in q: - # @TODO: Proper cleanup script needs to happen - doc_type = q.get ('type') - return UserAle.denoise (app_id, doc_type=doc_type, save=save) - -@app.route ('/stout', methods=['GET']) -def merge_stout (): - """ - Bootstrap script to aggregate user ale logs to stout master answer table - This will save the merged results back to ES instance at new index stout - OR denoise data first, then merge with the stout index... - If STOUT is enabled, the select method expects a stout index to exist or otherwise - it will return an error message. - - .. code-block:: bash - - $ curl -XGET https://locahost:8090/stout/xdata_v3 - - :return: Status message - """ - flag = app.config ['ENABLE_STOUT'] - if flag: - return Stout.ingest () - return jsonify (status="STOUT is disabled.") - -@app.errorhandler(404) -def page_not_found (error): - """ - Generic Error Message - """ - return "Unable to find Distill." diff --git a/docker/distill/distill/config.cfg b/docker/distill/distill/config.cfg deleted file mode 100644 index 189e3ea..0000000 --- a/docker/distill/distill/config.cfg +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Statement for enabling the development environment -DEBUG = True - -# Host -HOST = '0.0.0.0' - -# Port -PORT = 8090 - -# Enable STOUT integration into Distill -ENABLE_STOUT = False -SQLITEDB = '../path/to/stout/stout.db' -MASTER = '../path/to/master/master_ans.csv' -MAPPINGS = '../path/to/mappings/MOT_Mappings.csv' -SELECTED = '../path/to/stout/selected_vars_for_distill.csv' - -# Elasticsearch Configuration -ES_HOST = 'http://elasticsearch' -ES_PORT = 9200 -HTTP_AUTH = None -USE_SSL = False -VERIFY_CERTS = False -CA_CERTS = None -CLIENT_CERT = None -CLIENT_KEY = None -TIMEOUT = 3 - -# Application threads. A common general assumption is -# using 2 per available processor cores - to handle -# incoming requests using one and performing background -# operations using the other. -THREADS_PER_PAGE = 2 - -# Enable protection agains *Cross-site Request Forgery (CSRF)* -# CSRF_ENABLED = True - -# Use a secure, unique and absolutely secret key for -# signing the data. -# CSRF_SESSION_KEY = "secret" - -# Secret key for signing cookies -# SECRET_KEY = "secret" \ No newline at end of file diff --git a/docker/distill/distill/models/__init__.py b/docker/distill/distill/models/__init__.py deleted file mode 100644 index 6acb5d1..0000000 --- a/docker/distill/distill/models/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/docker/distill/distill/models/brew.py b/docker/distill/distill/models/brew.py deleted file mode 100644 index 28d16b3..0000000 --- a/docker/distill/distill/models/brew.py +++ /dev/null @@ -1,235 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from elasticsearch import Elasticsearch, TransportError -from flask import jsonify -from distill import es - -class Brew (object): - """ - Distill supports basic CRUD operations and publishes the status - of an persistenct database. Eventually it will support ingesting logs sent from - an registered application. - """ - - @staticmethod - def get_status (): - """ - Fetch the status of the underlying database instance. - - :return: [bool] if connection to database instance has been established - """ - return es.ping (ignore=[400, 404]) - - @staticmethod - def get_applications (): - """ - Fetch all the registered applications in Distill. - - .. note:: Private indexes starting with a period are not included in the result set - - :return: [dict] dictionary of all registered applications and meta information - """ - doc = {} - query = { "aggs" : { - "count_by_type" : { - "terms" : { - "field" : "_type", - "size" : 100 - } - } - } - } - - try: - cluster_status = es.cat.indices (h=["index"], pri=False) - x = cluster_status.splitlines() - - for idx in x: - idx = idx.rstrip () - - # Ignore private indexes (like .kibana or .stout) - if idx [:1] != '.': - response = es.search (index=idx, body=query) - d = {} - for tag in response["aggregations"]["count_by_type"]["buckets"]: - d [tag ['key']] = tag ['doc_count'] - doc [idx] = d - except TransportError as e: - doc ['error'] = e.info - except Exception as e: - doc ['error'] = str (e) - return doc - - @staticmethod - def create (app): - """ - Register a new application in Distill - - .. code-block:: bash - - { - "application" : "xdata_v3", - "health" : "green", - "num_docs" : 0, - "status" : "open" - } - - :param app: [string] application name (e.g. xdata_v3) - :return: [dict] dictionary of application and its meta information - """ - - # ignore 400 cause by IndexAlreadyExistsException when creating an index - res = es.indices.create (index=app, ignore=[400, 404]) - doc = _get_cluster_status (app) - return jsonify (doc) - - @staticmethod - def read (app, app_type=None): - """ - Fetch meta data associated with an application - - .. code-block:: bash - - Example: - { - "application" : "xdata_v3", - "health" : "green", - "num_docs" : "100", - "status" : "open" - "types" : { - "raw_logs" : { - "@timestamp" : "date", - "action" : "string", - "elementId" : "string" - }, - "parsed" : { - "@timestamp" : "date", - "elementId_interval" : "string" - }, - "graph" : { - "uniqueID" : "string", - "transition_count" : "long", - "p_value" : "float" - } - } - } - - :param app: [string] application name (e.g. xdata_v3) - :return: [dict] dictionary of application and its meta information - """ - - return jsonify (_get_cluster_status (app, app_type=app_type)) - - @staticmethod - def update (app): - """ - .. todo:: - Currently not implemented - """ - - return jsonify (status="not implemented") - - @staticmethod - def delete (app): - """ - Technically closes the index so its content is not searchable. - - .. code-block: bash - - Example: - { - status: "Deleted index xdata_v3" - } - - :param app: [string] application name (e.g. xdata_v3) - :return: [dict] status message of the event - """ - - es.indices.close (index=app, ignore=[400, 404]) - return jsonify (status="Deleted index %s" % app) - -def _get_cluster_status (app, app_type=None): - """ - Return cluster status, index health, and document count as string - - @todo figure out how to count individual documents stored at an app_type (currently shows only index count) - :param app: [string] application name (e.g. xdata_v3) - :return: [dict] dictionary of index meta data including field names - """ - - doc = {} - try: - cluster_status = es.cat.indices (index=app, h=["health", "status", "docs.count"], pri=True, ignore=[400, 404]) - v = str (cluster_status).split (" ") - m = ["health", "status", "num_docs"] - doc = dict (zip (m, v)) - # Add back application - doc ["application"] = app - except TransportError as e: - doc ['error'] = e.info - except Exception as e: - doc ['error'] = str (e) - - doc ['fields'] = _get_all_fields (app, app_type) - return doc - -def _parse_mappings (app, app_type=None): - """ - .. todo: - - Need to parse out result set that presents field list and type - """ - - try: - mappings = es.indices.get_mapping (index=app, doc_type=[app_type], ignore=[400, 404]) - # mappings = yaml.safe_load (json.ess (mappings)) - # print json.dumps (mappings [app]["mappings"], indent=4, separators=(',', ': ')) - ignore = ["properties", "format"] - except TransportError as e: - doc ['error'] = e.info - except Exception as e: - doc ['error'] = str (e) - return doc - -def _get_all_fields (app, app_type=None): - """ - Retrieve all possible fields in an application - - :param app: [string] application name (e.g. xdata_v3) - :param app_type: [string] application type (e.g. logs) - :return: [list] list of strings representing the fields names - """ - d = list () - query = { "aggs" : { - "fields" : { - "terms" : { - "field" : "_field_names", - "size" : 100 - } - } - } - } - - try: - response = es.search (index=app, doc_type=app_type, body=query) - for tag in response['aggregations']['fields']['buckets']: - d.append (tag ['key']) - except TransportError as e: - d.append (str (e.info)) - except Exception as e: - d.append (str (e)) - return d diff --git a/docker/distill/distill/models/stout.py b/docker/distill/distill/models/stout.py deleted file mode 100644 index d6421d8..0000000 --- a/docker/distill/distill/models/stout.py +++ /dev/null @@ -1,149 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from distill import app, es -from elasticsearch_dsl import DocType, String, Boolean, Date, Nested, Search -from elasticsearch_dsl.query import MultiMatch, Match, Q -from elasticsearch import Elasticsearch, TransportError -from flask import jsonify -import pandas as pd - -class StoutDoc (DocType): - """ - Representation of a Stout documentat. - """ - - sessionID = String (index="not_analyzed") - task1 = Nested () - task2 = Nested () - - class Meta: - index = '.stout' - doc_type = 'testing' - - def save (self, *args, **kwargs): - """ - Save data from parsing as a Stout document in Distill - """ - return super (StoutDoc, self).save (*args, **kwargs) - -class Stout (object): - """ - Main Stout class to support ingest and search operations. - """ - - @staticmethod - def ingest (): - """ - Ingest data coming from Stout to Distill - """ - - # Create the mappings in elasticsearch - StoutDoc.init () - status = True - data = _parse (); - try: - for k,v in data.items (): - doc = StoutDoc () - if 'sessionID' in v: - doc.sessionID = v['sessionID'] - if 'task1' in v: - doc.task1 = v['task1'] - if 'task2' in v: - doc.task2 = v['task2'] - doc.save () - except Error as e: - status = False - return jsonify (status=status) - -def _parse (): - """ - Parse master answer table with mapping into an associative array - - :return: [dict] dictionary of session information - """ - master = app.config ['MASTER'] - mappings = app.config ['MAPPINGS'] - - fileContents=pd.read_csv(master, encoding='utf-8') - plainTextMappings=pd.read_csv(mappings, encoding='raw_unicode_escape') - headers=list(fileContents.columns.values) - - #generate the mapping between header and plain text - translationRow={}; - for fieldIndex in range(1,len(headers)): - t=plainTextMappings.ix[fieldIndex] - translationRow[headers[fieldIndex]]=t[9] - - dictBySessionID={} - translationRow['items.text']='foo' - index=0 - for row in fileContents.iterrows(): - index=index+1 - - taskMetrics={} - index,data=row - identifier=row[1][0].split("::") - sessionID=identifier[0] - taskID=(identifier[1]) - workingData={} - #is this session id already in the dictionary? - if sessionID in dictBySessionID: - #grab the entry as workingData - workingData=dictBySessionID[sessionID] - - sysData={} - task1Data={} - task2Data={} - metaData={} - d={} - - for fieldIndex in range(1,len(headers)): - if not pd.isnull(row[1][fieldIndex]): #only interested in non-null fields - tempDict={} - if headers[fieldIndex] in translationRow: - tempDict['field']=translationRow[headers[fieldIndex]] - #tempDict['field']=translationRow[9] - tempDict['value']=row[1][fieldIndex] - d[headers[fieldIndex]]=row[1][fieldIndex] - if "SYS" in headers[fieldIndex]: - sysData[headers[fieldIndex]]=tempDict - elif "OT1" in headers[fieldIndex]: - task1Data[headers[fieldIndex]]=tempDict - elif "OT2" in headers[fieldIndex]: - task2Data[headers[fieldIndex]]=tempDict - else: - metaData[headers[fieldIndex]]=tempDict - - if d['TSK_TIME_DIFF_']>0: #block tasks with zero time elapsed - a=int(d['TSK_TIME_DIFF_OT1_']) - b=int(d['TSK_TIME_DIFF_OT2_']) - #figure out which task the values belong to - if ((a>0) & (b<=0)): - task1Data['taskID']=taskID - task1Data['meta']=metaData - task1Data['system']=sysData - workingData['task1']=task1Data - elif ((a<=0) & (b>0)): - task2Data['taskID']=taskID - task2Data['meta']=metaData - task2Data['system']=sysData - workingData['task2']=task2Data - else: - raise ValueError('Encountered an unexpected task time diff state') - - workingData['sessionID'] = sessionID - dictBySessionID[sessionID]=workingData - return dictBySessionID diff --git a/docker/distill/distill/models/tests/__init__.py b/docker/distill/distill/models/tests/__init__.py deleted file mode 100644 index f6f6899..0000000 --- a/docker/distill/distill/models/tests/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -''' -distill: tests module. - -Meant for use with py.test. -Organize tests into files, each named xxx_test.py -Read more here: http://pytest.org/ -''' \ No newline at end of file diff --git a/docker/distill/distill/models/userale.py b/docker/distill/distill/models/userale.py deleted file mode 100644 index f63fa51..0000000 --- a/docker/distill/distill/models/userale.py +++ /dev/null @@ -1,137 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from elasticsearch import Elasticsearch, TransportError -from elasticsearch_dsl import DocType, String, Boolean, Date, Float, Search -from elasticsearch_dsl.query import MultiMatch, Match, Q -from elasticsearch import Elasticsearch, TransportError -from elasticsearch_dsl.connections import connections -from werkzeug.datastructures import ImmutableMultiDict, MultiDict - -from flask import jsonify, Markup -from distill import app, es -import datetime - -class UserAle (object): - """ - Main method of entry to perform segmentation and integration of STOUT's master - answer table (if STOUT is enabled). Advanced and basic analytics is performed in the - distill.algorithms.stats and distill.algorithms.graphs module. - """ - - @staticmethod - def segment (app, app_type=None, params=''): - """ - Just support match all for now. - """ - q = params.get ("q") if params.get ("q") else {} - fields = params.get ("fields") if params.get ("fields") else [] - size = params.get ("size") if params.get ("size") else 10 - scroll = params.get ("scroll") if params.get ("scroll") else False - fl = params.get ("fl") if params.get ("fl") else [] - - # filters = params.get ("filter") if params.get ("filter") else {} - - # 'q': args.get('q', '{}'), - # 'fields': args.get('fl', '{}'), - # 'size': args.get ('size', 100), - # 'scroll': args.get ('scroll', False), - # 'filters': request_args.getlist ('fq') - query = {} - query ['size'] = size - - if q: - res = q.split(":") - key = res [0] - val = res [1] - query ['query'] = {"match" : { key : val } } - else: - query ['query'] = {"match_all" : {}} - - if len (fields) > 0: - ex = { - "include" : fields.split(",") - } - query ['_source'] = ex - - - response = es.search (index=app, doc_type=app_type, body=query) - - return jsonify (response) - - @staticmethod - def search (app, - app_type=None, - filters=list (), - size=100, - include="*", - scroll=None, - sort_field=None): - """ - Perform a search query. - - :param app: [string] application id (e.g. "xdata_v3") - :param app_type: [string] name of the application type. If None all application types are searched. - :param filters: [list of strings] list of filters for a query. - :param size: [int] maximum number of hits that should be returned - :param sort_field: [string] sorting field. Currently supported fields: "timestamp", "date" - :return: [dict] dictionary with processed results. If STOUT is enabled, STOUT data will be merged with final result. - """ - - # Need some query builder... - log_result = es.search (index=app, doc_type=app_type, body=query, fields=filters, size=size) - - stout_result = Stout.getSessions () - - data = merged_results (log_result, stout_result) - return data - - @staticmethod - def denoise (app, app_type='parsed', save=False): - """ - """ - pass - -""" -Combine a list of dictionaries together to form one complete dictionary -""" -def merge_dicts (lst): - dall = {} - for d in lst: - dall.update (d) - return dall - -""" -Get query parameters from the request and preprocess them. -:param [dict-like structure] Any structure supporting get calls -:result [dict] Parsed parameters -""" -def parse_query_parameters (indx, app_type=None, request_args = {}): - args = {key: value[0] for (key, value) in dict (request_args).iteritems ()} - - # print "args = ", args - # Parse out simple filter queries - filters = [] - for filter in get_all_fields (indx, app_type): - if filter in args: - filters.append((filter, args[filter])) - - return { - 'q': args.get('q', '{}'), - 'fields': args.get('fl', []), - 'size': args.get ('size', 100), - 'scroll': args.get ('scroll', False), - 'filters': request_args.getlist ('fq') - } \ No newline at end of file diff --git a/docker/distill/distill/server.py b/docker/distill/distill/server.py deleted file mode 100644 index 23acd83..0000000 --- a/docker/distill/distill/server.py +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from distill import app -from distill.app import * - -""" -Start up a local WSGI server called development -""" -def dev_server (): - host = app.config ['HOST'] - port = app.config ['PORT'] - debug = app.config ['DEBUG'] - app.run (host=host, port=port, debug=debug) - -if __name__ == '__main__': - dev_server () diff --git a/docker/distill/distill/tests/__init__.py b/docker/distill/distill/tests/__init__.py deleted file mode 100644 index 09c5e2f..0000000 --- a/docker/distill/distill/tests/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -distill: tests module. - -Meant for use with py.test. -Organize tests into files, each named xxx_test.py -Read more here: http://pytest.org/ -''' \ No newline at end of file diff --git a/docker/distill/distill/tests/basic_test.py b/docker/distill/distill/tests/basic_test.py deleted file mode 100644 index 712d1fe..0000000 --- a/docker/distill/distill/tests/basic_test.py +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -distill: Test module. - -Meant for use with py.test. -Write each test as a function named test_. -Read more here: http://pytest.org/ -''' - -def test_example(): - assert True diff --git a/docker/distill/distill/tests/distill_test.py b/docker/distill/distill/tests/distill_test.py deleted file mode 100644 index 2fb6502..0000000 --- a/docker/distill/distill/tests/distill_test.py +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from flask import Flask, request - -from distill import app as test_app - -def test_example (): - assert True - # with test_app.test_client () as c: - # rv = c.get ('/?tequila=42') - # assert request.args ['tequila'] == '42' - -# import os -# import flaskr -# import unittest -# import tempfile - -# class FlaskrTestCase(unittest.TestCase): - -# def setUp(self): -# self.db_fd, flaskr.app.config['DATABASE'] = tempfile.mkstemp() -# flaskr.app.config['TESTING'] = True -# self.app = flaskr.app.test_client() -# flaskr.init_db() - -# def tearDown(self): -# os.close(self.db_fd) -# os.unlink(flaskr.app.config['DATABASE']) - -# if __name__ == '__main__': -# unittest.main() \ No newline at end of file diff --git a/docker/distill/distill/utils/__init__.py b/docker/distill/distill/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/docker/distill/distill/utils/exceptions.py b/docker/distill/distill/utils/exceptions.py deleted file mode 100644 index a391241..0000000 --- a/docker/distill/distill/utils/exceptions.py +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Error (Exception): - """Base class for exceptions.""" - pass - -class ValidationError (Error): - """ Exceptions raised for errors in validated a url.""" - - def __init__ (self, url, msg): - self.url = url - self.msg = msg diff --git a/docker/distill/distill/utils/tests/__init__.py b/docker/distill/distill/utils/tests/__init__.py deleted file mode 100644 index 09c5e2f..0000000 --- a/docker/distill/distill/utils/tests/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -distill: tests module. - -Meant for use with py.test. -Organize tests into files, each named xxx_test.py -Read more here: http://pytest.org/ -''' \ No newline at end of file diff --git a/docker/distill/distill/utils/validation.py b/docker/distill/distill/utils/validation.py deleted file mode 100644 index 7cd3362..0000000 --- a/docker/distill/distill/utils/validation.py +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from distill.utils.exceptions import ValidationError - -def validate_request (q): - """ - Parse out request message and validate inputs - - :param q: Url query string - :raises ValidationError: if the query is missing required parameters - """ - if 'q' not in q: - raise ValidationError ("Missing required parameter: %s" % 'q') - else: - # Handle rest of parsing - pass - -def str2bool (v): - """ - Convert string expression to boolean - - :param v: Input value - :returns: Converted message as boolean type - :rtype: bool - """ - return v.lower() in ("yes", "true", "t", "1") \ No newline at end of file diff --git a/docker/distill/distill/version.py b/docker/distill/distill/version.py deleted file mode 100644 index 6532ea7..0000000 --- a/docker/distill/distill/version.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Version information for Distill. - -This file is imported by ``Distill.__init__``, -and parsed by ``setup.py``. -""" - -__version__ = "0.1.3" \ No newline at end of file diff --git a/docker/distill/requirements.txt b/docker/distill/requirements.txt deleted file mode 100644 index 5137ad9..0000000 --- a/docker/distill/requirements.txt +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Flask==0.10.1 -#networkx==1.11 -elasticsearch-dsl==2.0.0 -#numpy>=1.10.0 -#scipy>=0.17.0 -pandas>=0.18.1 -pytest>=3.0.0 \ No newline at end of file diff --git a/docker/distill/setup.cfg b/docker/distill/setup.cfg deleted file mode 100644 index 08020f4..0000000 --- a/docker/distill/setup.cfg +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[egg_info] -tag_build = 0.1.4 -tag_svn_revision = false - -[aliases] -test=pytest - -[tool:pytest] -addopts = --verbose --ignore=build --ignore=setup.py --ignore=dist --junitxml=test-report.xml --cov-report xml --cov=distill distill/. -norecursedirs = *.eggs *env* .git - -[build_sphinx] -source-dir = docs -build-dir = docs/_build -all_files = 1 diff --git a/docker/distill/setup.py b/docker/distill/setup.py deleted file mode 100644 index 8ddd32f..0000000 --- a/docker/distill/setup.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from setuptools import setup, find_packages -import distutils.cmd -import distutils.log -from setuptools.command.test import test as TestCommand -import io, os, sys, subprocess - -if sys.version_info[:2] < (2, 7): - m = "Python 2.7 or later is required for Distill (%d.%d detected)." - raise ImportError (m % sys.version_info[:2]) - -if sys.argv[-1] == 'setup.py': - print ("To install, run 'python setup.py install'") - print () - -def read (*filenames, **kwargs): - encoding = kwargs.get ('encoding', 'utf-8') - sep = kwargs.get ('sep', '\n') - buf = [] - for filename in filenames: - with io.open (filename, encoding=encoding) as f: - buf.append (f.read ()) - return sep.join (buf) - -# Get the version string -def get_version (): - basedir = os.path.dirname (__file__) - with open (os.path.join (basedir, 'distill/version.py')) as f: - version = {} - exec (f.read (), version) - return version['__version__'] - raise RuntimeError ('No version info found.') - -setup ( - name = "Distill", - version = get_version (), - url = "https://github.com/apache/incubator-senssoft-distill", - license = "Apache Software License", - author = "Michelle Beard", - author_email = "msbeard@apache.org", - description = "An analytical framework for UserALE.", - long_description = __doc__, - classifiers = [ - 'Development Status :: 4 - Beta', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2.7', - 'Natural Language :: English', - 'Environment :: Web Environment', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Private :: Do Not Upload"' - ], - keywords = "stout userale tap distill", # Separate with spaces - packages = find_packages (exclude=['examples', 'tests']), - include_package_data = True, - zip_safe = False, - setup_requires = ['pytest-runner'], - tests_require = ['pytest>=3.0.0', 'pytest-pylint', 'coverage'], - install_requires = ['Flask==0.10.1', - #'networkx==1.11', - 'elasticsearch-dsl==2.0.0', - #'numpy>=1.10.0', - #'scipy>=0.17.0', - 'pandas>=0.18.1' - ], - entry_points = { - 'console_scripts': [ - 'dev = distill.server:dev_server' - ] - } -) \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index b1e552b..0778c42 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -13,13 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Apache TAP Production Build +# Apache SensSoft Docker Build version: "2" services: + + # Startup + startup: + build: ./startup + container_name: senssoft-start + depends_on: + - "elasticsearch" + command: bash /opt/entrypoint.sh + # ELK Stack elasticsearch: - build: ./es + image: elasticsearch:latest container_name: senssoft-elastic ports: - 9200:9200 @@ -29,14 +38,19 @@ services: - ./es/logs:/usr/share/elasticsearch/logs - ./es/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml environment: - ES_JAVA_OPTS: "-Xms1g -Xmx1g" + ES_JAVA_OPTS: "-Xms4g -Xmx4g" + + # Kibana kibana: - build: ./kibana + image: kibana:latest container_name: senssoft-kibana ports: - 5601:5601 depends_on: - "elasticsearch" + - "startup" + + # Logstash userale-forwarder: build: ./logstash container_name: senssoft-userale-forwarder @@ -48,8 +62,10 @@ services: - ./logstash/templates/userale.json:/usr/share/logstash/templates/userale.json depends_on: - "elasticsearch" + - "startup" environment: LS_HEAP_SIZE: "2048m" + distill: build: ./distill container_name: distill @@ -59,11 +75,13 @@ services: - elasticsearch links: - elasticsearch + db: container_name: tap-db build: ./db ports: - "5432:5432" + tap: container_name: tap-web build: ./tap diff --git a/docker/es/._elasticsearch.yml b/docker/es/._elasticsearch.yml deleted file mode 100644 index 6da966aae043cc9287a67f06d22310259a80b81e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 222 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}@fsio@$UgK5x_AdBnYYuq+J^qI7A5ADWagzZ6zUro7#LccnkQN$o0wTxyXWVp=cL9|7#TQc v6y@ipS{qmxxR|(Ey6UW6kS{S$)=$e>VI2l=*I~ut;nlk_Z8P6jG diff --git a/docker/es/Dockerfile b/docker/es/Dockerfile deleted file mode 100644 index 1eb4748..0000000 --- a/docker/es/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM elasticsearch:5 -MAINTAINER Michelle Beard - -# Install XPack -RUN elasticsearch-plugin install --batch x-pack diff --git a/docker/es/elasticsearch.yml b/docker/es/elasticsearch.yml index c795b7c..af0e14e 100644 --- a/docker/es/elasticsearch.yml +++ b/docker/es/elasticsearch.yml @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # ======================== Elasticsearch Configuration ========================= # # NOTE: Elasticsearch comes with reasonable defaults for most settings. diff --git a/docker/kibana/Dockerfile b/docker/kibana/Dockerfile deleted file mode 100644 index 4814f47..0000000 --- a/docker/kibana/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM kibana:5 -MAINTAINER Michelle Beard - -# Install XPack -RUN kibana-plugin install x-pack - -#CMD ["/tmp/entrypoint.sh"] diff --git a/docker/kibana/entrypoint.sh b/docker/kibana/entrypoint.sh deleted file mode 100644 index c08d70a..0000000 --- a/docker/kibana/entrypoint.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -# Wait for the Elasticsearch container to be ready before starting Kibana. -echo "Stalling for Elasticsearch" -while true; do - nc -q 1 elasticsearch 9200 2>/dev/null && break -done - -echo "Starting Kibana" -exec kibana diff --git a/docker/logstash/Dockerfile b/docker/logstash/Dockerfile index 3a9533a..d676e62 100644 --- a/docker/logstash/Dockerfile +++ b/docker/logstash/Dockerfile @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + FROM logstash:5 MAINTAINER Michelle Beard diff --git a/docker/logstash/config/logstash-apache.conf b/docker/logstash/config/logstash-apache.conf deleted file mode 100644 index c95882b..0000000 --- a/docker/logstash/config/logstash-apache.conf +++ /dev/null @@ -1,82 +0,0 @@ -input { - # file { - # path => "/var/log/server_logs/access.log" - # start_position => "beginning" - # sincedb_path => "/dev/null" - # } - - file { - path => [ "/var/log/server_logs/access.log" ] - start_position => "beginning" - sincedb_path => "/dev/null" - type => "apache-access" - } - # file { - # path => "/var/log/server_logs/error.log" - # start_position => "beginning" - # sincedb_path => "/dev/null" - # type => "apache-error" - # } -} - -filter { - # ------------------------ Parse services logs into fields --------------------------- - # APACHE 2 - if [type] == "apache-access" { - # To process log data (message's content) using some regex or precompiled GROK pattern - grok { - match => [ "message", "%{COMBINEDAPACHELOG}"] - } - # To extract log's time according to a date pattern - date { - match => [ "timestamp", "dd/MMM/YYYY:HH:mm:ss Z"] - } - # Extract browser information, if available. - if [agent] != "" { - useragent { - source => "agent" - } - } - # Extract client ip information, if available. - # if [clientip] != "" { - # geoip { - # source => "clientip" - # database => "/usr/share/logstash/GeoIP/GeoIP.dat" - # target => "apache_clientip" - # add_tag => [ "geoip" ] - # } - # } - } - - # if [type] == "apache-error" { - # grok { - # match => [ "message", "%{APACHEERRORLOG}"] - # # Directory where to find the custom patterns - # patterns_dir => ["/etc/logstash/grok"] - # } - # if [clientip] != "" { - # geoip { - # source => "clientip" - # target => "apache_clientip" - # add_tag => [ "geoip" ] - # } - # } - # } -} - -output { - # Output data to Elasticsearch instance - elasticsearch { - hosts => "elasticsearch:9200" - index => "apache" - user => "elastic" - password => "changeme" - manage_template => true - template_overwrite => true - template => "/usr/share/logstash/templates/apache.json" - template_name => "apache" - } - - # Debug - stdout { codec => rubydebug } -} diff --git a/docker/logstash/config/logstash-userale.conf b/docker/logstash/config/logstash-userale.conf index 217a4dd..79a69d0 100644 --- a/docker/logstash/config/logstash-userale.conf +++ b/docker/logstash/config/logstash-userale.conf @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + input { http { codec => "json" @@ -42,8 +57,6 @@ output { hosts => "elasticsearch:9200" index => "userale" document_type => "logs" - user => "elastic" - password => "changeme" manage_template => true template_overwrite => true template => "/usr/share/logstash/templates/userale.json" diff --git a/docker/logstash/templates/apache.json b/docker/logstash/templates/apache.json deleted file mode 100644 index 10552e3..0000000 --- a/docker/logstash/templates/apache.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "template": "apache", - "settings": { - "index.refresh_interval": "5s" - }, - "mappings": { - "_default_": { - "dynamic_templates": [{ - "string_fields": { - "match_mapping_type": "string", - "match": "*", - "mapping": { - "omit_norms": true, - "type": "text", - "fields": { - "raw": { - "ignore_above": 256, - "type": "keyword" - } - } - } - } - }], - "properties": { - "geoip": { - "dynamic": true, - "path": "full", - "properties": { - "location": { - "type": "geo_point" - } - }, - "type": "object" - }, - "@version": { - "type": "keyword" - }, - "referer": { - "type": "keyword" - }, - "request": { - "type": "keyword" - }, - "responsetime": { - "type": "long" - }, - "bytes": { - "type": "long" - } - }, - "_all": { - "enabled": true - } - } - }, - "aliases": { - - } -} diff --git a/docker/distill/distill/utils/query_builder.py b/docker/startup/Dockerfile similarity index 65% rename from docker/distill/distill/utils/query_builder.py rename to docker/startup/Dockerfile index 017a08b..ea8757d 100644 --- a/docker/distill/distill/utils/query_builder.py +++ b/docker/startup/Dockerfile @@ -13,23 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -class QueryBuilder (object): +FROM ubuntu:latest +MAINTAINER Michelle Beard - def __init__ (self, query=None): - if query: - self.query = query - else: - self.query = { - "query" : { - "match_all" : {} - } - } +RUN apt-get update && apt-get install -y wget curl && apt-get clean +RUN wget https://github.com/jwilder/dockerize/releases/download/v0.1.0/dockerize-linux-amd64-v0.1.0.tar.gz - def add_filters (self, filters): - pass +RUN tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.1.0.tar.gz - def add_sorting (self, sort_field='', sort_order=''): - pass +# Copy Files Over +COPY ./entrypoint.sh /opt/entrypoint.sh - \ No newline at end of file +# Make executable +RUN chmod +x /opt/*.sh diff --git a/docker/distill/distill/algorithms/graphs/tests/__init__.py b/docker/startup/entrypoint.sh similarity index 82% rename from docker/distill/distill/algorithms/graphs/tests/__init__.py rename to docker/startup/entrypoint.sh index f6f6899..5686d9d 100644 --- a/docker/distill/distill/algorithms/graphs/tests/__init__.py +++ b/docker/startup/entrypoint.sh @@ -13,10 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -''' -distill: tests module. +#!/bin/bash -Meant for use with py.test. -Organize tests into files, each named xxx_test.py -Read more here: http://pytest.org/ -''' \ No newline at end of file +# Wait for the Elasticsearch container to be ready before starting Kibana. +echo "Stalling for Elasticsearch" +dockerize -wait http://elasticsearch:9200