Skip to content

Commit

Permalink
Updated distill api and fixed bugs.
Browse files Browse the repository at this point in the history
  • Loading branch information
mooshu1x2 committed Jul 22, 2016
1 parent ab8b9fa commit 47e4ea1
Show file tree
Hide file tree
Showing 14 changed files with 460 additions and 180 deletions.
168 changes: 168 additions & 0 deletions distill/algorithms/stats/hist.py
@@ -0,0 +1,168 @@
from distill import es
from distill.utils.query_builder import QueryBuilder
from flask import jsonify
from elasticsearch import Elasticsearch, TransportError

class Hist(object):
    """
    Distill's statistics package. Apply statistical algorithms to User Ale
    log data segmented with Stout. Need to query/filter by session or user id.

    Every statistic is a static method taking the application (index) name,
    an optional document type and a dict-like object of query parameters
    ``q`` (e.g. ``request.args``), and returning a ``jsonify`` response.

    .. todo::
        A ``filter`` aggregation (terms aggregation restricted by a term
        filter) was drafted but never completed; it is not implemented.
    """

    # Bucket width used by :meth:`histogram`.
    HISTOGRAM_INTERVAL = 50

    def __init__(self):
        # Stateless: all statistics are exposed as static methods.
        pass

    @staticmethod
    def _param(q, key, default):
        """
        Read ``key`` from the query parameters, falling back to ``default``.

        :param q: dict-like query parameters; may be empty, ``None`` or ``''``
        :param key: [string] parameter name
        :param default: value returned when the parameter is missing or falsy
        :return: the raw parameter value, or ``default``
        """
        # An empty/None ``q`` (the methods' default is an empty string) has
        # no usable ``get``, so treat it as "no parameters supplied".
        value = q.get(key) if q else None
        return value if value else default

    @staticmethod
    def _bucket_counts(app, app_type, query, agg_name):
        """
        Run an aggregation query and map each bucket key to its doc count.

        :param app: [string] application name (used as the search index)
        :param app_type: [string] application type (used as the doc type)
        :param query: [dict] search body containing the aggregation
        :param agg_name: [string] name of the aggregation inside ``query``
        :return: JSON response of ``{bucket key: doc_count}``; on failure the
                 response carries an ``error`` entry instead of raising
        """
        d = {}
        try:
            response = es.search(index=app, doc_type=app_type, body=query)
            for bucket in response['aggregations'][agg_name]['buckets']:
                d[bucket['key']] = bucket['doc_count']
        except TransportError as e:
            d['error'] = e.info
        except Exception as e:
            d['error'] = str(e)
        return jsonify(d)

    @staticmethod
    def terms(app, app_type=None, q=''):
        """
        Group by field (find all elements) and return the top hits of
        each group.

        Recognised keys in ``q``:

        * ``field``: field to group on. Default value is ``""``.
        * ``seg``: source field pattern included in each returned hit.
          Default value is ``"*"``.
        * ``size``: maximum number of keys (unique field values).
          Default value is 10000.
        * ``numhits``: number of logs returned per group. Default value is 10.

        :param app: [string] application name
        :param app_type: [string] application type
        :param q: dict-like query parameters
        :return: JSON response with the raw search response, or an ``error``
                 entry if the search fails
        """
        field = Hist._param(q, "field", "")
        segment = Hist._param(q, "seg", "*")
        size = Hist._param(q, "size", 10000)
        numhits = Hist._param(q, "numhits", 10)

        query = {
            "aggs": {
                "count_by_type": {
                    "terms": {
                        "field": field,
                        "size": size,  # maximum number of keys (unique fields)
                    },
                    "aggs": {
                        "top": {  # arbitrary name
                            "top_hits": {
                                "size": numhits,  # number of logs in subgroup
                                # segment on fields - return only the
                                # subgroup based on field
                                "_source": {
                                    "include": [
                                        segment,
                                    ],
                                },
                            },
                        },
                    },
                },
            },
        }

        # Report failures the same way unique_terms/histogram do rather than
        # letting the exception escape to the caller.
        try:
            response = es.search(index=app, doc_type=app_type, body=query)
        except TransportError as e:
            return jsonify({'error': e.info})
        except Exception as e:
            return jsonify({'error': str(e)})
        return jsonify(response)

    @staticmethod
    def unique_terms(app, app_type=None, q=""):
        """
        Aggregate the number of unique terms in a field. Missing values are
        counted and marked as "N/A".

        .. todo::
            Need to incorporate QueryBuilder library instead of manually
            generating queries.

        Recognised keys in ``q``:

        * ``field``: field to search against for unique values.
          Default value is ``""``.
        * ``size``: the top size terms returned in the result.
          Default value is 10000.
        * ``min_hits``: return terms which have been found in min_hits or
          more documents. Default value is 0.

        :param app: [string] application name
        :param app_type: [string] application type
        :param q: dict-like query parameters
        :return: JSON response mapping each term to its document count, or
                 carrying an ``error`` entry if the search fails
        """
        field = Hist._param(q, "field", "")
        size = Hist._param(q, "size", 10000)
        min_hits = Hist._param(q, "min_hits", 0)

        query = {
            "aggs": {
                "terms_agg": {
                    "terms": {
                        "field": field,
                        "size": size,
                        "min_doc_count": min_hits,
                        "missing": "N/A",
                    },
                },
            },
        }
        return Hist._bucket_counts(app, app_type, query, "terms_agg")

    @staticmethod
    def histogram(app, app_type=None, q=""):
        """
        Bucket a field into fixed-width intervals and count the documents in
        each bucket. Only works on numerical data.

        Recognised keys in ``q``:

        * ``field``: numerical field to bucket. Default value is ``""``.

        :param app: [string] application name
        :param app_type: [string] application type
        :param q: dict-like query parameters
        :return: JSON response mapping each bucket key to its document count,
                 or carrying an ``error`` entry if the search fails
        """
        field = Hist._param(q, "field", "")

        query = {
            "aggs": {
                "hist_agg": {
                    "histogram": {
                        "field": field,
                        "interval": Hist.HISTOGRAM_INTERVAL,
                    },
                },
            },
        }
        return Hist._bucket_counts(app, app_type, query, "hist_agg")

def get_value():
    """Return the placeholder value ``0``."""
    placeholder = 0
    return placeholder

def _parse_msg(query):
    """
    Placeholder for parsing a statistics query; currently does nothing.

    The query should have the form ``?measure=name&field=f1, f2&event=a,b``.
    """
    return None
9 changes: 0 additions & 9 deletions distill/algorithms/stats/stats.py

This file was deleted.

65 changes: 45 additions & 20 deletions distill/app.py
@@ -1,11 +1,26 @@
# -*- coding: utf-8 -*-
#
# This file is part of Distill.
# Copyright 2016 The Charles Stark Draper Laboratory, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from flask import Flask, request, jsonify
from distill import app

from distill.models.brew import Brew
from distill.models.userale import UserAle
from distill.models.stout import Stout
from distill.exceptions import ValidationError
from distill.validation import validate_request
from distill.algorithms.stats.hist import Hist

@app.route ('/', methods=['GET'])
def index ():
Expand All @@ -14,7 +29,7 @@ def index ():
.. code-block:: bash
$ curl -XGET https://localhost:8000
$ curl -XGET https://localhost:8090
{
"author" : "Michelle Beard",
Expand Down Expand Up @@ -45,21 +60,22 @@ def create (app_id):
.. code-block:: bash
$ curl -XPOST https://localhost:8000/xdata_v3
$ curl -XPOST https://localhost:8090/xdata_v3
:param app_id: Application name
:return: Newly created application's status as JSON blob
"""
return Brew.create (app_id)

@app.route ('/status/<app_id>', methods=['GET'])
def status (app_id):
@app.route ('/status/<app_id>', defaults={"app_type" : None}, methods=['GET'])
@app.route ('/status/<app_id>/<app_type>', methods=['GET'])
def status (app_id, app_type):
"""
Presents meta information about a registered application, including field names and document types.
.. code-block:: bash
$ curl -XGET https://localhost:8000/status/xdata_v3
$ curl -XGET https://localhost:8090/status/xdata_v3
{
"application": "xdata_v3",
Expand All @@ -71,7 +87,7 @@ def status (app_id):
:param app_id: Application name
:return: Registered applications meta data as JSON blob
"""
return Brew.read (app_id)
return Brew.read (app_id, app_type=app_type)

@app.route ('/update/<app_id>', methods=['POST', 'PUT'])
def update (app_id):
Expand All @@ -80,7 +96,7 @@ def update (app_id):
.. code-block:: bash
$ curl -XPOST https://localhost:8000/update/xdata_v3?name="xdata_v4"
$ curl -XPOST https://localhost:8090/update/xdata_v3?name="xdata_v4"
:param app_id: Application name
:return: Boolean response message as JSON blob
Expand All @@ -94,7 +110,7 @@ def delete (app_id):
.. code-block:: bash
$ curl -XDELETE https://localhost:8000/xdata_v3
$ curl -XDELETE https://localhost:8090/xdata_v3
:param app_id: Application name
:return: Boolean response message as JSON blob
Expand All @@ -103,7 +119,7 @@ def delete (app_id):

@app.route ('/search/<app_id>', defaults={"app_type" : None}, methods=['GET'])
@app.route ('/search/<app_id>/<app_type>', methods=['GET'])
def search (app_id, app_type):
def segment (app_id, app_type):
"""
Search against an application on various fields.
Expand All @@ -119,26 +135,35 @@ def search (app_id, app_type):
:param fl: List of fields to restrict the result set
:return: JSON blob of result set
"""
return UserAle.select (app_id, app_type=app_type, params=q)
q = request.args
return UserAle.segment (app_id, app_type=app_type, params=q)

@app.route('/stat/<app_id>', defaults={"app_type": None}, methods=['GET'])
@app.route('/stat/<app_id>/<app_type>', methods=['GET'])
def stat(app_id, app_type):
    """
    Generic histogram counts for a single registered application filtered optionally by document type.
    View the Statistics document page for method definitions and arguments

    The ``stat`` query argument names the ``Hist`` method to run; the full
    set of query arguments is forwarded to that method as ``q``.

    .. code-block:: bash

        $ curl -XGET https://localhost:8090/stat/xdata_v3/testing/?stat=terms&elem=signup&event=click

    :param app_id: Application name
    :param app_type: Application type
    :return: JSON blob of result set, or a JSON error message when ``stat``
             is missing or does not name a public ``Hist`` method
    """
    stat_name = request.args.get('stat')
    q = request.args

    hist_cls = Hist()
    method = None
    # ``stat`` comes straight from the request: only dispatch to public
    # callables so private helpers and dunder attributes are unreachable,
    # and a missing ``stat`` argument is reported instead of crashing getattr.
    if stat_name and not stat_name.startswith('_'):
        method = getattr(hist_cls, stat_name, None)
    if not callable(method):
        msg = "Class `{}` does not implement `{}`".format(hist_cls.__class__.__name__, stat_name)
        return jsonify(error=msg)

    # Called outside any AttributeError handler so an error raised inside
    # the statistic is not misreported as "not implemented".
    return method(app_id, app_type, q=q)

@app.route ('/denoise/<app_id>', methods=['GET'])
def denoise (app_id):
Expand All @@ -150,10 +175,10 @@ def denoise (app_id):
.. code-block:: bash
$ curl -XGET https://localhost:8000/denoise/xdata_v3?save=true&type=parsed
$ curl -XGET https://localhost:8090/denoise/xdata_v3?save=true&type=parsed
:param app_id: Application name
:return: JSON blob of status
:return: [dict]
"""
doc_type = 'parsed'
save = False
Expand All @@ -176,7 +201,7 @@ def merge_stout ():
.. code-block:: bash
$ curl -XGET https://localhost:8000/stout/xdata_v3
$ curl -XGET https://localhost:8090/stout/xdata_v3
:return: Status message
"""
Expand Down
15 changes: 4 additions & 11 deletions distill/config.cfg
@@ -1,13 +1,3 @@
'''
distill: This package contains a flask app RESTful api for distill
This flask app exposes some restful api endpoints for querying User-ALE.
Very similar to Lucene syntax for basic query operations.
Copyright 2016, The Charles Stark Draper Laboratory
Licensed under Apache Software License.
'''

# Statement for enabling the development environment
DEBUG = True

Expand All @@ -25,14 +15,17 @@ MAPPINGS = '/Users/msb3399/Documents/xdata/stout/MOT_Mappings.csv'
SELECTED = '/Users/msb3399/Documents/xdata/stout/selected_vars_for_distill.csv'

# Elasticsearch Configuration
ES_HOST = 'http://localhost'
#ES_HOST = 'http://52.20.188.117'
#ES_HOST = 'http://msbx.draper.com'
ES_HOST = 'http://msbx.draper.com'
ES_PORT = 9200
HTTP_AUTH = None
USE_SSL = False
VERIFY_CERTS = False
CA_CERTS = None
CLIENT_CERT = None
CLIENT_KEY = None
TIMEOUT = 3

# Application threads. A common general assumption is
# using 2 per available processor cores - to handle
Expand Down
10 changes: 0 additions & 10 deletions distill/exceptions.py

This file was deleted.

0 comments on commit 47e4ea1

Please sign in to comment.