Skip to content

Commit

Permalink
add src
Browse files Browse the repository at this point in the history
  • Loading branch information
corpglory-dev committed Apr 27, 2020
1 parent a7f045f commit 8734258
Show file tree
Hide file tree
Showing 58 changed files with 4,526 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
__pycache__
.vscode
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
build/
dist/
*.spec
__pycache__/
test/
1 change: 1 addition & 0 deletions .vscode/.env
@@ -0,0 +1 @@
PYTHONPATH=analytics
32 changes: 32 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,32 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Attach (Remote Debug)",
"type": "python",
"request": "attach",
"port": 5679,
"host": "localhost",
"pathMappings": [
{
"localRoot": "${workspaceFolder}",
"remoteRoot": "/var/www/analytics"
}
]
},
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"windows": {
"program": "${workspaceFolder}\\bin\\server"
},
"linux": {
"program": "${workspaceFolder}/bin/server"
}
}
]
}
22 changes: 22 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,22 @@
{
"terminal.integrated.shell.windows": "C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\powershell.exe",
"editor.insertSpaces": true,
"files.eol": "\n",
"files.exclude": {
"**/__pycache__/": true,
"dist": true,
"build": true
},
"[python]": {
"editor.tabSize": 4,
},
"python.envFile": "${workspaceFolder}/.vscode/.env",
"python.pythonPath": "python",
"python.linting.enabled": true,
"python.testing.unittestArgs": [ "-v" ],
"python.testing.pytestEnabled": false,
"python.testing.nosetestsEnabled": false,
"python.testing.unittestEnabled": true,
"python.linting.pylintEnabled": true,
"python.jediEnabled": false
}
27 changes: 27 additions & 0 deletions Codestyle.md
@@ -0,0 +1,27 @@
# Type hints

Please use: https://www.python.org/dev/peps/pep-0484/

# Line endings

We use LF everywhere

# Imports

You import local files first, then specific libs, and then standard libs.
So you import from something very specific to something very common.
It allows you to pay attention to the most important things from the beginning.

```
from data_provider import DataProvider
from anomaly_model import AnomalyModel
from pattern_detection_model import PatternDetectionModel
import numpy as np
from scipy.signal import argrelextrema
import pickle
```
12 changes: 12 additions & 0 deletions Dockerfile
@@ -0,0 +1,12 @@
FROM python:3.6.6

COPY requirements.txt /requirements.txt

RUN pip install -r /requirements.txt

WORKDIR /var/www/analytics

COPY . /var/www/analytics/


CMD ["python", "-u", "bin/server"]
13 changes: 12 additions & 1 deletion README.md 100644 → 100755
@@ -1 +1,12 @@
# analytics
# Hastic-server-analytics

Python service which gets tasks from [hastic-server-node](https://github.com/hastic/hastic-server/tree/master/server) to:

* train statistical models
* detect patterns in time series data

## Architecture

The service uses [asyncio](https://docs.python.org/3/library/asyncio.html),
[concurrency](https://docs.python.org/3.6/library/concurrent.futures.html#module-concurrent.futures) and
[pyzmq](https://pyzmq.readthedocs.io/en/latest/).
39 changes: 39 additions & 0 deletions analytics/analytic_types/__init__.py
@@ -0,0 +1,39 @@
"""
It is the place where we put all classes and types
common for all analytics code
For example, if you write someting which is used
in analytic_unit_manager, it should be here.
If you create something spicific which is used only in one place,
like PatternDetectionCache, then it should not be here.
"""

import pandas as pd
from typing import Union, List, Tuple

AnalyticUnitId = str

ModelCache = dict

# TODO: explicit timestamp / value
TimeSeries = List[Tuple[int, float]]

"""
Example:
tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00'])
ts = TimeSeries([4, 5, 6], tsis)
"""
Timestamp = Union[str, pd.Timestamp]

class TimeSeriesIndex(pd.DatetimeIndex):
def __new__(cls, *args, **kwargs):
return pd.DatetimeIndex.__new__(cls, *args, **kwargs)

# TODO: make generic type for values. See List definition for example of generic class
# TODO: constructor from DataFrame
# TODO: repleace TimeSeries (above) with this class: rename TimeSeries2 to TimeSeries
class TimeSeries2(pd.Series):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
38 changes: 38 additions & 0 deletions analytics/analytic_types/cache.py
@@ -0,0 +1,38 @@
from typing import Optional, List, Dict

from analytic_types.segment import AnomalyDetectorSegment
from analytic_types.detector import Bound

from utils.meta import JSONClass, SerializableList

@JSONClass
class AnomalyCache:
def __init__(
self,
alpha: float,
confidence: float,
enable_bounds: str,
seasonality: Optional[int] = None,
segments: Optional[List[Dict]] = None,
time_step: Optional[int] = None,
):
self.alpha = alpha
self.confidence = confidence
self.enable_bounds = enable_bounds
if seasonality != None and seasonality < 0:
raise ValueError(f'Can`t create AnomalyCache: got invalid seasonality {seasonality}')
self.seasonality = seasonality
self.time_step = time_step
if segments != None:
anomaly_segments = map(AnomalyDetectorSegment.from_json, segments)
self.segments = SerializableList(anomaly_segments)
else:
self.segments = []

def set_segments(self, segments: List[AnomalyDetectorSegment]):
if len(segments) > 0:
self.segments = SerializableList(segments)

def get_enabled_bounds(self) -> Bound:
#TODO: use class with to_json()
return Bound(self.enable_bounds)
14 changes: 14 additions & 0 deletions analytics/analytic_types/data_bucket.py
@@ -0,0 +1,14 @@
import pandas as pd


class DataBucket:

def __init__(self):
self.data = pd.DataFrame([], columns=['timestamp', 'value'])

def receive_data(self, data: pd.DataFrame):
self.data = self.data.append(data, ignore_index=True)

def drop_data(self, count: int):
if count > 0:
self.data = self.data.iloc[count:]
47 changes: 47 additions & 0 deletions analytics/analytic_types/detector.py
@@ -0,0 +1,47 @@
from analytic_types import ModelCache, TimeSeries
from analytic_types.segment import Segment

from enum import Enum
from typing import List, Optional, Tuple

import utils.meta

class Bound(Enum):
ALL = 'ALL'
UPPER = 'UPPER'
LOWER = 'LOWER'

class DetectionResult:

def __init__(
self,
cache: Optional[ModelCache] = None,
segments: Optional[List[Segment]] = None,
last_detection_time: int = None
):
if cache is None:
cache = {}
if segments is None:
segments = []
self.cache = cache
self.segments = segments
self.last_detection_time = last_detection_time

# TODO: use @utils.meta.JSONClass (now it can't serialize list of objects)
def to_json(self):
return {
'cache': self.cache,
'segments': list(map(lambda segment: segment.to_json(), self.segments)),
'lastDetectionTime': self.last_detection_time
}

@utils.meta.JSONClass
class ProcessingResult():

def __init__(
self,
lower_bound: Optional[TimeSeries] = None,
upper_bound: Optional[TimeSeries] = None,
):
self.lower_bound = lower_bound
self.upper_bound = upper_bound
17 changes: 17 additions & 0 deletions analytics/analytic_types/learning_info.py
@@ -0,0 +1,17 @@
import utils.meta

@utils.meta.JSONClass
class LearningInfo:

def __init__(self):
super().__init__()
self.confidence = []
self.patterns_list = []
self.pattern_width = []
self.pattern_height = []
self.pattern_timestamp = []
self.segment_center_list = []
self.patterns_value = []

def __str__(self):
return str(self.to_json())
57 changes: 57 additions & 0 deletions analytics/analytic_types/segment.py
@@ -0,0 +1,57 @@
from typing import Optional

import utils.meta

@utils.meta.JSONClass
class Segment:
'''
Used for segment manipulation instead of { 'from': ..., 'to': ... } dict
'''

def __init__(
self,
from_timestamp: int,
to_timestamp: int,
_id: Optional[str] = None,
analytic_unit_id: Optional[str] = None,
labeled: Optional[bool] = None,
deleted: Optional[bool] = None,
message: Optional[str] = None
):
if to_timestamp < from_timestamp:
raise ValueError(f'Can`t create segment with to < from: {to_timestamp} < {from_timestamp}')
self.from_timestamp = from_timestamp
self.to_timestamp = to_timestamp
self._id = _id
self.analytic_unit_id = analytic_unit_id
self.labeled = labeled
self.deleted = deleted
self.message = message

@utils.meta.JSONClass
class AnomalyDetectorSegment(Segment):
'''
Used for segment manipulation instead of { 'from': ..., 'to': ..., 'data': ... } dict
'''

def __init__(
self,
from_timestamp: int,
to_timestamp: int,
data = [],
_id: Optional[str] = None,
analytic_unit_id: Optional[str] = None,
labeled: Optional[bool] = None,
deleted: Optional[bool] = None,
message: Optional[str] = None
):
super().__init__(
from_timestamp,
to_timestamp,
_id,
analytic_unit_id,
labeled,
deleted,
message
)
self.data = data

0 comments on commit 8734258

Please sign in to comment.