Skip to content

Commit

Permalink
add src
Browse files Browse the repository at this point in the history
  • Loading branch information
corpglory-dev committed Apr 27, 2020
1 parent a7f045f commit 8734258
Show file tree
Hide file tree
Showing 58 changed files with 4,526 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
__pycache__
.vscode
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
build/
dist/
*.spec
__pycache__/
test/
1 change: 1 addition & 0 deletions .vscode/.env
@@ -0,0 +1 @@
PYTHONPATH=analytics
32 changes: 32 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,32 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Attach (Remote Debug)",
"type": "python",
"request": "attach",
"port": 5679,
"host": "localhost",
"pathMappings": [
{
"localRoot": "${workspaceFolder}",
"remoteRoot": "/var/www/analytics"
}
]
},
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"windows": {
"program": "${workspaceFolder}\\bin\\server"
},
"linux": {
"program": "${workspaceFolder}/bin/server"
}
}
]
}
22 changes: 22 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,22 @@
{
"terminal.integrated.shell.windows": "C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\powershell.exe",
"editor.insertSpaces": true,
"files.eol": "\n",
"files.exclude": {
"**/__pycache__/": true,
"dist": true,
"build": true
},
"[python]": {
"editor.tabSize": 4,
},
"python.envFile": "${workspaceFolder}/.vscode/.env",
"python.pythonPath": "python",
"python.linting.enabled": true,
"python.testing.unittestArgs": [ "-v" ],
"python.testing.pytestEnabled": false,
"python.testing.nosetestsEnabled": false,
"python.testing.unittestEnabled": true,
"python.linting.pylintEnabled": true,
"python.jediEnabled": false
}
27 changes: 27 additions & 0 deletions Codestyle.md
@@ -0,0 +1,27 @@
# Type hints

Please use: https://www.python.org/dev/peps/pep-0484/

# Line endings

We use LF everywhere

# Imports

You import local files first, then specific libs, and then standard libs.
So you import from something very specific to something very common.
It allows you to pay attention to the most important things from the beginning.

```
from data_provider import DataProvider
from anomaly_model import AnomalyModel
from pattern_detection_model import PatternDetectionModel
import numpy as np
from scipy.signal import argrelextrema
import pickle
```
12 changes: 12 additions & 0 deletions Dockerfile
@@ -0,0 +1,12 @@
FROM python:3.6.6

COPY requirements.txt /requirements.txt

RUN pip install -r /requirements.txt

WORKDIR /var/www/analytics

COPY . /var/www/analytics/


CMD ["python", "-u", "bin/server"]
13 changes: 12 additions & 1 deletion README.md 100644 → 100755
@@ -1 +1,12 @@
# analytics
# Hastic-server-analytics

Python service which gets tasks from [hastic-server-node](https://github.com/hastic/hastic-server/tree/master/server) to:

* train statistical models
* detect patterns in time series data

## Architecture

The service uses [asyncio](https://docs.python.org/3/library/asyncio.html),
[concurrency](https://docs.python.org/3.6/library/concurrent.futures.html#module-concurrent.futures) and
[pyzmq](https://pyzmq.readthedocs.io/en/latest/).
39 changes: 39 additions & 0 deletions analytics/analytic_types/__init__.py
@@ -0,0 +1,39 @@
"""
It is the place where we put all classes and types
common for all analytics code
For example, if you write someting which is used
in analytic_unit_manager, it should be here.
If you create something spicific which is used only in one place,
like PatternDetectionCache, then it should not be here.
"""

import pandas as pd
from typing import Union, List, Tuple

AnalyticUnitId = str

ModelCache = dict

# TODO: explicit timestamp / value
TimeSeries = List[Tuple[int, float]]

"""
Example:
tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00'])
ts = TimeSeries([4, 5, 6], tsis)
"""
Timestamp = Union[str, pd.Timestamp]

class TimeSeriesIndex(pd.DatetimeIndex):
def __new__(cls, *args, **kwargs):
return pd.DatetimeIndex.__new__(cls, *args, **kwargs)

# TODO: make generic type for values. See List definition for example of generic class
# TODO: constructor from DataFrame
# TODO: repleace TimeSeries (above) with this class: rename TimeSeries2 to TimeSeries
class TimeSeries2(pd.Series):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
38 changes: 38 additions & 0 deletions analytics/analytic_types/cache.py
@@ -0,0 +1,38 @@
from typing import Optional, List, Dict

from analytic_types.segment import AnomalyDetectorSegment
from analytic_types.detector import Bound

from utils.meta import JSONClass, SerializableList

@JSONClass
class AnomalyCache:
def __init__(
self,
alpha: float,
confidence: float,
enable_bounds: str,
seasonality: Optional[int] = None,
segments: Optional[List[Dict]] = None,
time_step: Optional[int] = None,
):
self.alpha = alpha
self.confidence = confidence
self.enable_bounds = enable_bounds
if seasonality != None and seasonality < 0:
raise ValueError(f'Can`t create AnomalyCache: got invalid seasonality {seasonality}')
self.seasonality = seasonality
self.time_step = time_step
if segments != None:
anomaly_segments = map(AnomalyDetectorSegment.from_json, segments)
self.segments = SerializableList(anomaly_segments)
else:
self.segments = []

def set_segments(self, segments: List[AnomalyDetectorSegment]):
if len(segments) > 0:
self.segments = SerializableList(segments)

def get_enabled_bounds(self) -> Bound:
#TODO: use class with to_json()
return Bound(self.enable_bounds)
14 changes: 14 additions & 0 deletions analytics/analytic_types/data_bucket.py
@@ -0,0 +1,14 @@
import pandas as pd


class DataBucket:

def __init__(self):
self.data = pd.DataFrame([], columns=['timestamp', 'value'])

def receive_data(self, data: pd.DataFrame):
self.data = self.data.append(data, ignore_index=True)

def drop_data(self, count: int):
if count > 0:
self.data = self.data.iloc[count:]
47 changes: 47 additions & 0 deletions analytics/analytic_types/detector.py
@@ -0,0 +1,47 @@
from analytic_types import ModelCache, TimeSeries
from analytic_types.segment import Segment

from enum import Enum
from typing import List, Optional, Tuple

import utils.meta

class Bound(Enum):
ALL = 'ALL'
UPPER = 'UPPER'
LOWER = 'LOWER'

class DetectionResult:

def __init__(
self,
cache: Optional[ModelCache] = None,
segments: Optional[List[Segment]] = None,
last_detection_time: int = None
):
if cache is None:
cache = {}
if segments is None:
segments = []
self.cache = cache
self.segments = segments
self.last_detection_time = last_detection_time

# TODO: use @utils.meta.JSONClass (now it can't serialize list of objects)
def to_json(self):
return {
'cache': self.cache,
'segments': list(map(lambda segment: segment.to_json(), self.segments)),
'lastDetectionTime': self.last_detection_time
}

@utils.meta.JSONClass
class ProcessingResult():

def __init__(
self,
lower_bound: Optional[TimeSeries] = None,
upper_bound: Optional[TimeSeries] = None,
):
self.lower_bound = lower_bound
self.upper_bound = upper_bound
17 changes: 17 additions & 0 deletions analytics/analytic_types/learning_info.py
@@ -0,0 +1,17 @@
import utils.meta

@utils.meta.JSONClass
class LearningInfo:

def __init__(self):
super().__init__()
self.confidence = []
self.patterns_list = []
self.pattern_width = []
self.pattern_height = []
self.pattern_timestamp = []
self.segment_center_list = []
self.patterns_value = []

def __str__(self):
return str(self.to_json())
57 changes: 57 additions & 0 deletions analytics/analytic_types/segment.py
@@ -0,0 +1,57 @@
from typing import Optional

import utils.meta

@utils.meta.JSONClass
class Segment:
'''
Used for segment manipulation instead of { 'from': ..., 'to': ... } dict
'''

def __init__(
self,
from_timestamp: int,
to_timestamp: int,
_id: Optional[str] = None,
analytic_unit_id: Optional[str] = None,
labeled: Optional[bool] = None,
deleted: Optional[bool] = None,
message: Optional[str] = None
):
if to_timestamp < from_timestamp:
raise ValueError(f'Can`t create segment with to < from: {to_timestamp} < {from_timestamp}')
self.from_timestamp = from_timestamp
self.to_timestamp = to_timestamp
self._id = _id
self.analytic_unit_id = analytic_unit_id
self.labeled = labeled
self.deleted = deleted
self.message = message

@utils.meta.JSONClass
class AnomalyDetectorSegment(Segment):
'''
Used for segment manipulation instead of { 'from': ..., 'to': ..., 'data': ... } dict
'''

def __init__(
self,
from_timestamp: int,
to_timestamp: int,
data = [],
_id: Optional[str] = None,
analytic_unit_id: Optional[str] = None,
labeled: Optional[bool] = None,
deleted: Optional[bool] = None,
message: Optional[str] = None
):
super().__init__(
from_timestamp,
to_timestamp,
_id,
analytic_unit_id,
labeled,
deleted,
message
)
self.data = data

0 comments on commit 8734258

Please sign in to comment.