andrewm4894 · andrewm4894 · Oct 27, 2024 · Oct 26, 2024 · Oct 26, 2024 · Oct 26, 2024
diff --git a/.github/workflows/branch_deployments.yml b/.github/workflows/branch_deployments.yml
@@ -11,7 +11,8 @@ env:
   DAGSTER_CLOUD_URL: "http://andrewm4894.dagster.cloud"
   DAGSTER_CLOUD_API_TOKEN: ${{ secrets.DAGSTER_CLOUD_API_TOKEN }}
   ENABLE_FAST_DEPLOYS: 'true'
-  PYTHON_VERSION: '3.10'
+  PYTHON_VERSION: '3.11'
+  DAGSTER_CLOUD_WORKSPACE_PATH: 'workspace.yaml'
   DAGSTER_CLOUD_FILE: 'dagster_cloud.yaml'
 
 jobs:

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -13,7 +13,8 @@ env:
   DAGSTER_CLOUD_URL: "http://andrewm4894.dagster.cloud"
   DAGSTER_CLOUD_API_TOKEN: ${{ secrets.DAGSTER_CLOUD_API_TOKEN }}
   ENABLE_FAST_DEPLOYS: 'true'
-  PYTHON_VERSION: '3.10'
+  PYTHON_VERSION: '3.11'
+  DAGSTER_CLOUD_WORKSPACE_PATH: 'workspace.yaml'
   DAGSTER_CLOUD_FILE: 'dagster_cloud.yaml'
 
 jobs:

diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
@@ -24,10 +24,12 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: "3.11"
           cache: "pip"
           cache-dependency-path: "**/requirements*.txt"
       - uses: pre-commit/action@v3.0.0
+        with:
+          extra_args: --all-files --config .pre-commit-config.yaml  # extra_args overrides the default --all-files, so pass it explicitly
       - name: Post PR comment on failure
         if: failure() && github.event_name == 'pull_request_target'
         uses: peter-evans/create-or-update-comment@v2

diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
@@ -14,15 +14,14 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.10
+    - name: Set up Python 3.11
       uses: actions/setup-python@v2
       with:
-        python-version: "3.10"
+        python-version: "3.11"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install pytest
-        pip install .
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
     - name: Test with pytest

diff --git a/.gitignore b/.gitignore
@@ -161,6 +161,7 @@ cython_debug/
 
 # dagster
 tmp*
+dagster.log
 
 # vscode
 .vscode/*

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -18,4 +18,16 @@ repos:
     rev: 5.12.0
     hooks:
       - id: isort
-        args: ["--profile", "black"]
+        args:
+          - "--profile"
+          - "black"
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.0.241
+    hooks:
+      - id: ruff
+        args:
+          - "--fix"
+          - "--exclude"
+          - "metrics/defaults/python/prompt.py"
+          - "--exclude"
+          - "anomstack/llm/completion.py"
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -9,5 +9,14 @@
         "openai"
     ],
     "python.pythonPath": "venv\\Scripts\\python.exe",
-    "cloudcode.duetAI.inlineSuggestions.enableAuto": true
+    "cloudcode.duetAI.inlineSuggestions.enableAuto": true,
+    "sqltools.connections": [
+        {
+            "previewLimit": 50,
+            "driver": "SQLite",
+            "database": "${workspaceFolder:anomstack}/tmpdata/anomstack.db",
+            "name": "tmpdata/anomstack.db"
+        }
+    ],
+    "sqltools.useNodeRuntime": true
 }
diff --git a/Makefile b/Makefile
@@ -8,6 +8,8 @@ SHELL=/bin/bash
 .PHONY: tests
 .PHONY: docs
 .PHONY: requirements
+.PHONY: kill-locald
+.PHONY: ps-locald
 
 # start streamlit dashboard
 dashboard:
@@ -21,13 +23,21 @@ local:
 locald:
 	nohup dagster dev -f anomstack/main.py > dagster.log 2>&1 &
 
+# kill any running dagster process (does nothing when none is running, instead of failing on an empty `kill -9`)
+kill-locald:
+	pids=$$(ps aux | grep dagster | grep -v grep | awk '{print $$2}'); if [ -n "$$pids" ]; then kill -9 $$pids; fi
+
+# list running dagster processes (grep exits 1 on no match, so report that instead of failing the target)
+ps-locald:
+	ps aux | grep dagster | grep -v grep || echo "no dagster processes running"
+
 # start docker containers
 docker:
 	docker compose up -d --build
 
 # pre-commit
 pre-commit:
-	pre-commit run --all-files
+	pre-commit run --all-files --config .pre-commit-config.yaml
 
 # run tests
 tests:

diff --git a/anomstack/alerts/asciiart.py b/anomstack/alerts/asciiart.py
@@ -2,6 +2,7 @@
 # copied from: https://raw.githubusercontent.com/kakwa/py-ascii-graph/09ca5901be94ec3563bdcc25d6396e18fd8ca5df/ascii_graph/__init__.py
 # copied from: https://raw.githubusercontent.com/nyurik/py-ascii-graph/fix-python310/ascii_graph/__init__.py
 """
+
 from __future__ import unicode_literals
 
 import copy
@@ -115,8 +116,7 @@ def __init__(
 
     @staticmethod
     def _len_noansi(string):
-        l = len(re.sub("\x1b[^m]*m", "", string))
-        return l
+        return len(re.sub("\x1b[^m]*m", "", string))
 
     def _trans_hr(self, value):
         if self.divider is None:
@@ -165,7 +165,6 @@ def _get_thresholds(self, data):
 
             # If we have a list of values for the item
             if isinstance(value, Iterable):
-                icount = 0
                 maxvalue = 0
                 minvalue = 0
                 for ivalue, icolor in value:
@@ -235,14 +234,12 @@ def _gen_graph_string_part(
         all_width = max_value + abs(min_neg_value)
 
         if all_width == 0:
-            bar_width = 0
             neg_width = 0
-            pos_width = 0
         else:
             neg_width = int(
                 abs(float(min_neg_value)) * float(graph_length) / float(all_width)
             )
-            pos_width = int(abs(max_value) * graph_length / all_width)
+            int(abs(max_value) * graph_length / all_width)
 
         if isinstance(value, Iterable):
             accuvalue = 0
@@ -372,14 +369,14 @@ def _sanitize_string(self, string):
         input_type = type(string)
         if input_type is str:
             if sys.version < "3":
-                info = unicode(string)
+                info = unicode(string)  # noqa: F821
             else:
                 info = string
         elif input_type is unicode_type:
             info = string
         elif input_type is int or input_type is float:
             if sys.version < "3":
-                info = unicode(string)
+                info = unicode(string)  # noqa: F821
             else:
                 info = str(string)
         return info
@@ -441,7 +438,7 @@ def graph(self, label=None, data=[]):
         san_data = self._sanitize_data(data)
         all_thre = self._get_thresholds(san_data)
 
-        if not label is None:
+        if label is not None:
             san_label = self._sanitize_string(label)
             label_len = self._len_noansi(san_label)
         else:
@@ -477,7 +474,7 @@ def graph(self, label=None, data=[]):
             # calcul of the real line length
             real_line_length = min_line_length
 
-        if not label is None:
+        if label is not None:
             result.append(san_label)
             result.append(Pyasciigraph._u(self.titlebar) * real_line_length)
 

diff --git a/anomstack/alerts/email.py b/anomstack/alerts/email.py
@@ -34,8 +34,10 @@ def send_email_with_plot(
         subject (str): The subject of the email.
         body (str): The body of the email.
         attachment_name (str): The name of the attachment.
-        threshold (float, optional): The threshold for the anomaly detection. Defaults to 0.8.
-        score_col (str, optional): The name of the column containing the anomaly scores. Defaults to 'metric_score_smooth'.
+        threshold (float, optional): The threshold for the anomaly detection.
+            Defaults to 0.8.
+        score_col (str, optional): The name of the column containing the
+            anomaly scores. Defaults to 'metric_score_smooth'.
 
     Returns:
         None
@@ -118,7 +120,7 @@ def send_email(
     with smtplib.SMTP(host, port) as server:
         server.connect(host, port)
         server.starttls(context=context)
-        server.login(sender, password)
+        server.login(sender, password) # type: ignore
         text = msg.as_string()
         server.sendmail(sender, to, text)
         server.quit()

diff --git a/anomstack/alerts/send.py b/anomstack/alerts/send.py
@@ -27,11 +27,16 @@ def send_alert(
         metric_name (str): The name of the metric.
         title (str): The title of the alert.
         df (pd.DataFrame): The data to be included in the alert.
-        alert_methods (str, optional): The alert methods to use, separated by commas. Defaults to 'email,slack'.
-        threshold (float, optional): The threshold for the alert. Defaults to 0.8.
-        description (str, optional): The description of the alert. Defaults to ''.
-        tags (list, optional): The tags to be included in the alert. Defaults to None.
-        score_col (str, optional): The column name of the score. Defaults to 'metric_score_smooth'.
+        alert_methods (str, optional): The alert methods to use, separated by
+            commas. Defaults to 'email,slack'.
+        threshold (float, optional): The threshold for the alert.
+            Defaults to 0.8.
+        description (str, optional): The description of the alert.
+            Defaults to ''.
+        tags (list, optional): The tags to be included in the alert.
+            Defaults to None.
+        score_col (str, optional): The column name of the score.
+            Defaults to 'metric_score_smooth'.
 
     Returns:
         pd.DataFrame: The input DataFrame.
@@ -69,9 +74,12 @@ def send_df(
     Args:
         title (str): The title of the alert.
         df (pd.DataFrame): The data to be included in the alert.
-        alert_methods (str, optional): The alert methods to use, separated by commas. Defaults to 'email,slack'.
-        description (str, optional): The description of the alert. Defaults to ''.
-        tags (list, optional): The tags to be included in the alert. Defaults to None.
+        alert_methods (str, optional): The alert methods to use, separated by
+            commas. Defaults to 'email,slack'.
+        description (str, optional): The description of the alert.
+            Defaults to ''.
+        tags (list, optional): The tags to be included in the alert.
+            Defaults to None.
 
     Returns:
         pd.DataFrame: The input DataFrame.

diff --git a/anomstack/alerts/slack.py b/anomstack/alerts/slack.py
@@ -18,7 +18,8 @@ def send_alert_slack(
     Args:
         title (str, optional): Title of the alert. Defaults to "alert".
         message (str, optional): Message of the alert. Defaults to "hello".
-        env_var_webhook_url (str, optional): Environment variable name for the webhook URL. Defaults to "ANOMSTACK_SLACK_WEBHOOK_URL".
+        env_var_webhook_url (str, optional): Environment variable name for the
+            webhook URL. Defaults to "ANOMSTACK_SLACK_WEBHOOK_URL".
 
     Returns:
         requests.Response: Response from the Slack API.

diff --git a/anomstack/df/save.py b/anomstack/df/save.py
@@ -18,9 +18,11 @@ def save_df(
 
     Args:
     - df: The Pandas DataFrame to save.
-    - db: The name of the database to save to. Must be one of 'bigquery', 'snowflake', or 'duckdb'.
+    - db: The name of the database to save to. Must be one of 'bigquery',
+        'snowflake', or 'duckdb'.
     - table_key: A string identifying the table to save to.
-    - if_exists: What to do if the table already exists. Must be one of 'fail', 'replace', or 'append'. Default is 'append'.
+    - if_exists: What to do if the table already exists. Must be one of
+        'fail', 'replace', or 'append'. Default is 'append'.
 
     Returns:
     - The Pandas DataFrame that was saved.

diff --git a/anomstack/df/wrangle.py b/anomstack/df/wrangle.py
@@ -12,7 +12,8 @@ def wrangle_df(df: pd.DataFrame, rounding: int = 4) -> pd.DataFrame:
 
     Args:
         df (pd.DataFrame): The DataFrame to be wrangled.
-        rounding (int, optional): The number of decimal places to round the 'metric_value' column to. Defaults to 4.
+        rounding (int, optional): The number of decimal places to round the
+            'metric_value' column to. Defaults to 4.
 
     Returns:
         pd.DataFrame: The wrangled DataFrame.
@@ -37,10 +38,12 @@ def wrangle_df(df: pd.DataFrame, rounding: int = 4) -> pd.DataFrame:
         ]
     ]
 
-    # if we have any nan metric_values then drop them and log how many nan rows we dropped
+    # if we have any nan metric_values then drop them and log how many
+    # nan rows we dropped
     if df["metric_value"].isnull().sum() > 0:
         logger.warning(
-            f"dropping {df['metric_value'].isnull().sum()} nan metric_value rows"
+            f"dropping {df['metric_value'].isnull().sum()} nan "
+            "metric_value rows"
         )
         df = df[~df["metric_value"].isnull()]