From 6bcadcad0009a070e1782c82485a7da94de50375 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Dec 2022 03:27:39 +0000 Subject: [PATCH 1/7] Bump unstructured from 0.3.2 to 0.3.4 in /requirements Bumps [unstructured](https://github.com/Unstructured-IO/unstructured) from 0.3.2 to 0.3.4. - [Release notes](https://github.com/Unstructured-IO/unstructured/releases) - [Changelog](https://github.com/Unstructured-IO/unstructured/blob/main/CHANGELOG.md) - [Commits](https://github.com/Unstructured-IO/unstructured/compare/0.3.2...0.3.4) --- updated-dependencies: - dependency-name: unstructured dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements/base.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 8f2183872..c29bb6172 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -19,7 +19,9 @@ beautifulsoup4==4.11.1 bleach==5.0.1 # via nbconvert certifi==2022.12.7 - # via httpx + # via + # httpx + # unstructured click==8.1.3 # via # nltk @@ -182,7 +184,7 @@ typing-extensions==4.3.0 # via # mypy # pydantic -unstructured==0.3.2 +unstructured==0.3.4 # via -r base.in unstructured-api-tools==0.4.6 # via -r base.in From 8e1d4558dbd1cb433319cdb51ce38926d9466d51 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 26 Dec 2022 09:28:40 -0500 Subject: [PATCH 2/7] regenerate api --- prepline_emails/api/app.py | 4 +- .../api/{hello_world.py => email.py} | 39 +++++++------------ 2 files changed, 15 insertions(+), 28 deletions(-) rename prepline_emails/api/{hello_world.py => email.py} (82%) diff --git a/prepline_emails/api/app.py b/prepline_emails/api/app.py index c7169bb14..6bc68dc0c 100644 --- a/prepline_emails/api/app.py +++ b/prepline_emails/api/app.py @@ -10,7 +10,7 @@ from slowapi.errors import RateLimitExceeded from slowapi.util import get_remote_address -from 
.hello_world import router as hello_world_router +from .email import router as email_router limiter = Limiter(key_func=get_remote_address) @@ -18,7 +18,7 @@ app.state.limiter = limiter app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) -app.include_router(hello_world_router) +app.include_router(email_router) @app.get("/healthcheck", status_code=status.HTTP_200_OK) diff --git a/prepline_emails/api/hello_world.py b/prepline_emails/api/email.py similarity index 82% rename from prepline_emails/api/hello_world.py rename to prepline_emails/api/email.py index 2e28be681..1a13d05e9 100644 --- a/prepline_emails/api/hello_world.py +++ b/prepline_emails/api/email.py @@ -21,16 +21,8 @@ RATE_LIMIT = os.environ.get("PIPELINE_API_RATE_LIMIT", "1/second") -# pipeline-api -message = "hello world" - - -def pipeline_api( - file, - file_content_type=None, - m_some_parameters=[], -): - return f"{message}: {' '.join(m_some_parameters)}" +def pipeline_api(text): + pass import json @@ -99,17 +91,16 @@ async def stream_response(self, send: Send) -> None: await send({"type": "http.response.body", "body": b"", "more_body": False}) -@router.post("/emails/v0.0.1/hello-world") +@router.post("/emails/v0.0.1/email") @limiter.limit(RATE_LIMIT) async def pipeline_1( request: Request, - files: Union[List[UploadFile], None] = File(default=None), - some_parameters: List[str] = Form(default=[]), + text_files: Union[List[UploadFile], None] = File(default=None), ): content_type = request.headers.get("Accept") - if isinstance(files, list) and len(files): - if len(files) > 1: + if isinstance(text_files, list) and len(text_files): + if len(text_files) > 1: if content_type and content_type not in ["*/*", "multipart/mixed"]: return PlainTextResponse( content=( @@ -120,14 +111,12 @@ async def pipeline_1( ) def response_generator(): - for file in files: + for file in text_files: - _file = file.file + text = file.file.read().decode("utf-8") response = pipeline_api( - _file, - 
m_some_parameters=some_parameters, - file_content_type=file.content_type, + text, ) if type(response) not in [str, bytes]: response = json.dumps(response) @@ -138,20 +127,18 @@ def response_generator(): ) else: - file = files[0] - _file = file.file + text_file = text_files[0] + text = text_file.file.read().decode("utf-8") response = pipeline_api( - _file, - m_some_parameters=some_parameters, - file_content_type=file.content_type, + text, ) return response else: return PlainTextResponse( - content='Request parameter "files" is required.\n', + content='Request parameter "text_files" is required.\n', status_code=status.HTTP_400_BAD_REQUEST, ) From 528f500f56820ef25dedfed4666b2a26b44bea65 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 26 Dec 2022 09:38:20 -0500 Subject: [PATCH 3/7] added a test --- requirements/base.in | 1 + requirements/base.txt | 31 ++++++++++++++++++++++----- requirements/dev.txt | 42 ++++++++++++++++++++++++++++--------- requirements/test.txt | 24 +++++++++++++-------- test_emails/api/test_app.py | 11 ++++++++++ 5 files changed, 85 insertions(+), 24 deletions(-) create mode 100644 test_emails/api/test_app.py diff --git a/requirements/base.in b/requirements/base.in index a32a9ee31..9c986acfd 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -2,3 +2,4 @@ unstructured>=0.2.4 unstructured-api-tools>=0.4.4 ratelimit +requests diff --git a/requirements/base.txt b/requirements/base.txt index c29bb6172..6ac17693a 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile requirements/base.in # @@ -21,7 +21,10 @@ bleach==5.0.1 certifi==2022.12.7 # via # httpx + # requests # unstructured +charset-normalizer==2.1.1 + # via requests click==8.1.3 # via # nltk @@ -50,7 +53,12 @@ httpx==0.15.5 idna==3.4 # via # anyio + # requests # 
rfc3986 +importlib-metadata==5.2.0 + # via nbconvert +importlib-resources==5.10.1 + # via jsonschema jinja2==3.1.2 # via # nbconvert @@ -112,6 +120,8 @@ pandas==1.5.2 # via argilla pandocfilters==1.5.0 # via nbconvert +pkgutil-resolve-name==1.3.10 + # via jsonschema pydantic==1.10.2 # via # argilla @@ -137,9 +147,11 @@ pyyaml==6.0 pyzmq==24.0.1 # via jupyter-client ratelimit==2.2.1 - # via -r base.in + # via -r requirements/base.in regex==2022.10.31 # via nltk +requests==2.28.1 + # via -r requirements/base.in rfc3986[idna2008]==1.5.0 # via httpx six==1.16.0 @@ -161,6 +173,8 @@ starlette==0.20.4 # via fastapi tinycss2==1.1.1 # via nbconvert +tomli==2.0.1 + # via mypy tornado==6.2 # via jupyter-client tqdm==4.64.1 @@ -184,10 +198,13 @@ typing-extensions==4.3.0 # via # mypy # pydantic + # starlette unstructured==0.3.4 - # via -r base.in + # via -r requirements/base.in unstructured-api-tools==0.4.6 - # via -r base.in + # via -r requirements/base.in +urllib3==1.26.13 + # via requests uvicorn[standard]==0.18.3 # via unstructured-api-tools uvloop==0.17.0 @@ -204,3 +221,7 @@ wrapt==1.13.3 # via # argilla # deprecated +zipp==3.11.0 + # via + # importlib-metadata + # importlib-resources diff --git a/requirements/dev.txt b/requirements/dev.txt index 04d68622c..da910216b 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,9 +1,13 @@ # -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile requirements/dev.in # +appnope==0.1.3 + # via + # ipykernel + # ipython argon2-cffi==21.3.0 # via notebook argon2-cffi-bindings==21.2.0 @@ -21,7 +25,7 @@ backcall==0.2.0 beautifulsoup4==4.11.1 # via nbconvert black==22.12.0 - # via -r dev.in + # via -r requirements/dev.in bleach==5.0.1 # via nbconvert build==0.8.0 @@ -52,9 +56,13 @@ fastcore==1.5.27 fastjsonschema==2.16.2 # via nbformat flake8==5.0.4 - # via -r dev.in + # via -r requirements/dev.in 
ghapi==1.0.3 # via nbdev +importlib-metadata==5.2.0 + # via nbconvert +importlib-resources==5.10.1 + # via jsonschema ipykernel==6.15.3 # via # ipywidgets @@ -64,7 +72,7 @@ ipykernel==6.15.3 # qtconsole ipython==8.7.0 # via - # -r dev.in + # -r requirements/dev.in # execnb # ipykernel # ipywidgets @@ -84,7 +92,7 @@ jinja2==3.1.2 jsonschema==4.16.0 # via nbformat jupyter==1.0.0 - # via -r dev.in + # via -r requirements/dev.in jupyter-client==7.3.5 # via # ipykernel @@ -120,7 +128,7 @@ mccabe==0.7.0 mistune==2.0.4 # via nbconvert mypy==0.991 - # via -r dev.in + # via -r requirements/dev.in mypy-extensions==0.4.3 # via # black @@ -132,7 +140,7 @@ nbconvert==7.0.0 # jupyter # notebook nbdev==2.3.9 - # via -r dev.in + # via -r requirements/dev.in nbformat==5.6.0 # via # nbclient @@ -167,7 +175,9 @@ pexpect==4.8.0 pickleshare==0.7.5 # via ipython pip-tools==6.12.1 - # via -r dev.in + # via -r requirements/dev.in +pkgutil-resolve-name==1.3.10 + # via jsonschema platformdirs==2.5.2 # via black prometheus-client==0.14.1 @@ -230,6 +240,12 @@ terminado==0.15.0 # via notebook tinycss2==1.1.1 # via nbconvert +tomli==2.0.1 + # via + # black + # build + # mypy + # pep517 tornado==6.2 # via # ipykernel @@ -250,7 +266,9 @@ traitlets==5.4.0 # notebook # qtconsole typing-extensions==4.3.0 - # via mypy + # via + # black + # mypy watchdog==2.1.9 # via nbdev wcwidth==0.2.5 @@ -265,6 +283,10 @@ wheel==0.37.1 # pip-tools widgetsnbextension==4.0.3 # via ipywidgets +zipp==3.11.0 + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/test.txt b/requirements/test.txt index 323fc017f..846e7198d 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,27 +1,27 @@ # -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile requirements/test.in # 
attrs==22.1.0 # via pytest black==22.12.0 - # via -r test.in + # via -r requirements/test.in click==8.1.3 # via - # -r test.in + # -r requirements/test.in # black coverage[toml]==6.4.4 # via pytest-cov flake8==5.0.4 - # via -r test.in + # via -r requirements/test.in iniconfig==1.1.1 # via pytest mccabe==0.7.0 # via flake8 mypy==0.991 - # via -r test.in + # via -r requirements/test.in mypy-extensions==0.4.3 # via # black @@ -45,8 +45,14 @@ pyparsing==3.0.9 pytest==7.1.3 # via pytest-cov pytest-cov==4.0.0 - # via -r test.in + # via -r requirements/test.in tomli==2.0.1 - # via pytest + # via + # black + # coverage + # mypy + # pytest typing-extensions==4.3.0 - # via mypy + # via + # black + # mypy diff --git a/test_emails/api/test_app.py b/test_emails/api/test_app.py new file mode 100644 index 000000000..5ba9ecf11 --- /dev/null +++ b/test_emails/api/test_app.py @@ -0,0 +1,11 @@ +from fastapi.testclient import TestClient + + +from prepline_emails.api.app import app + + +def test_emails_api_health_check(): + client = TestClient(app) + response = client.get("/healthcheck") + + assert response.status_code == 200 From aee5e726110270f72a8529b763b3fdf20924ce64 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 26 Dec 2022 10:08:08 -0500 Subject: [PATCH 4/7] temporarily disable coverage --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d2e13b304..03f9c244a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,7 +71,8 @@ jobs: source .venv/bin/activate sudo apt-get install --yes poppler-utils make test - make check-coverage + # NOTE(robinson) - Add check-coverage back in once we add real tests + # make check-coverage make check-notebooks changelog: From 7c9508f93092e394fcb27e607c02c557b6e05338 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 26 Dec 2022 10:08:58 -0500 Subject: [PATCH 5/7] remove codeql until the repo is public --- 
.github/workflows/codeql-analysis.yml | 74 --------------------------- 1 file changed, 74 deletions(-) delete mode 100644 .github/workflows/codeql-analysis.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index dbf93afae..000000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,74 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL" - -on: - push: - branches: [ "main" ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ "main" ] - schedule: - - cron: '23 21 * * 3' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v2 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. 
- # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality - - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v2 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. - - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 - with: - category: "/language:${{matrix.language}}"' From 553c1d1314ec2518854ea1d20a27c5f54859102b Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 26 Dec 2022 10:12:56 -0500 Subject: [PATCH 6/7] only check pipeline notebooks --- pipeline-notebooks/pipeline-email.ipynb | 14 +------------- scripts/check-and-format-notebooks.py | 5 ++--- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/pipeline-notebooks/pipeline-email.ipynb b/pipeline-notebooks/pipeline-email.ipynb index 3bbf54d34..594a0645d 100644 --- a/pipeline-notebooks/pipeline-email.ipynb +++ b/pipeline-notebooks/pipeline-email.ipynb @@ -15,21 +15,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", 
- "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.15" } }, "nbformat": 4, diff --git a/scripts/check-and-format-notebooks.py b/scripts/check-and-format-notebooks.py index e360b71d8..ffee5ae30 100755 --- a/scripts/check-and-format-notebooks.py +++ b/scripts/check-and-format-notebooks.py @@ -26,9 +26,8 @@ def nb_paths(root_path: Union[str, Path]) -> List[Path]: return [ fn for dir in root_path.iterdir() - # NOTE(alan): Search only in paths with 'notebooks' in the title such as pipeline-notebooks - # and exploration-notebooks - if "notebooks" in dir.stem and dir.is_dir() + # NOTE(robinson): Only check format for pipeline notebooks + if dir.stem == "pipeline-notebooks" and dir.is_dir() for fn in dir.iterdir() if fn.suffix == ".ipynb" ] From 6c613db7694bbfd443198f18abc88113221c4778 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 26 Dec 2022 10:15:20 -0500 Subject: [PATCH 7/7] regenerate api --- prepline_emails/api/email.py | 1 + 1 file changed, 1 insertion(+) diff --git a/prepline_emails/api/email.py b/prepline_emails/api/email.py index 1a13d05e9..68631d74b 100644 --- a/prepline_emails/api/email.py +++ b/prepline_emails/api/email.py @@ -21,6 +21,7 @@ RATE_LIMIT = os.environ.get("PIPELINE_API_RATE_LIMIT", "1/second") +# pipeline-api def pipeline_api(text): pass