diff --git a/README.md b/README.md index 1703e97f4..8e721f12d 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,7 @@ for PDFs and Images, which are `pdf`, `jpg` and `png`. Again, please note that t You can specify the encoding to use to decode the text input. If no value is provided, utf-8 will be used. ``` -curl -X 'POST' +curl -X 'POST' \ 'https://api.unstructured.io/general/v0/general' \ -H 'accept: application/json' \ -H 'Content-Type: multipart/form-data' \ @@ -176,6 +176,23 @@ curl -X 'POST' | jq -C . | less -R ``` +#### Gzipped files + +You can send gzipped files and the API will un-gzip them. + +``` +curl -X 'POST' \ + 'https://api.unstructured.io/general/v0/general' \ + -H 'accept: application/json' \ + -H 'Content-Type: multipart/form-data' \ + -F 'gz_uncompressed_content_type=application/pdf' \ + -F 'files=@sample-docs/layout-parser-paper.pdf.gz' +``` + +If the field `gz_uncompressed_content_type` is set, the API will use its value as the content-type of all files +after uncompressing the .gz files that are sent in a single batch. If not set, the API will use +various heuristics to detect the filetypes after uncompressing from .gz. + #### XML Tags When processing XML documents, set the `xml_keep_tags` parameter to `true` to retain the XML tags in the output. If not specified, it will simply extract the text from within the tags. 
diff --git a/prepline_general/api/general.py b/prepline_general/api/general.py index 57deeaada..3e3393872 100644 --- a/prepline_general/api/general.py +++ b/prepline_general/api/general.py @@ -749,11 +749,11 @@ def general_partition( chunking_strategy = _validate_chunking_strategy(form_params.chunking_strategy) # -- unzip any uploaded files that need it -- - for file_index in range(len(files)): - if files[file_index].content_type == "application/gzip": - files[file_index] = ungz_file( - files[file_index], form_params.gz_uncompressed_content_type - ) + for idx, file in enumerate(files): + is_content_type_gz = file.content_type == "application/gzip" + is_extension_gz = file.filename and file.filename.endswith(".gz") + if is_content_type_gz or is_extension_gz: + files[idx] = ungz_file(file, form_params.gz_uncompressed_content_type) def response_generator(is_multipart: bool): for file in files: diff --git a/requirements/base.txt b/requirements/base.txt index 1e9d118f4..8407de450 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -19,6 +19,7 @@ beautifulsoup4==4.12.3 certifi==2024.2.2 # via # requests + # unstructured # unstructured-client cffi==1.16.0 # via cryptography @@ -28,6 +29,7 @@ charset-normalizer==3.3.2 # via # pdfminer-six # requests + # unstructured # unstructured-client click==8.1.3 # via @@ -38,13 +40,15 @@ coloredlogs==15.0.1 # via onnxruntime contourpy==1.2.0 # via matplotlib -cryptography==42.0.4 +cryptography==42.0.5 # via pdfminer-six cycler==0.12.1 # via matplotlib dataclasses-json==0.6.4 - # via unstructured -dataclasses-json-speakeasy==0.5.11 + # via + # unstructured + # unstructured-client +deepdiff==6.7.1 # via unstructured-client deprecated==1.2.14 # via pikepdf @@ -56,7 +60,7 @@ et-xmlfile==1.1.0 # via openpyxl exceptiongroup==1.2.0 # via anyio -fastapi==0.109.2 +fastapi==0.110.0 # via -r requirements/base.in filelock==3.13.1 # via @@ -65,7 +69,7 @@ filelock==3.13.1 # transformers filetype==1.2.0 # via unstructured 
-flatbuffers==23.5.26 +flatbuffers==24.3.7 # via onnxruntime fonttools==4.49.0 # via matplotlib @@ -75,7 +79,7 @@ fsspec==2024.2.0 # torch h11==0.14.0 # via uvicorn -huggingface-hub==0.20.3 +huggingface-hub==0.21.4 # via # timm # tokenizers @@ -112,10 +116,9 @@ markdown==3.5.2 # via unstructured markupsafe==2.1.5 # via jinja2 -marshmallow==3.20.2 +marshmallow==3.21.1 # via # dataclasses-json - # dataclasses-json-speakeasy # unstructured-client matplotlib==3.8.3 # via pycocotools @@ -163,7 +166,9 @@ opencv-python==4.9.0.80 # unstructured-inference openpyxl==3.1.2 # via unstructured -packaging==23.2 +ordered-set==4.1.0 + # via deepdiff +packaging==24.0 # via # huggingface-hub # marshmallow @@ -174,7 +179,7 @@ packaging==23.2 # transformers # unstructured-client # unstructured-pytesseract -pandas==2.2.0 +pandas==2.2.1 # via # layoutparser # unstructured @@ -182,11 +187,11 @@ pdf2image==1.17.0 # via # layoutparser # unstructured -pdfminer-six==20221105 +pdfminer-six==20231228 # via # pdfplumber # unstructured -pdfplumber==0.10.4 +pdfplumber==0.11.0 # via layoutparser pikepdf==8.13.0 # via unstructured @@ -218,23 +223,24 @@ pycparser==2.21 # via cffi pycryptodome==3.20.0 # via -r requirements/base.in -pydantic==2.6.1 +pydantic==2.6.4 # via fastapi -pydantic-core==2.16.2 +pydantic-core==2.16.3 # via pydantic pypandoc==1.13 # via unstructured -pyparsing==3.1.1 +pyparsing==3.1.2 # via matplotlib -pypdf==4.0.2 +pypdf==4.1.0 # via # -r requirements/base.in # unstructured -pypdfium2==4.27.0 + # unstructured-client +pypdfium2==4.28.0 # via pdfplumber pytesseract==0.3.10 # via layoutparser -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # matplotlib # pandas @@ -258,7 +264,7 @@ pyyaml==6.0.1 # omegaconf # timm # transformers -rapidfuzz==3.6.1 +rapidfuzz==3.6.2 # via # unstructured # unstructured-inference @@ -287,7 +293,7 @@ six==1.16.0 # langdetect # python-dateutil # unstructured-client -sniffio==1.3.0 +sniffio==1.3.1 # via anyio soupsieve==2.5 # via 
beautifulsoup4 @@ -322,7 +328,7 @@ tqdm==4.66.2 # transformers transformers==4.37.1 # via unstructured-inference -typing-extensions==4.9.0 +typing-extensions==4.10.0 # via # anyio # fastapi @@ -339,13 +345,14 @@ typing-extensions==4.9.0 typing-inspect==0.9.0 # via # dataclasses-json - # dataclasses-json-speakeasy # unstructured-client tzdata==2024.1 # via pandas -unstructured[local-inference]==0.12.4 - # via -r requirements/base.in -unstructured-client==0.18.0 +unstructured[local-inference]==0.12.5 + # via + # -r requirements/base.in + # unstructured +unstructured-client==0.21.1 # via unstructured unstructured-inference==0.7.23 # via unstructured @@ -355,7 +362,7 @@ urllib3==2.2.1 # via # requests # unstructured-client -uvicorn==0.27.1 +uvicorn==0.28.0 # via -r requirements/base.in wrapt==1.16.0 # via diff --git a/requirements/test.in b/requirements/test.in index 6ad55c2d9..c507ed49d 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -11,3 +11,4 @@ pytest-mock nbdev jupyter httpx +deepdiff diff --git a/requirements/test.txt b/requirements/test.txt index 2d9b79e89..b94dd2b69 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -92,9 +92,11 @@ contourpy==1.2.0 # via # -r requirements/base.txt # matplotlib -coverage[toml]==7.4.2 - # via pytest-cov -cryptography==42.0.4 +coverage[toml]==7.4.3 + # via + # coverage + # pytest-cov +cryptography==42.0.5 # via # -r requirements/base.txt # pdfminer-six @@ -106,14 +108,16 @@ dataclasses-json==0.6.4 # via # -r requirements/base.txt # unstructured -dataclasses-json-speakeasy==0.5.11 - # via - # -r requirements/base.txt # unstructured-client debugpy==1.8.1 # via ipykernel decorator==5.1.1 # via ipython +deepdiff==6.7.1 + # via + # -r requirements/base.txt + # -r requirements/test.in + # unstructured-client defusedxml==0.7.1 # via nbconvert deprecated==1.2.14 @@ -142,7 +146,7 @@ execnb==0.1.5 # via nbdev executing==2.0.1 # via stack-data -fastapi==0.109.2 +fastapi==0.110.0 # via -r requirements/base.txt 
fastcore==1.5.29 # via @@ -163,7 +167,7 @@ filetype==1.2.0 # unstructured flake8==7.0.0 # via -r requirements/test.in -flatbuffers==23.5.26 +flatbuffers==24.3.7 # via # -r requirements/base.txt # onnxruntime @@ -191,7 +195,7 @@ httpx==0.27.0 # via # -r requirements/test.in # jupyterlab -huggingface-hub==0.20.3 +huggingface-hub==0.21.4 # via # -r requirements/base.txt # timm @@ -216,14 +220,14 @@ iopath==0.1.10 # via # -r requirements/base.txt # layoutparser -ipykernel==6.29.2 +ipykernel==6.29.3 # via # ipywidgets # jupyter # jupyter-console # jupyterlab # qtconsole -ipython==8.22.1 +ipython==8.22.2 # via # execnb # ipykernel @@ -249,7 +253,7 @@ joblib==1.3.2 # via # -r requirements/base.txt # nltk -json5==0.9.17 +json5==0.9.22 # via jupyterlab-server jsonpath-python==1.0.6 # via @@ -266,7 +270,7 @@ jsonschema-specifications==2023.12.1 # via jsonschema jupyter==1.0.0 # via -r requirements/test.in -jupyter-client==8.6.0 +jupyter-client==8.6.1 # via # ipykernel # jupyter-console @@ -275,7 +279,7 @@ jupyter-client==8.6.0 # qtconsole jupyter-console==6.6.3 # via jupyter -jupyter-core==5.7.1 +jupyter-core==5.7.2 # via # ipykernel # jupyter-client @@ -286,24 +290,24 @@ jupyter-core==5.7.1 # nbconvert # nbformat # qtconsole -jupyter-events==0.9.0 +jupyter-events==0.9.1 # via jupyter-server -jupyter-lsp==2.2.2 +jupyter-lsp==2.2.4 # via jupyterlab -jupyter-server==2.12.5 +jupyter-server==2.13.0 # via # jupyter-lsp # jupyterlab # jupyterlab-server # notebook # notebook-shim -jupyter-server-terminals==0.5.2 +jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.2 +jupyterlab==4.1.4 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.25.3 +jupyterlab-server==2.25.4 # via # jupyterlab # notebook @@ -320,6 +324,7 @@ langdetect==1.0.9 layoutparser[layoutmodels,tesseract]==0.3.4 # via # -r requirements/base.txt + # layoutparser # unstructured-inference lxml==5.1.0 # via @@ -337,11 +342,10 @@ markupsafe==2.1.5 # -r requirements/base.txt # 
jinja2 # nbconvert -marshmallow==3.20.2 +marshmallow==3.21.1 # via # -r requirements/base.txt # dataclasses-json - # dataclasses-json-speakeasy # unstructured-client matplotlib==3.8.3 # via @@ -363,7 +367,7 @@ msg-parser==1.2.0 # via # -r requirements/base.txt # unstructured -mypy==1.8.0 +mypy==1.9.0 # via -r requirements/test.in mypy-extensions==1.0.0 # via @@ -374,13 +378,13 @@ mypy-extensions==1.0.0 # unstructured-client nbclient==0.9.0 # via nbconvert -nbconvert==7.16.1 +nbconvert==7.16.2 # via # jupyter # jupyter-server nbdev==2.3.13 # via -r requirements/test.in -nbformat==5.9.2 +nbformat==5.10.2 # via # jupyter-server # nbclient @@ -396,7 +400,7 @@ nltk==3.8.1 # via # -r requirements/base.txt # unstructured -notebook==7.1.0 +notebook==7.1.1 # via jupyter notebook-shim==0.2.4 # via @@ -443,9 +447,13 @@ openpyxl==3.1.2 # via # -r requirements/base.txt # unstructured +ordered-set==4.1.0 + # via + # -r requirements/base.txt + # deepdiff overrides==7.7.0 # via jupyter-server -packaging==23.2 +packaging==24.0 # via # -r requirements/base.txt # black @@ -468,7 +476,7 @@ packaging==23.2 # transformers # unstructured-client # unstructured-pytesseract -pandas==2.2.0 +pandas==2.2.1 # via # -r requirements/base.txt # layoutparser @@ -484,12 +492,12 @@ pdf2image==1.17.0 # -r requirements/base.txt # layoutparser # unstructured -pdfminer-six==20221105 +pdfminer-six==20231228 # via # -r requirements/base.txt # pdfplumber # unstructured -pdfplumber==0.10.4 +pdfplumber==0.11.0 # via # -r requirements/base.txt # layoutparser @@ -559,11 +567,11 @@ pycparser==2.21 # cffi pycryptodome==3.20.0 # via -r requirements/base.txt -pydantic==2.6.1 +pydantic==2.6.4 # via # -r requirements/base.txt # fastapi -pydantic-core==2.16.2 +pydantic-core==2.16.3 # via # -r requirements/base.txt # pydantic @@ -579,15 +587,16 @@ pypandoc==1.13 # via # -r requirements/base.txt # unstructured -pyparsing==3.1.1 +pyparsing==3.1.2 # via # -r requirements/base.txt # matplotlib -pypdf==4.0.2 +pypdf==4.1.0 # 
via # -r requirements/base.txt # unstructured -pypdfium2==4.27.0 + # unstructured-client +pypdfium2==4.28.0 # via # -r requirements/base.txt # pdfplumber @@ -595,7 +604,7 @@ pytesseract==0.3.10 # via # -r requirements/base.txt # layoutparser -pytest==8.0.1 +pytest==8.1.1 # via # pytest-cov # pytest-mock @@ -603,7 +612,7 @@ pytest-cov==4.1.0 # via -r requirements/test.in pytest-mock==3.12.0 # via -r requirements/test.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # -r requirements/base.txt # arrow @@ -658,7 +667,7 @@ qtconsole==5.5.1 # via jupyter qtpy==2.4.1 # via qtconsole -rapidfuzz==3.6.1 +rapidfuzz==3.6.2 # via # -r requirements/base.txt # unstructured @@ -717,7 +726,7 @@ six==1.16.0 # python-dateutil # rfc3339-validator # unstructured-client -sniffio==1.3.0 +sniffio==1.3.1 # via # -r requirements/base.txt # anyio @@ -741,7 +750,7 @@ tabulate==0.9.0 # via # -r requirements/base.txt # unstructured -terminado==0.18.0 +terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals @@ -790,7 +799,7 @@ tqdm==4.66.2 # iopath # nltk # transformers -traitlets==5.14.1 +traitlets==5.14.2 # via # comm # ipykernel @@ -811,9 +820,9 @@ transformers==4.37.1 # via # -r requirements/base.txt # unstructured-inference -types-python-dateutil==2.8.19.20240106 +types-python-dateutil==2.8.19.20240311 # via arrow -typing-extensions==4.9.0 +typing-extensions==4.10.0 # via # -r requirements/base.txt # anyio @@ -835,15 +844,16 @@ typing-inspect==0.9.0 # via # -r requirements/base.txt # dataclasses-json - # dataclasses-json-speakeasy # unstructured-client tzdata==2024.1 # via # -r requirements/base.txt # pandas -unstructured[local-inference]==0.12.4 - # via -r requirements/base.txt -unstructured-client==0.18.0 +unstructured[local-inference]==0.12.5 + # via + # -r requirements/base.txt + # unstructured +unstructured-client==0.21.1 # via # -r requirements/base.txt # unstructured @@ -862,7 +872,7 @@ urllib3==2.2.1 # -r requirements/base.txt # requests # 
unstructured-client -uvicorn==0.27.1 +uvicorn==0.28.0 # via -r requirements/base.txt watchdog==4.0.0 # via nbdev @@ -876,7 +886,7 @@ webencodings==0.5.1 # tinycss2 websocket-client==1.7.0 # via jupyter-server -wheel==0.42.0 +wheel==0.43.0 # via astunparse widgetsnbextension==4.0.10 # via ipywidgets diff --git a/scripts/smoketest.py b/scripts/smoketest.py index 342ce62f9..7096031b5 100644 --- a/scripts/smoketest.py +++ b/scripts/smoketest.py @@ -1,11 +1,15 @@ +import io import os import time +import gzip +import shutil from pathlib import Path +from typing import List, Optional +import tempfile import pytest import requests import pandas as pd -import io API_URL = "http://localhost:8000/general/v0/general" # NOTE(rniko): Skip inference tests if we're running on an emulated architecture @@ -13,25 +17,34 @@ def send_document( - filename, - content_type, - strategy="auto", - output_format="application/json", - pdf_infer_table_structure="false", + filenames: List[str], + filenames_gzipped: Optional[List[str]] = None, + content_type: str = "", + strategy: str = "auto", + output_format: str = "application/json", + pdf_infer_table_structure: str = "false", + uncompressed_content_type: str = "", ): - # Note: `content_type` is not passed into request since fast API will overwrite it. 
- if str(filename).endswith(".gz"): - files = {"files": (str(filename), open(filename, "rb"), "application/gzip")} - else: - files = {"files": (str(filename), open(filename, "rb"))} + if filenames_gzipped is None: + filenames_gzipped = [] + files = [] + for filename in filenames: + files.append(("files", (str(filename), open(filename, "rb"), content_type))) + for filename in filenames_gzipped: + files.append(("files", (str(filename), open(filename, "rb"), "application/gzip"))) + + options = { + "strategy": strategy, + "output_format": output_format, + "pdf_infer_table_structure": pdf_infer_table_structure, + } + if uncompressed_content_type: + options["gz_uncompressed_content_type"] = uncompressed_content_type + return requests.post( API_URL, files=files, - data={ - "strategy": strategy, - "output_format": output_format, - "pdf_infer_table_structure": pdf_infer_table_structure, - }, + data=options, ) @@ -84,25 +97,100 @@ def send_document( ("layout-parser-paper.pdf.gz", "application/gzip"), ], ) -def test_happy_path(example_filename, content_type): +def test_happy_path(example_filename: str, content_type: str): """ For the files in sample-docs, verify that we get a 200 and some structured response """ - test_file = Path("sample-docs") / example_filename + test_file = str(Path("sample-docs") / example_filename) print(f"sending {content_type}") - json_response = send_document(test_file, content_type) + json_response = send_document(filenames=[test_file], content_type=content_type) assert json_response.status_code == 200 assert len(json_response.json()) > 0 assert len("".join(elem["text"] for elem in json_response.json())) > 20 - csv_response = send_document(test_file, content_type, output_format="text/csv") + csv_response = send_document( + filenames=[test_file], content_type=content_type, output_format="text/csv" + ) assert csv_response.status_code == 200 assert len(csv_response.text) > 0 df = pd.read_csv(io.StringIO(csv_response.text)) assert len(df) == 
len(json_response.json()) +@pytest.mark.parametrize("output_format", ["application/json", "text/csv"]) +@pytest.mark.parametrize( + "filenames_to_gzip, filenames_verbatim, uncompressed_content_type", + [ + (["fake-html.html"], [], "text/html"), + (["stanley-cups.csv"], [], "application/csv"), + (["fake.doc"], [], "application/msword"), + # compressed and uncompressed + (["layout-parser-paper-fast.pdf"], ["list-item-example.pdf"], "application/pdf"), + (["fake-email.eml"], ["fake-email-image-embedded.eml"], "message/rfc822"), + # compressed and uncompressed + # empty content-type means that API should detect filetype after decompressing. + (["layout-parser-paper-fast.pdf"], ["list-item-example.pdf"], ""), + (["fake-email.eml"], ["fake-email-image-embedded.eml"], ""), + ], +) +def test_gzip_sending( + output_format: str, + filenames_to_gzip: List[str], + filenames_verbatim: List[str], + uncompressed_content_type: str, +): + temp_files = {} + + for filename in filenames_to_gzip: + gz_file_extension = f"{Path(filename).suffix}.gz" + temp_file = tempfile.NamedTemporaryFile(suffix=gz_file_extension) + full_path = Path("sample-docs") / filename + gzip_file(str(full_path), temp_file.name) + temp_files[filename] = temp_file + filenames_gzipped = [temp_file.name for temp_file in temp_files.values()] + + filenames = [] + for filename in filenames_verbatim: + filenames.append(str(Path("sample-docs") / filename)) + + json_response = send_document( + filenames, + filenames_gzipped, + content_type=uncompressed_content_type, + uncompressed_content_type=uncompressed_content_type, + ) + assert json_response.status_code == 200, json_response.text + json_content = json_response.json() + assert len(json_content) > 0 + if len(filenames_gzipped + filenames) > 1: + for file in json_content: + assert len("".join(elem["text"] for elem in file)) > 20 + else: + assert len("".join(elem["text"] for elem in json_content)) > 20 + + csv_response = send_document( + filenames, + filenames_gzipped, + 
content_type=uncompressed_content_type, + uncompressed_content_type=uncompressed_content_type, + output_format="text/csv", + ) + assert csv_response.status_code == 200 + assert len(csv_response.text) > 0 + df = pd.read_csv(io.StringIO(csv_response.text)) + if len(filenames_gzipped + filenames) > 1: + json_size = 0 + for file in json_content: + json_size += len(file) + assert len(df) == json_size + else: + assert len(df) == len(json_content) + + for filename in filenames_to_gzip: + temp_files[filename].close() + + @pytest.mark.skipif(skip_inference_tests, reason="emulated architecture") def test_strategy_performance(): """ @@ -110,22 +198,24 @@ def test_strategy_performance(): is significantly faster than the hi_res strategy """ performance_ratio = 4 - test_file = Path("sample-docs") / "layout-parser-paper.pdf" + test_file = str(Path("sample-docs") / "layout-parser-paper.pdf") start_time = time.monotonic() - response = send_document(test_file, content_type="application/pdf", strategy="hi_res") + response = send_document( + filenames=[test_file], content_type="application/pdf", strategy="hi_res" + ) hi_res_time = time.monotonic() - start_time assert response.status_code == 200 start_time = time.monotonic() - response = send_document(test_file, content_type="application/pdf", strategy="auto") + response = send_document(filenames=[test_file], content_type="application/pdf", strategy="auto") auto_time = time.monotonic() - start_time assert response.status_code == 200 assert hi_res_time > performance_ratio * auto_time start_time = time.monotonic() - response = send_document(test_file, content_type="application/pdf", strategy="fast") + response = send_document(filenames=[test_file], content_type="application/pdf", strategy="fast") fast_time = time.monotonic() - start_time assert response.status_code == 200 @@ -142,14 +232,14 @@ def test_strategy_performance(): ("hi_res", "False", 0), ], ) -def test_table_support(strategy, pdf_infer_table_structure, expected_table_num): 
+def test_table_support(strategy: str, pdf_infer_table_structure: str, expected_table_num: int): """ Test that table extraction works on hi_res strategy """ - test_file = Path("sample-docs") / "layout-parser-paper.pdf" + test_file = str(Path("sample-docs") / "layout-parser-paper.pdf") response = send_document( - test_file, - "application/pdf", + filenames=[test_file], + content_type="application/pdf", strategy=strategy, pdf_infer_table_structure=pdf_infer_table_structure, ) @@ -166,3 +256,9 @@ def test_table_support(strategy, pdf_infer_table_structure, expected_table_num): # Note(austin) - table output has changed - this line isn't returned # assert "Layouts of scanned modern magazines and scientific reports" in extracted_tables[0] assert "Layouts of history" in extracted_tables[0] + + +def gzip_file(in_filepath: str, out_filepath: str): + with open(in_filepath, "rb") as f_in: + with gzip.open(out_filepath, "wb", compresslevel=1) as f_out: + shutil.copyfileobj(f_in, f_out) diff --git a/scripts/version-sync.sh b/scripts/version-sync.sh index e8888efa5..4a62d26e3 100755 --- a/scripts/version-sync.sh +++ b/scripts/version-sync.sh @@ -13,12 +13,12 @@ function usage { } function getopts-extra () { - declare i=1 + declare -i i=1 # if the next argument is not an option, then append it to array OPTARG while [[ ${OPTIND} -le $# && ${!OPTIND:0:1} != '-' ]]; do OPTARG[i]=${!OPTIND} - i+=1 - OPTIND+=1 + ((i += 1)) + ((OPTIND += 1)) done } diff --git a/test_general/api/test_gzip.py b/test_general/api/test_gzip.py new file mode 100644 index 000000000..84459a6c3 --- /dev/null +++ b/test_general/api/test_gzip.py @@ -0,0 +1,164 @@ +import gzip +import shutil +import io +import tempfile +from pathlib import Path +from typing import List + +import httpx +import pandas as pd +import pytest +from fastapi.testclient import TestClient +from deepdiff import DeepDiff + +from prepline_general.api.app import app + +MAIN_API_ROUTE = "general/v0/general" + + 
+@pytest.mark.parametrize("output_format", ["application/json", "text/csv"]) +@pytest.mark.parametrize( + "filenames_to_gzip, filenames_verbatim, uncompressed_content_type", + [ + (["fake-html.html"], [], "text/html"), + (["stanley-cups.csv"], [], "application/csv"), + (["fake.doc"], [], "application/msword"), + (["layout-parser-paper-fast.pdf"], [], "application/pdf"), + (["fake-email-attachment.eml", "fake-email.eml"], [], "message/rfc822"), + ( + ["fake-email-attachment.eml", "fake-email.eml", "announcement.eml"], + [], + "message/rfc822", + ), + (["layout-parser-paper-fast.pdf", "list-item-example.pdf"], [], "application/pdf"), + # now the same but without explicit content type + # to make the system guess the un-gzipped type based on content. + (["fake-html.html"], [], ""), + (["fake-email-attachment.eml", "fake-email.eml"], [], ""), + (["layout-parser-paper-fast.pdf", "list-item-example.pdf"], [], ""), + # mix of compressed and uncompressed + (["layout-parser-paper-fast.pdf"], ["list-item-example.pdf"], "application/pdf"), + # mix of compressed and uncompressed, and guessing of content type + (["layout-parser-paper-fast.pdf"], ["list-item-example.pdf"], ""), + # have to use OCR which is slow, so minimum cases + (["embedded-images-tables.jpg"], ["english-and-korean.png"], "image/png"), + (["embedded-images-tables.jpg"], ["english-and-korean.png"], ""), + ], +) +def test_gzipped_files_are_parsed_like_original( + output_format: str, + filenames_to_gzip: List[str], + filenames_verbatim: List[str], + uncompressed_content_type: str, +): + """ + Verify that API supports un-gzipping and correctly interprets gz_uncompressed_content_type, + by comparing response to directly parsing the same files. + The one thing which changes is the filenames in metadata, which have to be ignored. 
+ """ + client = TestClient(app) + gz_options = { + "gz_uncompressed_content_type": ( + uncompressed_content_type if uncompressed_content_type else None + ), + "output_format": output_format, + } + response1 = get_gzipped_response( + client, filenames_to_gzip, filenames_verbatim, gz_options, uncompressed_content_type + ) + response2 = call_api( + client, + [], + filenames_to_gzip + filenames_verbatim, + uncompressed_content_type, + {"output_format": output_format}, + ) + compare_responses( + response1, response2, output_format, len(filenames_to_gzip + filenames_verbatim) + ) + + +def compare_responses( + response1: httpx.Response, response2: httpx.Response, output_format: str, files_count: int +) -> None: + if output_format == "application/json": + if files_count == 1: + exclude_regex_paths = r"root\[\d+\]\['metadata'\]\['filename'\]" + else: + exclude_regex_paths = r"root\[\d+\]\[\d+\]\['metadata'\]\['filename'\]" + diff = DeepDiff( + t1=response1.json(), + t2=response2.json(), + exclude_regex_paths=exclude_regex_paths, + ) + assert len(diff) == 0 + else: + df1 = pd.read_csv(io.StringIO(response1.text)) + df2 = pd.read_csv(io.StringIO(response2.text)) + diff = DeepDiff( + t1=df1.to_dict(), t2=df2.to_dict(), exclude_regex_paths=r"root\['filename'\]\[\d+\]" + ) + assert len(diff) == 0 + + +def call_api( + client: TestClient, + filenames_gzipped: List[str], + filenames_verbatim: List[str], + content_type: str, + options: dict, + samples_dir: str = "sample-docs", +) -> httpx.Response: + files = [] + for filename in filenames_gzipped: + full_path = Path(samples_dir) / filename + files.append(("files", (str(full_path), open(full_path, "rb"), "application/gzip"))) + + for filename in filenames_verbatim: + full_path = Path(samples_dir) / filename + files.append(("files", (str(full_path), open(full_path, "rb"), content_type))) + + response = client.post( + MAIN_API_ROUTE, + files=files, + data=options, + ) + assert response.status_code == 200, response.text + assert 
len(response.text) > 0 + return response + + +def get_gzipped_response( + client: TestClient, + filenames_to_gzip: List[str], + filenames_verbatim: List[str], + options: dict, + content_type: str, + samples_dir: str = "sample-docs", +) -> httpx.Response: + """ + G-zips the filenames_to_gzip into temporary .gz file and sends to API, + along with filenames_no_gzip. + """ + temp_files = {} + for filename in filenames_to_gzip: + gz_file_extension = f"{Path(filename).suffix}.gz" + temp_file = tempfile.NamedTemporaryFile(suffix=gz_file_extension) + full_path = Path(samples_dir) / filename + gzip_file(str(full_path), temp_file.name) + temp_files[filename] = temp_file + + filenames_gzipped = [temp_file.name for temp_file in temp_files.values()] + + response = call_api(client, filenames_gzipped, filenames_verbatim, content_type, options) + + for filename in filenames_to_gzip: + temp_files[filename].close() + + return response + + +def gzip_file(in_filepath: str, out_filepath: str): + with open(in_filepath, "rb") as f_in: + with gzip.open(out_filepath, "wb", compresslevel=1) as f_out: + shutil.copyfileobj(f_in, f_out)