From 52821250391f39bea364f720a4abe49961535ad9 Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Mon, 25 Nov 2024 16:55:26 -0800 Subject: [PATCH 1/2] Platform: Supported file types --- platform/supported-file-types.mdx | 4 +- .../supported-file-types-platform.mdx | 128 ++++++++++++++++++ 2 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 snippets/general-shared-text/supported-file-types-platform.mdx diff --git a/platform/supported-file-types.mdx b/platform/supported-file-types.mdx index 740aba45..c250657b 100644 --- a/platform/supported-file-types.mdx +++ b/platform/supported-file-types.mdx @@ -2,6 +2,6 @@ title: Supported file types --- -import SupportedFileTypes from '/snippets/general-shared-text/supported-file-types.mdx'; +import SupportedFileTypesPlatform from '/snippets/general-shared-text/supported-file-types-platform.mdx'; -<SupportedFileTypes /> \ No newline at end of file +<SupportedFileTypesPlatform /> \ No newline at end of file diff --git a/snippets/general-shared-text/supported-file-types-platform.mdx b/snippets/general-shared-text/supported-file-types-platform.mdx new file mode 100644 index 00000000..a0bd2a89 --- /dev/null +++ b/snippets/general-shared-text/supported-file-types-platform.mdx @@ -0,0 +1,128 @@ +The Unstructured Platform supports processing of the following file types: + +By file extension: + +| File extension | +| --- | +| `.123` | +| `.602` | +| `.abw` | +| `.bmp` | +| `.cgm` | +| `.csv` | +| `.cwk` | +| `.dbf` | +| `.dif` | +| `.doc` | +| `.docm` | +| `.docx` | +| `.dot` | +| `.dotm` | +| `.eml` | +| `.epub` | +| `.et` | +| `.eth` | +| `.fods` | +| `.gif` | +| `.heic` | +| `.htm` | +| `.html` | +| `.hwp` | +| `.jpeg` | +| `.jpg` | +| `.key` | +| `.lwp` | +| `.md` | +| `.msg` | +| `.mcw` | +| `.mw` | +| `.numbers` | +| `.ods` | +| `.odt` | +| `.org` | +| `.p7s` | +| `.pages` | +| `.pbd` | +| `.pdf` | +| `.png` | +| `.pot` | +| `.potm` | +| `.potx` | +| `.ppt` | +| `.pptm` | +| `.pptx` | +| `.prn` | +| `.qpw` | +| `.rst` | +| `.rtf` | +| `.sda` | +| `.sdd` | +| `.sdp` 
| +| `.sdw` | +| `.sgl` | +| `.slk` | +| `.sti` | +| `.stw` | +| `.svg` | +| `.sxg` | +| `.sxi` | +| `.sxw` | +| `.sylk` | +| `.tiff` | +| `.txt` | +| `.tsv` | +| `.uof` | +| `.uos1` | +| `.uos2` | +| `.vor` | +| `.wb1` | +| `.wb2` | +| `.wb3` | +| `.web` | +| `.webp` | +| `.wk1` | +| `.wk2` | +| `.wk3` | +| `.wk4` | +| `.wks` | +| `.wpd` | +| `.wps` | +| `.wq1` | +| `.wq2` | +| `.xlr` | +| `.xls` | +| `.xlsb` | +| `.xlsm` | +| `.xlsx` | +| `.xlw` | +| `.xml` | +| `.zabw` | + +By file type: + +| Category | File types | +| --- | --- | +| Apple | `.cwk`, `.key`, `.mcw`, `.numbers`, `.pages` +| CSV | `.csv` | +| Data interchange | `.dif` | +| dBase | `.dbf` | +| E-mail | `.eml`, `.msg`, `.p7s` | +| EPUB | `.epub` | +| Excel | `.xls`, `.xlsb`, `.xlsm`, `.xlsx`, `.xlw` | +| HTML | `.htm`, `.html` | +| Image | `.bmp`, `.cgm`, `.gif`, `.heic`, `.jpeg`, `.jpg`, `.png`, `.prn`, `.svg`, `.tiff`, `.wb1`, `.wb2`, `.wb3`, `.webp` | +| Markdown | `.md` | +| Org Mode | `.org` | +| Open Office | `.odt`, `.sda`, `.sgl`, `.sti`, `.sxi`, `.sxw` | +| Other | `.eth`, `.mw`, `.pbd`, `.sdd`, `.slk`, `.sylk`, `.uof`, `.web`, `.xlr` | +| PDF | `.pdf` | +| Plain text | `.txt` | +| PowerPoint | `.pot`, `.potm`, `.potx`, `.ppt`, `.pptm`, `.pptx` | +| reStructured Text | `.rst` | +| Rich Text | `.rtf` | +| Spreadsheet | `.123`, `.et`, `.fods`, `.ods`, `.qpw`, `.uos1`, `.uos2`, `.wk1`, `.wk2`, `.wk3`, `.wk4`, `.wks`, `.wq1`, `.wq2` | +| StarOffice | `.sdw`, `.sxg`, `.vor` | +| TSV | `.tsv` | +| Word | `.abw`, `.doc`, `.docx`, `.docm`, `.dot`, `.dotm`, `.wpd`, `.wps` | +| Word processing | `.602`, `.hwp`, `.lwp`, `.stw`, `.zabw` | +| XML | `.xml` | From 95c2cebd7784573d624c93c55cb54f1f90a04a6c Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Sun, 1 Dec 2024 13:06:34 -0800 Subject: [PATCH 2/2] Adjustments to Platform file types --- .../supported-file-types-platform.mdx | 52 ++++--------------- 1 file changed, 9 insertions(+), 43 deletions(-) diff --git 
a/snippets/general-shared-text/supported-file-types-platform.mdx b/snippets/general-shared-text/supported-file-types-platform.mdx index a0bd2a89..7f32dc99 100644 --- a/snippets/general-shared-text/supported-file-types-platform.mdx +++ b/snippets/general-shared-text/supported-file-types-platform.mdx @@ -4,11 +4,8 @@ By file extension: | File extension | | --- | -| `.123` | -| `.602` | | `.abw` | | `.bmp` | -| `.cgm` | | `.csv` | | `.cwk` | | `.dbf` | @@ -30,14 +27,9 @@ By file extension: | `.hwp` | | `.jpeg` | | `.jpg` | -| `.key` | -| `.lwp` | | `.md` | -| `.msg` | | `.mcw` | | `.mw` | -| `.numbers` | -| `.ods` | | `.odt` | | `.org` | | `.p7s` | @@ -47,49 +39,25 @@ By file extension: | `.png` | | `.pot` | | `.potm` | -| `.potx` | | `.ppt` | | `.pptm` | | `.pptx` | | `.prn` | -| `.qpw` | | `.rst` | | `.rtf` | -| `.sda` | -| `.sdd` | | `.sdp` | -| `.sdw` | | `.sgl` | -| `.slk` | -| `.sti` | -| `.stw` | | `.svg` | | `.sxg` | -| `.sxi` | -| `.sxw` | -| `.sylk` | | `.tiff` | | `.txt` | | `.tsv` | | `.uof` | | `.uos1` | | `.uos2` | -| `.vor` | -| `.wb1` | -| `.wb2` | -| `.wb3` | | `.web` | | `.webp` | -| `.wk1` | | `.wk2` | -| `.wk3` | -| `.wk4` | -| `.wks` | -| `.wpd` | -| `.wps` | -| `.wq1` | -| `.wq2` | -| `.xlr` | | `.xls` | | `.xlsb` | | `.xlsm` | @@ -102,27 +70,25 @@ By file type: | Category | File types | | --- | --- | -| Apple | `.cwk`, `.key`, `.mcw`, `.numbers`, `.pages` +| Apple | `.cwk`, `.mcw`, `.pages` | | CSV | `.csv` | | Data interchange | `.dif` | | dBase | `.dbf` | -| E-mail | `.eml`, `.msg`, `.p7s` | +| E-mail | `.eml`, `.p7s` | | EPUB | `.epub` | -| Excel | `.xls`, `.xlsb`, `.xlsm`, `.xlsx`, `.xlw` | | HTML | `.htm`, `.html` | -| Image | `.bmp`, `.cgm`, `.gif`, `.heic`, `.jpeg`, `.jpg`, `.png`, `.prn`, `.svg`, `.tiff`, `.wb1`, `.wb2`, `.wb3`, `.webp` | +| Image | `.bmp`, `.gif`, `.heic`, `.jpeg`, `.jpg`, `.png`, `.prn`, `.svg`, `.tiff`, `.webp` | | Markdown | `.md` | | Org Mode | `.org` | -| Open Office | `.odt`, `.sda`, `.sgl`, `.sti`, `.sxi`, `.sxw` | 
-| Other | `.eth`, `.mw`, `.pbd`, `.sdd`, `.slk`, `.sylk`, `.uof`, `.web`, `.xlr` | +| Open Office | `.odt`, `.sgl` | +| Other | `.eth`, `.mw`, `.pbd`, `.sdp`, `.uof`, `.web` | | PDF | `.pdf` | | Plain text | `.txt` | -| PowerPoint | `.pot`, `.potm`, `.potx`, `.ppt`, `.pptm`, `.pptx` | +| PowerPoint | `.pot`, `.potm`, `.ppt`, `.pptm`, `.pptx` | | reStructured Text | `.rst` | | Rich Text | `.rtf` | -| Spreadsheet | `.123`, `.et`, `.fods`, `.ods`, `.qpw`, `.uos1`, `.uos2`, `.wk1`, `.wk2`, `.wk3`, `.wk4`, `.wks`, `.wq1`, `.wq2` | -| StarOffice | `.sdw`, `.sxg`, `.vor` | +| Spreadsheet | `.et`, `.fods`, `.uos1`, `.uos2`, `.wk2`, `.xls`, `.xlsb`, `.xlsm`, `.xlsx`, `.xlw` | +| StarOffice | `.sxg` | | TSV | `.tsv` | -| Word | `.abw`, `.doc`, `.docx`, `.docm`, `.dot`, `.dotm`, `.wpd`, `.wps` | -| Word processing | `.602`, `.hwp`, `.lwp`, `.stw`, `.zabw` | +| Word processing | `.abw`, `.doc`, `.docm`, `.docx`, `.dot`, `.dotm`, `.hwp`, `.zabw` | | XML | `.xml` |