From 48ca43b9cf9d5196e13710f087fa92abe0d36c89 Mon Sep 17 00:00:00 2001 From: Bojan Date: Thu, 4 Mar 2021 21:05:41 -0400 Subject: [PATCH 1/4] add support for obj, dll, and coff object file formats --- README.md | 6 +++++- src/map.rs | 30 ++++++++++++++++++++++++++++++ src/matchers/app.rs | 20 ++++++++++++++++++++ src/matchers/archive.rs | 13 +++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3361285..428589b 100644 --- a/README.md +++ b/README.md @@ -170,6 +170,7 @@ assert_eq!(kind.extension(), "foo"); - **rpm** - `application/x-rpm` - **dcm** - `application/dicom` - **zst** - `application/zstd` +- **msi** - `application/x-ole-storage` #### Book @@ -196,6 +197,7 @@ assert_eq!(kind.extension(), "foo"); - **wasm** - `application/wasm` - **exe** - `application/vnd.microsoft.portable-executable` +- **dll** - `application/vnd.microsoft.portable-executable` - **elf** - `application/x-executable` - **bc** - `application/llvm` - **mach** - `application/x-mach-binary` @@ -203,10 +205,12 @@ assert_eq!(kind.extension(), "foo"); - **dex** - `application/vnd.android.dex` - **dey** - `application/vnd.android.dey` - **der** - `application/x-x509-ca-cert` +- **obj** - `application/x-executable` ## Known Issues -- `doc`, `ppt`, `xls` all have the same magic number so it's not possible to tell which one just based on the binary data. `doc` is returned for all. +- `doc`, `ppt`, `xls`, `msi` all have the same magic number so it's not possible to tell which one just based on the binary data. `doc` is returned for all. +- `exe` and `dll` have the same magic number so it's not possible to tell which one just based on the binary data. `exe` is returned for all. 
## License diff --git a/src/map.rs b/src/map.rs index 6a95f41..02a908b 100644 --- a/src/map.rs +++ b/src/map.rs @@ -51,6 +51,12 @@ matcher_map!( "exe", matchers::app::is_exe ), + ( + MatcherType::APP, + "application/vnd.microsoft.portable-executable", + "dll", + matchers::app::is_dll + ), ( MatcherType::APP, "application/java", @@ -87,6 +93,24 @@ matcher_map!( "der", matchers::app::is_der ), + ( + MatcherType::APP, + "application/x-executable", + "obj", + matchers::app::is_coff_i386 + ), + ( + MatcherType::APP, + "application/x-executable", + "obj", + matchers::app::is_coff_x64 + ), + ( + MatcherType::APP, + "application/x-executable", + "obj", + matchers::app::is_coff_ia64 + ), // Book ( MatcherType::BOOK, @@ -490,6 +514,12 @@ matcher_map!( "zst", matchers::archive::is_zst ), + ( + MatcherType::ARCHIVE, + "application/x-ole-storage", + "msi", + matchers::archive::is_msi + ), // Text ( MatcherType::TEXT, diff --git a/src/matchers/app.rs b/src/matchers/app.rs index f0e6f50..626e5ab 100644 --- a/src/matchers/app.rs +++ b/src/matchers/app.rs @@ -32,6 +32,11 @@ pub fn is_exe(buf: &[u8]) -> bool { buf.len() > 1 && buf[0] == 0x4D && buf[1] == 0x5A } +/// Returns whether a buffer is a DLL. +pub fn is_dll(buf: &[u8]) -> bool { + is_exe(buf) +} + /// Returns whether a buffer is an ELF. pub fn is_elf(buf: &[u8]) -> bool { buf.len() > 52 && buf[0] == 0x7F && buf[1] == 0x45 && buf[2] == 0x4C && buf[3] == 0x46 @@ -100,3 +105,18 @@ pub fn is_der(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x30 && buf[1] == 0x82 } + +/// Returns whether a buffer is a Common Object File Format for i386 architecture. +pub fn is_coff_i386(buf: &[u8]) -> bool { + buf.len() > 2 && buf[0] == 0x4C && buf[1] == 0x01 +} + +/// Returns whether a buffer is a Common Object File Format for x64 architecture. +pub fn is_coff_x64(buf: &[u8]) -> bool { + buf.len() > 2 && buf[0] == 0x64 && buf[1] == 0x86 +} + +/// Returns whether a buffer is a Common Object File Format for Itanium architecture. 
+pub fn is_coff_ia64(buf: &[u8]) -> bool { + buf.len() > 2 && buf[0] == 0x00 && buf[1] == 0x02 +} \ No newline at end of file diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index fba4e30..f247899 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -192,3 +192,16 @@ pub fn is_dcm(buf: &[u8]) -> bool { pub fn is_zst(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x28 && buf[1] == 0xB5 && buf[2] == 0x2F && buf[3] == 0xFD } + +/// Returns whether a buffer is a MSI Windows Installer archive. +pub fn is_msi(buf: &[u8]) -> bool { + buf.len() > 7 + && buf[0] == 0xD0 + && buf[1] == 0xCF + && buf[2] == 0x11 + && buf[3] == 0xE0 + && buf[4] == 0xA1 + && buf[5] == 0xB1 + && buf[6] == 0x1A + && buf[7] == 0xE1 +} From 0b91198f3a2cb8d4e636947426b57503aa16a095 Mon Sep 17 00:00:00 2001 From: Bojan Date: Thu, 4 Mar 2021 21:11:31 -0400 Subject: [PATCH 2/4] fix format --- src/matchers/app.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matchers/app.rs b/src/matchers/app.rs index 626e5ab..065b6fe 100644 --- a/src/matchers/app.rs +++ b/src/matchers/app.rs @@ -119,4 +119,4 @@ pub fn is_coff_x64(buf: &[u8]) -> bool { /// Returns whether a buffer is a Common Object File Format for Itanium architecture. pub fn is_coff_ia64(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x00 && buf[1] == 0x02 -} \ No newline at end of file +} From c96434f4a17091ff8f11fd499820e6d40bbb7e5b Mon Sep 17 00:00:00 2001 From: Bojan Date: Sun, 7 Mar 2021 19:42:24 -0400 Subject: [PATCH 3/4] add a single is_coff function that composes the individual architecture ones. 
add docs --- src/map.rs | 14 +------------- src/matchers/app.rs | 9 ++++++++- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/map.rs b/src/map.rs index 02a908b..a425e90 100644 --- a/src/map.rs +++ b/src/map.rs @@ -97,19 +97,7 @@ matcher_map!( MatcherType::APP, "application/x-executable", "obj", - matchers::app::is_coff_i386 - ), - ( - MatcherType::APP, - "application/x-executable", - "obj", - matchers::app::is_coff_x64 - ), - ( - MatcherType::APP, - "application/x-executable", - "obj", - matchers::app::is_coff_ia64 + matchers::app::is_coff ), // Book ( diff --git a/src/matchers/app.rs b/src/matchers/app.rs index 065b6fe..c82a297 100644 --- a/src/matchers/app.rs +++ b/src/matchers/app.rs @@ -21,6 +21,7 @@ pub fn is_wasm(buf: &[u8]) -> bool { } /// Returns whether a buffer is an EXE. +/// DLL and EXE have the same magic number, so returns true also for a DLL. /// /// # Example /// @@ -32,7 +33,8 @@ pub fn is_exe(buf: &[u8]) -> bool { buf.len() > 1 && buf[0] == 0x4D && buf[1] == 0x5A } -/// Returns whether a buffer is a DLL. +/// Returns whether a buffer is a DLL. +/// DLL and EXE have the same magic number, so returns true also for an EXE. pub fn is_dll(buf: &[u8]) -> bool { is_exe(buf) } @@ -120,3 +122,8 @@ pub fn is_coff_x64(buf: &[u8]) -> bool { pub fn is_coff_ia64(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x00 && buf[1] == 0x02 } + +/// Returns whether a buffer is a Common Object File Format. 
+pub fn is_coff(buf: &[u8]) -> bool { + is_coff_x64(buf) || is_coff_i386(buf) ||is_coff_ia64(buf) +} From ff0851a313c43c7b1e7627b2a8b286dfceba2c15 Mon Sep 17 00:00:00 2001 From: Bojan Date: Sun, 7 Mar 2021 23:18:37 -0400 Subject: [PATCH 4/4] fix formatting --- src/matchers/app.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/matchers/app.rs b/src/matchers/app.rs index c82a297..de993e3 100644 --- a/src/matchers/app.rs +++ b/src/matchers/app.rs @@ -20,8 +20,7 @@ pub fn is_wasm(buf: &[u8]) -> bool { && buf[7] == 0x00 } -/// Returns whether a buffer is an EXE. -/// DLL and EXE have the same magic number, so returns true also for a DLL. +/// Returns whether a buffer is an EXE. DLL and EXE have the same magic number, so returns true also for a DLL. /// /// # Example /// @@ -33,8 +32,7 @@ pub fn is_exe(buf: &[u8]) -> bool { buf.len() > 1 && buf[0] == 0x4D && buf[1] == 0x5A } -/// Returns whether a buffer is a DLL. -/// DLL and EXE have the same magic number, so returns true also for an EXE. +/// Returns whether a buffer is a DLL. DLL and EXE have the same magic number, so returns true also for an EXE. pub fn is_dll(buf: &[u8]) -> bool { is_exe(buf) } @@ -125,5 +123,5 @@ pub fn is_coff_ia64(buf: &[u8]) -> bool { /// Returns whether a buffer is a Common Object File Format. pub fn is_coff(buf: &[u8]) -> bool { - is_coff_x64(buf) || is_coff_i386(buf) ||is_coff_ia64(buf) + is_coff_x64(buf) || is_coff_i386(buf) || is_coff_ia64(buf) }