From e3ce7151d75aaf38fad0e0f117e62456e01d1797 Mon Sep 17 00:00:00 2001 From: Ben Rodes Date: Wed, 23 Nov 2022 12:01:40 -0500 Subject: [PATCH 01/14] Create sync_remote_main.yml --- .github/workflows/sync_remote_main.yml | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/sync_remote_main.yml diff --git a/.github/workflows/sync_remote_main.yml b/.github/workflows/sync_remote_main.yml new file mode 100644 index 000000000000..a147629bfb53 --- /dev/null +++ b/.github/workflows/sync_remote_main.yml @@ -0,0 +1,28 @@ +name: Sync upstream codeql/github main +on: + workflow_dispatch: + schedule: + # actually, ~5 minutes is the highest + # effective frequency you will get + - cron: '* * * * *' +jobs: + merge: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + token: ${{ secrets.WORKFLOW_TOKEN }} + - name: Merge upstream + run: | + git config --global user.name 'your-name' + git config --global user.email 'your-username@users.noreply.github.com' + # "git checkout main" is unnecessary, already here by default + git pull --unshallow # this option is very important, you would get + # complains about unrelated histories without it. + # (but actions/checkout@v2 can also be instructed + # to fetch all git depth right from the start) + git remote add upstream https://github.com/github/codeql.git + git fetch upstream + git checkout main + git merge --no-edit upstream/main + git push origin main From 40abfe313e18c35d0a0939963a6103621906a40f Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Wed, 7 Dec 2022 13:31:39 -0500 Subject: [PATCH 02/14] Adding additional hashing algorithms, and block mode predicates. Also adding 'isKnown' support to get the universe of possible options. Finally added an unknown algorithm stub, so if no match is found, we can consistently have a string representing unknown. --- .../internal/CryptoAlgorithmNames.qll | 69 ++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll index a234ba2cc1fc..7fd4b974aad2 100644 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll @@ -8,6 +8,24 @@ * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ + + /** + * Returns a string to represent generally unknown algorithms. + * To be used in place of hardcoded strings representing 'unknown' to maintain consistency. + */ + string unknownAlgorithmStub() + { + result = "" + } + + /** + * Holds if `name` is a known hashing algorithm in the model/library. + */ + predicate isKnownHashingAlgorithm(string name) + { + isStrongHashingAlgorithm(name) or isWeakHashingAlgorithm(name) + } + /** * Holds if `name` corresponds to a strong hashing algorithm. */ @@ -15,7 +33,8 @@ predicate isStrongHashingAlgorithm(string name) { name = [ "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512" + "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", + "SHAKE128", "SHAKE256" ] } @@ -30,6 +49,15 @@ predicate isWeakHashingAlgorithm(string name) { ] } + /** + * Holds if `name` is a known encryption algorithm in the model/library. + */ + predicate isKnownEncryptionAlgorithm(string name) + { + isStrongEncryptionAlgorithm(name) or isWeakEncryptionAlgorithm(name) + } + + /** * Holds if `name` corresponds to a strong encryption algorithm. */ @@ -54,6 +82,15 @@ predicate isWeakEncryptionAlgorithm(string name) { ] } + /** + * Holds if `name` is a known password hashing algorithm in the model/library. + */ + predicate isKnownPasswordHashingAlgorithm(string name) + { + isStrongPasswordHashingAlgorithm(name) or isWeakPasswordHashingAlgorithm(name) + } + + /** * Holds if `name` corresponds to a strong password hashing algorithm. */ @@ -64,7 +101,35 @@ predicate isStrongPasswordHashingAlgorithm(string name) { /** * Holds if `name` corresponds to a weak password hashing algorithm. */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } +predicate isWeakPasswordHashingAlgorithm(string name) { + name = "EVPKDF" +} + + /** + * Holds if `name` is a known cipher block mode algorithm in the model/library. + */ + predicate isKnownCipherBlockModeAlgorithm(string name) + { + isStrongCipherBlockModeAlgorithm(name) or isWeakCipherBlockModeAlgorithm(name) + } + + +/** + * Holds if `name` corresponds to a strong cipher block mode + */ +predicate isStrongCipherBlockModeAlgorithm(string name) +{ + name = ["CBC", "GCM", "CCM", "CFB", "OFB", "CFB8", "CTR", "OPENPGP", "XTS"] +} + +/** + * Holds if `name` corresponds to a weak cipher block mode + */ +predicate isWeakCipherBlockModeAlgorithm(string name) +{ + name = ["ECB"] +} + /** * Holds if `name` corresponds to a stream cipher. From 2c477c7cb7ad7eabc0134625550f8093f7401e56 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Wed, 7 Dec 2022 13:33:04 -0500 Subject: [PATCH 03/14] Changing BlockMode to use the isKnownCipherBlockModeAlgorithm predicate, and to have a fail safe 'unknown' option. If a block mode is unknown, the isWeak predicate will not stipulate the algorithm is weak by default. An isKnown predicate is added to determine if the algorithm is known. --- .../semmle/python/internal/ConceptsShared.qll | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll index 2f6c8bb8b29b..f921cdecbd8b 100644 --- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll +++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll @@ -10,8 +10,10 @@ * `ConceptsShared.qll` ASAP. */ +import semmle.python.concepts.internal.CryptoAlgorithmNames private import ConceptsImports + /** * Provides models for cryptographic concepts. * @@ -81,10 +83,18 @@ module Cryptography { * data of arbitrary length using a block encryption algorithm. */ class BlockMode extends string { - BlockMode() { this = ["ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP"] } + BlockMode() { isKnownCipherBlockModeAlgorithm(this) or this = unknownAlgorithmStub()} + + /** + * Holds if this block mode is considered to be insecure. + * Assumed weak if the mode is not known. + */ + predicate isWeak() {isWeakCipherBlockModeAlgorithm(this) or not isKnownCipherBlockModeAlgorithm(this)} - /** Holds if this block mode is considered to be insecure. */ - predicate isWeak() { this = "ECB" } + /** + * Holds if this block mode is a known block mode + */ + predicate isKnown() {isKnownCipherBlockModeAlgorithm(this)} } } From a50b01fc2b9d1e6fcaa6e7ed062ec7cd7124ecc5 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Wed, 7 Dec 2022 13:34:53 -0500 Subject: [PATCH 04/14] Updating cryptodrome and crytopgraphy framework libraries to allow fo finding block modes that aren't known. This avoids cases where the library is updated and we don't recognize new modes as being weak or strong, instead they would've been ignored entirely. --- .../semmle/python/frameworks/Cryptodome.qll | 20 +++++++++++++--- .../semmle/python/frameworks/Cryptography.qll | 24 ++++++++++++++----- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll index 0c3e8968c23b..f0fa34c5ccdc 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll @@ -9,6 +9,7 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.ApiGraphs +import semmle.python.concepts.internal.CryptoAlgorithmNames /** * Provides models for @@ -158,15 +159,28 @@ private module CryptodomeModel { override Cryptography::BlockMode getBlockMode() { // `modeName` is of the form "MODE_" - exists(string modeName | + exists(string foundMode | newCall.getArg(1) = API::moduleImport(["Crypto", "Cryptodome"]) .getMember("Cipher") .getMember(cipherName) - .getMember(modeName) + .getMember(foundMode) .getAValueReachableFromSource() | - result = modeName.splitAt("_", 1) + if isKnownCipherBlockModeAlgorithm(foundMode) + then result = foundMode.splitAt("_", 1) + else result = unknownAlgorithmStub() + ) + or + ( + not exists(string modeName | + newCall.getArg(1) = + API::moduleImport(["Crypto", "Cryptodome"]) + .getMember("Cipher") + .getMember(cipherName) + .getMember(modeName) + .getAValueReachableFromSource()) + and result = unknownAlgorithmStub() ) } } diff --git a/python/ql/lib/semmle/python/frameworks/Cryptography.qll b/python/ql/lib/semmle/python/frameworks/Cryptography.qll index d3e03083c09d..9c74b8ba9719 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptography.qll @@ -7,6 +7,7 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.ApiGraphs +import semmle.python.concepts.internal.CryptoAlgorithmNames /** * Provides models for the `cryptography` PyPI package. @@ -180,7 +181,7 @@ private module CryptographyModel { } /** Gets a reference to a Cipher instance using algorithm with `algorithmName`. */ - API::Node cipherInstance(string algorithmName, string modeName) { + API::Node symmetricCipherInstance(string algorithmName, string modeName) { exists(API::CallNode call | result = call.getReturn() | call = API::moduleImport("cryptography") @@ -192,11 +193,22 @@ private module CryptographyModel { algorithmClassRef(algorithmName).getReturn().getAValueReachableFromSource() in [ call.getArg(0), call.getArgByName("algorithm") ] and - exists(DataFlow::Node modeArg | modeArg in [call.getArg(1), call.getArgByName("mode")] | - if modeArg = modeClassRef(_).getReturn().getAValueReachableFromSource() - then modeArg = modeClassRef(modeName).getReturn().getAValueReachableFromSource() - else modeName = "" + exists(DataFlow::Node modeArg, string foundMode | modeArg in [call.getArg(1), call.getArgByName("mode")] | + ( + // Find the used mode name + if modeArg = modeClassRef(_).getReturn().getAValueReachableFromSource() + then modeArg = modeClassRef(foundMode).getReturn().getAValueReachableFromSource() + else foundMode = unknownAlgorithmStub() + ) + and + ( + // check the found mode name, setting modeName to the unknown stub if not a known algorithm + if isKnownCipherBlockModeAlgorithm(foundMode) + then modeName = foundMode + else modeName = unknownAlgorithmStub() + ) ) + ) } @@ -210,7 +222,7 @@ private module CryptographyModel { CryptographyGenericCipherOperation() { this = - cipherInstance(algorithmName, modeName) + symmetricCipherInstance(algorithmName, modeName) .getMember(["decryptor", "encryptor"]) .getReturn() .getMember(["update", "update_into"]) From 9bf37b3aa377bfdc9c5b4a98b9bcb83af867045a Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Mon, 12 Dec 2022 10:31:28 -0500 Subject: [PATCH 05/14] Interim partially working refactor of crypto libraries for python. --- .../python/concepts/CryptoAlgorithms.qll | 28 +++++++- .../internal/CryptoAlgorithmNames.qll | 9 +-- .../semmle/python/frameworks/Cryptodome.qll | 61 ++++++++-------- .../semmle/python/frameworks/Cryptography.qll | 33 ++++----- .../ql/lib/semmle/python/frameworks/Rsa.qll | 57 ++++++++++----- .../lib/semmle/python/frameworks/Stdlib.qll | 21 ++++-- .../semmle/python/internal/ConceptsShared.qll | 69 +++++++++++++------ 7 files changed, 178 insertions(+), 100 deletions(-) diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll index 22a2d1c1eb29..431eb234c568 100644 --- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll +++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll @@ -9,7 +9,7 @@ private import internal.CryptoAlgorithmNames /** * A cryptographic algorithm. */ -private newtype TCryptographicAlgorithm = +private newtype TCryptographicAlgorithm = MkHashingAlgorithm(string name, boolean isWeak) { isStrongHashingAlgorithm(name) and isWeak = false or @@ -25,6 +25,21 @@ private newtype TCryptographicAlgorithm = or isWeakPasswordHashingAlgorithm(name) and isWeak = true } + or + MkUnknown() + +//add an unknown algorithm extends mkUnknown, isUnknown returns any and weak yes +class UnknownAlgorithm extends MkUnknown, CryptographicAlgorithm { + override predicate isUnknown() { any() } + + override string getName() { result = unknownAlgorithm() } + + override predicate isWeak() { any() } + + bindingset[name] + override predicate matchesName(string name) { none()} + +} /** * A cryptographic algorithm. @@ -53,6 +68,17 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { * Holds if this algorithm is weak. */ abstract predicate isWeak(); + + // /** Gets the raw algorithm used, i.e., the algorithm extracted directly from the source*/ + // abstract string getAlgorithmRaw(); + + // /** Gets the raw block mode used, i.e., the block mode extracted directly from the source*/ + // abstract string getBlockModeRaw(); + + /** + * Holds if this algorithm is not known. + */ + predicate isUnknown() { this instanceof UnknownAlgorithm } } /** diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll index 7fd4b974aad2..055718aada87 100644 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll @@ -13,11 +13,12 @@ * Returns a string to represent generally unknown algorithms. * To be used in place of hardcoded strings representing 'unknown' to maintain consistency. */ - string unknownAlgorithmStub() + string unknownAlgorithm() { - result = "" + result = "UNKNOWN" } + /** * Holds if `name` is a known hashing algorithm in the model/library. */ @@ -32,7 +33,7 @@ predicate isStrongHashingAlgorithm(string name) { name = [ - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", + "BLAKE2", "BLAKE2b", "BLAKE2s", "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", "SHAKE128", "SHAKE256" ] @@ -67,7 +68,7 @@ predicate isStrongEncryptionAlgorithm(string name) { "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" + "IDEA", "RABBIT", "RSA", "SEED", "SM3", "SM4" ] } diff --git a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll index f0fa34c5ccdc..8e42103192c6 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll @@ -125,7 +125,10 @@ private module CryptodomeModel { this = newCall.getReturn().getMember(methodName).getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(cipherName) } + override string getAlgorithmRaw() { + //result.matchesName(cipherName) + result = cipherName + } override DataFlow::Node getAnInput() { methodName = "encrypt" and @@ -157,31 +160,23 @@ private module CryptodomeModel { ] } - override Cryptography::BlockMode getBlockMode() { - // `modeName` is of the form "MODE_" - exists(string foundMode | - newCall.getArg(1) = - API::moduleImport(["Crypto", "Cryptodome"]) - .getMember("Cipher") - .getMember(cipherName) - .getMember(foundMode) - .getAValueReachableFromSource() - | - if isKnownCipherBlockModeAlgorithm(foundMode) - then result = foundMode.splitAt("_", 1) - else result = unknownAlgorithmStub() - ) - or - ( - not exists(string modeName | - newCall.getArg(1) = - API::moduleImport(["Crypto", "Cryptodome"]) - .getMember("Cipher") - .getMember(cipherName) - .getMember(modeName) - .getAValueReachableFromSource()) - and result = unknownAlgorithmStub() - ) + private predicate resolveModeName(string modeName) + { + newCall.getArg(1) = + API::moduleImport(["Crypto", "Cryptodome"]) + .getMember("Cipher") + .getMember(cipherName) + .getMember(modeName) + .getAValueReachableFromSource() + } + + override Cryptography::BlockMode getBlockModeRaw() { + // `modeName` is of the form "MODE_" + exists(string modeName | + if resolveModeName(modeName) + then result = modeName.splitAt("_", 1) + else modeName = unknownAlgorithm() and + result = unknownAlgorithm()) } } @@ -205,8 +200,9 @@ private module CryptodomeModel { .getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { - result.matchesName(signatureName) + override string getAlgorithmRaw() { + //result.matchesName(signatureName) + result = signatureName } override DataFlow::Node getAnInput() { @@ -221,7 +217,7 @@ private module CryptodomeModel { ) } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** @@ -242,10 +238,13 @@ private module CryptodomeModel { ) } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) } + override string getAlgorithmRaw() { + //result.matchesName(hashName) + result = hashName + } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } } diff --git a/python/ql/lib/semmle/python/frameworks/Cryptography.qll b/python/ql/lib/semmle/python/frameworks/Cryptography.qll index 9c74b8ba9719..d9fefdc1dd24 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptography.qll @@ -193,20 +193,11 @@ private module CryptographyModel { algorithmClassRef(algorithmName).getReturn().getAValueReachableFromSource() in [ call.getArg(0), call.getArgByName("algorithm") ] and - exists(DataFlow::Node modeArg, string foundMode | modeArg in [call.getArg(1), call.getArgByName("mode")] | - ( - // Find the used mode name - if modeArg = modeClassRef(_).getReturn().getAValueReachableFromSource() - then modeArg = modeClassRef(foundMode).getReturn().getAValueReachableFromSource() - else foundMode = unknownAlgorithmStub() - ) - and - ( - // check the found mode name, setting modeName to the unknown stub if not a known algorithm - if isKnownCipherBlockModeAlgorithm(foundMode) - then modeName = foundMode - else modeName = unknownAlgorithmStub() - ) + exists(DataFlow::Node modeArg | modeArg in [call.getArg(1), call.getArgByName("mode")] | + // Find the used mode name + if modeArg = modeClassRef(_).getReturn().getAValueReachableFromSource() + then modeArg = modeClassRef(modeName).getReturn().getAValueReachableFromSource() + else modeName = unknownAlgorithm() ) ) @@ -229,13 +220,14 @@ private module CryptographyModel { .getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { - result.matchesName(algorithmName) + override string getAlgorithmRaw() { + //result.matchesName(algorithmName) + result = algorithmName } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override Cryptography::BlockMode getBlockMode() { result = modeName } + override string getBlockModeRaw() { result = modeName } } } @@ -281,13 +273,14 @@ private module CryptographyModel { this = hashInstance(algorithmName).getMember("update").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { - result.matchesName(algorithmName) + override string getAlgorithmRaw() { + //result.matchesName(algorithmName) + result = algorithmName } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } } } diff --git a/python/ql/lib/semmle/python/frameworks/Rsa.qll b/python/ql/lib/semmle/python/frameworks/Rsa.qll index e82581d46b65..163b39d81bdb 100644 --- a/python/ql/lib/semmle/python/frameworks/Rsa.qll +++ b/python/ql/lib/semmle/python/frameworks/Rsa.qll @@ -36,13 +36,16 @@ private module Rsa { class RsaEncryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { RsaEncryptCall() { this = API::moduleImport("rsa").getMember("encrypt").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" } + override string getAlgorithmRaw() { + result = "RSA" + //result.getName() = "RSA" + } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("message")] } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** @@ -53,11 +56,14 @@ private module Rsa { class RsaDecryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { RsaDecryptCall() { this = API::moduleImport("rsa").getMember("decrypt").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" } + override string getAlgorithmRaw() { + result = "RSA" + //result.getName() = "RSA" + } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("crypto")] } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** @@ -68,23 +74,31 @@ private module Rsa { class RsaSignCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { RsaSignCall() { this = API::moduleImport("rsa").getMember("sign").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { + override string getAlgorithmRaw() { // signature part - result.getName() = "RSA" + result = "RSA" or // hashing part exists(StrConst str, DataFlow::Node hashNameArg | hashNameArg in [this.getArg(2), this.getArgByName("hash_method")] and DataFlow::exprNode(str) = hashNameArg.getALocalSource() and - result.matchesName(str.getText()) - ) + result = str.getText()) + // // signature part + // result.getName() = "RSA" + // or + // // hashing part + // exists(StrConst str, DataFlow::Node hashNameArg | + // hashNameArg in [this.getArg(2), this.getArgByName("hash_method")] and + // DataFlow::exprNode(str) = hashNameArg.getALocalSource() and + // result.matchesName(str.getText()) + // ) } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("message")] } - override Cryptography::BlockMode getBlockMode() { none() } + override Cryptography::BlockMode getBlockModeRaw() { none() } } /** @@ -95,10 +109,11 @@ private module Rsa { class RsaVerifyCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { RsaVerifyCall() { this = API::moduleImport("rsa").getMember("verify").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { + override string getAlgorithmRaw() { // note that technically there is also a hashing operation going on but we don't // know what algorithm is used up front, since it is encoded in the signature - result.getName() = "RSA" + result = "RSA" + //result.getName() = "RSA" } override DataFlow::Node getAnInput() { @@ -107,7 +122,7 @@ private module Rsa { result in [this.getArg(1), this.getArgByName("signature")] } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** @@ -119,19 +134,23 @@ private module Rsa { DataFlow::CallCfgNode { RsaComputeHashCall() { this = API::moduleImport("rsa").getMember("compute_hash").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { + override string getAlgorithmRaw() { exists(StrConst str, DataFlow::Node hashNameArg | hashNameArg in [this.getArg(1), this.getArgByName("method_name")] and DataFlow::exprNode(str) = hashNameArg.getALocalSource() and - result.matchesName(str.getText()) - ) + result = str.getText()) + // exists(StrConst str, DataFlow::Node hashNameArg | + // hashNameArg in [this.getArg(1), this.getArgByName("method_name")] and + // DataFlow::exprNode(str) = hashNameArg.getALocalSource() and + // result.matchesName(str.getText()) + // ) } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("message")] } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** @@ -142,12 +161,14 @@ private module Rsa { class RsaSignHashCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { RsaSignHashCall() { this = API::moduleImport("rsa").getMember("sign_hash").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" } + override string getAlgorithmRaw() { + result = "RSA" + } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("hash_value")] } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index f5d6dd8df1cf..7ec627987894 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2669,11 +2669,14 @@ private module StdlibPrivate { exists(this.getParameter(1, "data")) } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) } + override string getAlgorithmRaw() { + //result.matchesName(hashName) + result = hashName + } override DataFlow::Node getAnInput() { result = this.getParameter(1, "data").asSink() } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** @@ -2686,11 +2689,14 @@ private module StdlibPrivate { this = hashlibNewCall(hashName).getReturn().getMember("update").getACall() } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) } + override string getAlgorithmRaw() { + //result.matchesName(hashName) + result = hashName + } override DataFlow::Node getAnInput() { result = this.getArg(0) } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** Helper predicate for the `HashLibGenericHashOperation` charpred, to prevent a bad join order. */ @@ -2713,9 +2719,12 @@ private module StdlibPrivate { bindingset[this] HashlibGenericHashOperation() { hashClass = hashlibMember(hashName) } - override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) } + override string getAlgorithmRaw() { + //result.matchesName(hashName) + result = hashName + } - override Cryptography::BlockMode getBlockMode() { none() } + override string getBlockModeRaw() { none() } } /** diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll index f921cdecbd8b..bfb57f7646f2 100644 --- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll +++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll @@ -11,6 +11,7 @@ */ import semmle.python.concepts.internal.CryptoAlgorithmNames +import semmle.python.concepts.CryptoAlgorithms private import ConceptsImports @@ -23,13 +24,13 @@ private import ConceptsImports * to improve our libraries in the future to more precisely capture this aspect. */ module Cryptography { - class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; + // class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; - class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; + // class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; - class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm; + // class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm; - class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm; + // class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm; /** * A data-flow node that is an application of a cryptographic algorithm. For example, @@ -40,17 +41,30 @@ module Cryptography { */ class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range { /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() } + CryptographicAlgorithm getAlgorithm() { + result.matchesName(super.getAlgorithmRaw()) or + not exists(CryptographicAlgorithm algo | algo.matchesName(super.getAlgorithmRaw())) and result instanceof UnknownAlgorithm } + /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ DataFlow::Node getAnInput() { result = super.getAnInput() } + /** * Gets the block mode used to perform this cryptographic operation. * This may have no result - for example if the `CryptographicAlgorithm` used * is a stream cipher rather than a block cipher. */ - BlockMode getBlockMode() { result = super.getBlockMode() } + // TODO: modify to use raw to get unknown, return blockmode unknown (set a blockmode unknown) + // make a module blockmode + // make getblock mode final, and change all extending to use raw + final BlockMode getBlockMode() { + if isKnownCipherBlockModeAlgorithm(super.getBlockModeRaw()) then + result = super.getBlockModeRaw() + else + result = BlockMode::unknown() + } + } /** Provides classes for modeling new applications of a cryptographic algorithms. */ @@ -63,38 +77,53 @@ module Cryptography { * extend `CryptographicOperation` instead. */ abstract class Range extends DataFlow::Node { - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - abstract CryptographicAlgorithm getAlgorithm(); + /** Gets the raw algorithm used, i.e., the algorithm extracted directly from the source*/ + abstract string getAlgorithmRaw(); + /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ abstract DataFlow::Node getAnInput(); - /** - * Gets the block mode used to perform this cryptographic operation. - * This may have no result - for example if the `CryptographicAlgorithm` used - * is a stream cipher rather than a block cipher. - */ - abstract BlockMode getBlockMode(); + /** Gets the raw block mode used, i.e., the block mode extracted directly from the source*/ + abstract string getBlockModeRaw(); } } + /** * A cryptographic block cipher mode of operation. This can be used to encrypt * data of arbitrary length using a block encryption algorithm. */ class BlockMode extends string { - BlockMode() { isKnownCipherBlockModeAlgorithm(this) or this = unknownAlgorithmStub()} + BlockMode() { + isKnownCipherBlockModeAlgorithm(this) or + (not isKnownCipherBlockModeAlgorithm(this) and this = BlockMode::unknown()) + } /** - * Holds if this block mode is considered to be insecure. - * Assumed weak if the mode is not known. + * Holds if this block mode is known and considered to be insecure. + * NOTE: if the algorithm is not known, no assessment is made on if it is secure. + * Users should use the `isUnknown` predicate specifically to make their + * own assessment in these conditions. */ - predicate isWeak() {isWeakCipherBlockModeAlgorithm(this) or not isKnownCipherBlockModeAlgorithm(this)} + predicate isWeak() { + isWeakCipherBlockModeAlgorithm(this) + } /** - * Holds if this block mode is a known block mode + * Holds if this block mode is not a known block mode */ - predicate isKnown() {isKnownCipherBlockModeAlgorithm(this)} + predicate isUnknown() { + not isKnownCipherBlockModeAlgorithm(this) + } + } + + module BlockMode + { + BlockMode unknown() + { + result = unknownAlgorithm() + } } } From 36fb8f044728e6582e82f5bcfec8114436154496 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Mon, 12 Dec 2022 14:21:38 -0500 Subject: [PATCH 06/14] Updates for key derivation in stdlib --- .../lib/semmle/python/frameworks/Stdlib.qll | 30 ++++++++++++++++++- .../semmle/python/internal/ConceptsShared.qll | 4 +-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 7ec627987894..25da62543905 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2651,6 +2651,33 @@ private module StdlibPrivate { // --------------------------------------------------------------------------- // hashlib // --------------------------------------------------------------------------- + + + private API::CallNode hashlibpbkdf2HMACCall(string algorithmName) + { + algorithmName = + result.getParameter(0, "hash_name").getAValueReachingSink().asExpr().(StrConst).getText() and + result = API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall() + } + + class Hashlibpbkdf2HMAC extends Cryptography::CryptographicOperation::Range, API::CallNode { + string hashName; + + Hashlibpbkdf2HMAC() { + this = hashlibpbkdf2HMACCall(hashName) and + exists(this.getParameter(1, "data")) + } + + override string getAlgorithmRaw() { + //result.matchesName(hashName) + result = hashName + } + + override DataFlow::Node getAnInput() { result = this.getParameter(1, "password").asSink() } + + override string getBlockModeRaw() { none() } + } + /** Gets a call to `hashlib.new` with `algorithmName` as the first argument. */ private API::CallNode hashlibNewCall(string algorithmName) { algorithmName = @@ -2703,13 +2730,14 @@ private module StdlibPrivate { pragma[nomagic] private API::Node hashlibMember(string hashName) { result = API::moduleImport("hashlib").getMember(hashName) and - hashName != "new" + hashName != "new" and hashName != "pbkdf2_hmac" } /** * A hashing operation from the `hashlib` package using one of the predefined classes * (such as `hashlib.md5`). `hashlib.new` is not included, since it is handled by * `HashlibNewCall` and `HashlibNewUpdateCall`. + * `hashlib.pbkdf2_hmac` is also not included and is handled by `Hashlibpbkdf2HMAC` */ abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll index bfb57f7646f2..78e6bf409256 100644 --- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll +++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll @@ -24,6 +24,7 @@ private import ConceptsImports * to improve our libraries in the future to more precisely capture this aspect. */ module Cryptography { + import semmle.python.concepts.CryptoAlgorithms // class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; // class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; @@ -55,9 +56,6 @@ module Cryptography { * This may have no result - for example if the `CryptographicAlgorithm` used * is a stream cipher rather than a block cipher. */ - // TODO: modify to use raw to get unknown, return blockmode unknown (set a blockmode unknown) - // make a module blockmode - // make getblock mode final, and change all extending to use raw final BlockMode getBlockMode() { if isKnownCipherBlockModeAlgorithm(super.getBlockModeRaw()) then result = super.getBlockModeRaw() From 1ff5cc50475e4e8c9e71a267cb97aa7894197cd9 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Tue, 13 Dec 2022 11:49:08 -0500 Subject: [PATCH 07/14] Deleting workflow from branch. --- .github/workflows/sync_remote_main.yml | 28 -------------------------- 1 file changed, 28 deletions(-) delete mode 100644 .github/workflows/sync_remote_main.yml diff --git a/.github/workflows/sync_remote_main.yml b/.github/workflows/sync_remote_main.yml deleted file mode 100644 index a147629bfb53..000000000000 --- a/.github/workflows/sync_remote_main.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Sync upstream codeql/github main -on: - workflow_dispatch: - schedule: - # actually, ~5 minutes is the highest - # effective frequency you will get - - cron: '* * * * *' -jobs: - merge: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - token: ${{ secrets.WORKFLOW_TOKEN }} - - name: Merge upstream - run: | - git config --global user.name 'your-name' - git config --global user.email 'your-username@users.noreply.github.com' - # "git checkout main" is unnecessary, already here by default - git pull --unshallow # this option is very important, you would get - # complains about unrelated histories without it. - # (but actions/checkout@v2 can also be instructed - # to fetch all git depth right from the start) - git remote add upstream https://github.com/github/codeql.git - git fetch upstream - git checkout main - git merge --no-edit upstream/main - git push origin main From 60bff804fbd60a67c871272660dba7b232aa9bdc Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Tue, 13 Dec 2022 11:59:24 -0500 Subject: [PATCH 08/14] Cleanup --- .../python/concepts/CryptoAlgorithms.qll | 33 +++++++++---------- .../internal/CryptoAlgorithmNames.qll | 3 +- .../semmle/python/frameworks/Cryptodome.qll | 3 -- .../semmle/python/frameworks/Cryptography.qll | 2 -- .../ql/lib/semmle/python/frameworks/Rsa.qll | 3 -- .../lib/semmle/python/frameworks/Stdlib.qll | 6 +--- .../semmle/python/internal/ConceptsShared.qll | 7 ---- 7 files changed, 18 insertions(+), 39 deletions(-) diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll index 431eb234c568..a661fdf69a6a 100644 --- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll +++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll @@ -28,18 +28,6 @@ private newtype TCryptographicAlgorithm = or MkUnknown() -//add an unknown algorithm extends mkUnknown, isUnknown returns any and weak yes -class UnknownAlgorithm extends MkUnknown, CryptographicAlgorithm { - override predicate isUnknown() { any() } - - override string getName() { result = unknownAlgorithm() } - - override predicate isWeak() { any() } - - bindingset[name] - override predicate matchesName(string name) { none()} - -} /** * A cryptographic algorithm. @@ -69,18 +57,27 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { */ abstract predicate isWeak(); - // /** Gets the raw algorithm used, i.e., the algorithm extracted directly from the source*/ - // abstract string getAlgorithmRaw(); - - // /** Gets the raw block mode used, i.e., the block mode extracted directly from the source*/ - // abstract string getBlockModeRaw(); - /** * Holds if this algorithm is not known. */ predicate isUnknown() { this instanceof UnknownAlgorithm } } +/** + * An 'Unknown' cryptographic algorithm, typically encountered when extracting as yet unmodelled API algorithms. + */ +class UnknownAlgorithm extends MkUnknown, CryptographicAlgorithm { + override predicate isUnknown() { any() } + + override string getName() { result = unknownAlgorithm() } + + override predicate isWeak() { any() } + + bindingset[name] + override predicate matchesName(string name) { none()} + +} + /** * A hashing algorithm such as `MD5` or `SHA512`. */ diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll index 055718aada87..1b5a766253fc 100644 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll @@ -11,7 +11,8 @@ /** * Returns a string to represent generally unknown algorithms. - * To be used in place of hardcoded strings representing 'unknown' to maintain consistency. + * Predicate is to be used to get a consistent string representation + * for unknown algorithms. */ string unknownAlgorithm() { diff --git a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll index 8e42103192c6..b7be62e9e59d 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll @@ -126,7 +126,6 @@ private module CryptodomeModel { } override string getAlgorithmRaw() { - //result.matchesName(cipherName) result = cipherName } @@ -201,7 +200,6 @@ private module CryptodomeModel { } override string getAlgorithmRaw() { - //result.matchesName(signatureName) result = signatureName } @@ -239,7 +237,6 @@ private module CryptodomeModel { } override string getAlgorithmRaw() { - //result.matchesName(hashName) result = hashName } diff --git a/python/ql/lib/semmle/python/frameworks/Cryptography.qll b/python/ql/lib/semmle/python/frameworks/Cryptography.qll index d9fefdc1dd24..0a6b3fb358e1 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptography.qll @@ -221,7 +221,6 @@ private module CryptographyModel { } override string getAlgorithmRaw() { - //result.matchesName(algorithmName) result = algorithmName } @@ -274,7 +273,6 @@ private module CryptographyModel { } override string getAlgorithmRaw() { - //result.matchesName(algorithmName) result = algorithmName } diff --git a/python/ql/lib/semmle/python/frameworks/Rsa.qll b/python/ql/lib/semmle/python/frameworks/Rsa.qll index 163b39d81bdb..377a7380005c 100644 --- a/python/ql/lib/semmle/python/frameworks/Rsa.qll +++ b/python/ql/lib/semmle/python/frameworks/Rsa.qll @@ -38,7 +38,6 @@ private module Rsa { override string getAlgorithmRaw() { result = "RSA" - //result.getName() = "RSA" } override DataFlow::Node getAnInput() { @@ -58,7 +57,6 @@ private module Rsa { override string getAlgorithmRaw() { result = "RSA" - //result.getName() = "RSA" } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("crypto")] } @@ -113,7 +111,6 @@ private module Rsa { // note that technically there is also a hashing operation going on but we don't // know what algorithm is used up front, since it is encoded in the signature result = "RSA" - //result.getName() = "RSA" } override DataFlow::Node getAnInput() { diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 25da62543905..1ef6e874f1e4 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2669,7 +2669,6 @@ private module StdlibPrivate { } override string getAlgorithmRaw() { - //result.matchesName(hashName) result = hashName } @@ -2697,7 +2696,6 @@ private module StdlibPrivate { } override string getAlgorithmRaw() { - //result.matchesName(hashName) result = hashName } @@ -2717,7 +2715,6 @@ private module StdlibPrivate { } override string getAlgorithmRaw() { - //result.matchesName(hashName) result = hashName } @@ -2747,8 +2744,7 @@ private module StdlibPrivate { bindingset[this] HashlibGenericHashOperation() { hashClass = hashlibMember(hashName) } - override string getAlgorithmRaw() { - //result.matchesName(hashName) + override string getAlgorithmRaw() { result = hashName } diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll index 78e6bf409256..1a05cc332697 100644 --- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll +++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll @@ -25,13 +25,6 @@ private import ConceptsImports */ module Cryptography { import semmle.python.concepts.CryptoAlgorithms - // class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; - - // class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; - - // class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm; - - // class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm; /** * A data-flow node that is an application of a cryptographic algorithm. For example, From 05f91f38dc4b79f2ca36805049d55b4fd5af5fe6 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Tue, 13 Dec 2022 12:04:48 -0500 Subject: [PATCH 09/14] More cleanup --- python/ql/lib/semmle/python/frameworks/Rsa.qll | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Rsa.qll b/python/ql/lib/semmle/python/frameworks/Rsa.qll index 377a7380005c..868e514988bd 100644 --- a/python/ql/lib/semmle/python/frameworks/Rsa.qll +++ b/python/ql/lib/semmle/python/frameworks/Rsa.qll @@ -81,15 +81,6 @@ private module Rsa { hashNameArg in [this.getArg(2), this.getArgByName("hash_method")] and DataFlow::exprNode(str) = hashNameArg.getALocalSource() and result = str.getText()) - // // signature part - // result.getName() = "RSA" - // or - // // hashing part - // exists(StrConst str, DataFlow::Node hashNameArg | - // hashNameArg in [this.getArg(2), this.getArgByName("hash_method")] and - // DataFlow::exprNode(str) = hashNameArg.getALocalSource() and - // result.matchesName(str.getText()) - // ) } override DataFlow::Node getAnInput() { From a1368a47bfe3da6ef617191770303fec6454cfc6 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Tue, 13 Dec 2022 12:05:41 -0500 Subject: [PATCH 10/14] More cleanup --- python/ql/lib/semmle/python/frameworks/Rsa.qll | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Rsa.qll b/python/ql/lib/semmle/python/frameworks/Rsa.qll index 868e514988bd..3a1a99b6e411 100644 --- a/python/ql/lib/semmle/python/frameworks/Rsa.qll +++ b/python/ql/lib/semmle/python/frameworks/Rsa.qll @@ -127,11 +127,6 @@ private module Rsa { hashNameArg in [this.getArg(1), this.getArgByName("method_name")] and DataFlow::exprNode(str) = hashNameArg.getALocalSource() and result = str.getText()) - // exists(StrConst str, DataFlow::Node hashNameArg | - // hashNameArg in [this.getArg(1), this.getArgByName("method_name")] and - // DataFlow::exprNode(str) = hashNameArg.getALocalSource() and - // result.matchesName(str.getText()) - // ) } override DataFlow::Node getAnInput() { From 856458013477e68eab4503e3bb2a16ed5489958c Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Tue, 20 Dec 2022 12:05:13 -0500 Subject: [PATCH 11/14] adding a predicate to determine if an encryption algorithm is asymmetric --- .../ql/lib/semmle/python/concepts/CryptoAlgorithms.qll | 2 ++ .../python/concepts/internal/CryptoAlgorithmNames.qll | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll index a661fdf69a6a..3a930f721cf2 100644 --- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll +++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll @@ -105,6 +105,8 @@ class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm override predicate isWeak() { isWeak = true } + predicate isAsymmetricEncryption() { isAsymmetricEncryption(name) } + /** Holds if this algorithm is a stream cipher. */ predicate isStreamCipher() { isStreamCipher(name) } } diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll index 1b5a766253fc..41401c81605c 100644 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll @@ -137,3 +137,12 @@ predicate isWeakCipherBlockModeAlgorithm(string name) * Holds if `name` corresponds to a stream cipher. */ predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } + + +/** + * Holds if `name` corresponds to an asymmetric encryption. + */ +bindingset[name] +predicate isAsymmetricEncryption(string name){ + name.regexpMatch("(?i)^rsa.*") +} From 1be6bc363d720315e896c0462f357163addb09e7 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Wed, 21 Dec 2022 12:09:39 -0500 Subject: [PATCH 12/14] updating algorithm names. Removed some entries that are redundant, and should be caught through name normalization. Added EAX block mode. --- .../python/concepts/internal/CryptoAlgorithmNames.qll | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll index 41401c81605c..8d96277e9f7f 100644 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll @@ -8,7 +8,6 @@ * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ - /** * Returns a string to represent generally unknown algorithms. * Predicate is to be used to get a consistent string representation @@ -66,9 +65,9 @@ predicate isWeakHashingAlgorithm(string name) { predicate isStrongEncryptionAlgorithm(string name) { name = [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", + "AES", "AES128", "AES192", "AES256", "AES512", "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", + "CAMELLIA256", "CHACHA", "GOST", "GOST89", "IDEA", "RABBIT", "RSA", "SEED", "SM3", "SM4" ] } @@ -121,7 +120,7 @@ predicate isWeakPasswordHashingAlgorithm(string name) { */ predicate isStrongCipherBlockModeAlgorithm(string name) { - name = ["CBC", "GCM", "CCM", "CFB", "OFB", "CFB8", "CTR", "OPENPGP", "XTS"] + name = ["CBC", "GCM", "CCM", "CFB", "OFB", "CFB8", "CTR", "OPENPGP", "XTS", "EAX"] } /** From 066270c790d5b1f217332649931f331edcbdf885 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Wed, 21 Dec 2022 12:09:55 -0500 Subject: [PATCH 13/14] Changed mechanics for unknown block mode. --- .../ql/lib/semmle/python/internal/ConceptsShared.qll | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll index 1a05cc332697..b8c8a2df2a29 100644 --- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll +++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll @@ -53,7 +53,7 @@ module Cryptography { if isKnownCipherBlockModeAlgorithm(super.getBlockModeRaw()) then result = super.getBlockModeRaw() else - result = BlockMode::unknown() + result = unknownAlgorithm() } } @@ -88,7 +88,7 @@ module Cryptography { class BlockMode extends string { BlockMode() { isKnownCipherBlockModeAlgorithm(this) or - (not isKnownCipherBlockModeAlgorithm(this) and this = BlockMode::unknown()) + ((not isKnownCipherBlockModeAlgorithm(this)) and this = unknownAlgorithm()) } /** @@ -108,14 +108,6 @@ module Cryptography { not isKnownCipherBlockModeAlgorithm(this) } } - - module BlockMode - { - BlockMode unknown() - { - result = unknownAlgorithm() - } - } } /** Provides classes for modeling HTTP-related APIs. */ From 10986e6c702900741f3c45da220f72aa5c3a6be8 Mon Sep 17 00:00:00 2001 From: "REDMOND\\brodes" Date: Wed, 4 Jan 2023 13:46:30 -0500 Subject: [PATCH 14/14] Corrected casing on blake sha strings. Added additional sha algorithms. Updated stdlib framework to include hmac hashing library. --- .../internal/CryptoAlgorithmNames.qll | 6 +- .../lib/semmle/python/frameworks/Stdlib.qll | 69 +++++++++++++++++++ 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll index 8d96277e9f7f..9fc05ed30cb1 100644 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll @@ -33,9 +33,9 @@ predicate isStrongHashingAlgorithm(string name) { name = [ - "BLAKE2", "BLAKE2b", "BLAKE2s", "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - "SHAKE128", "SHAKE256" + "BLAKE2", "BLAKE2B", "BLAKE2S", "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", + "SHA224", "SHA256", "SHA384", "SHA512", "SHA512224", "SHA512256", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", + "SHAKE128", "SHAKE256", "SM3", "WHIRLPOOL" ] } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 1ef6e874f1e4..42671f95e8de 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2648,6 +2648,75 @@ private module StdlibPrivate { } } + // --------------------------------------------------------------------------- + // hmac + // --------------------------------------------------------------------------- + + /** Gets a call to `hmac.new`, `hmac.digest` or a direct construction of an HMAC object + * with `algorithmName` as 3rd/`digestmod` argument */ + private API::CallNode baseHMACOperation(string algorithmName) { + ( + result = API::moduleImport("hmac").getMember("new").getACall() or + result = API::moduleImport("hmac").getMember("digest").getACall() or + result = API::moduleImport("hmac").getMember("HMAC").getACall() + ) + and + ( + // handles cases where the algorithm is passed as a string on an hlib function: + // e.g., hmac.new(key, msg=None, digestmod="") vs hmac.new(key, msg=None, digestmod=hashlib.) + algorithmName = + result.getParameter(2, "digestmod").getAValueReachingSink().asExpr().(StrConst).getText() or + result.getParameter(2, "digestmod").getAValueReachingSink() = hashlibMember(algorithmName).asSource() or + // also handle older library versions where digestmod could be empty, in these cases it is assumed to be md5 + // This default was removed in version 3.8: https://docs.python.org/3/library/hmac.html + (not exists(result.getParameter(2, "digestmod")) and algorithmName = "md5") + ) + } + /** + * A hashing operation by constructing an hmac.HMAC object, + * either directly or through `hmac.new`, and supplying a `msg` argument. + * Additionally includes operations for `hmac.digest` as it has the same call signature. + * The `msg` parameter must be present. `HMAC.update` is handled separately for supplying + * a message later. + */ + class HMACDirectHash extends Cryptography::CryptographicOperation::Range, API::CallNode { + string hashName; + + HMACDirectHash() { + this = baseHMACOperation(hashName) and + exists(this.getParameter(1, "msg")) + } + + override string getAlgorithmRaw() { + result = hashName + } + + override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() } + + override string getBlockModeRaw() { none() } + } + + + /** + * A hashing operation by using the `HMAC.update` method on an HMAC object constructed via + * `hmac.new` or direction HMAC construction. + */ + class HMACUpdateCall extends Cryptography::CryptographicOperation::Range, API::CallNode { + string hashName; + + HMACUpdateCall() { + this = baseHMACOperation(hashName).getReturn().getMember("update").getACall() + } + + override string getAlgorithmRaw() { + result = hashName + } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override string getBlockModeRaw() { none() } + } + // --------------------------------------------------------------------------- // hashlib // ---------------------------------------------------------------------------