From 14eaea3de6666e1fed8d974c9135f118fbf8dbe9 Mon Sep 17 00:00:00 2001 From: Andrea Brancaleoni Date: Mon, 8 Apr 2024 17:24:10 +0200 Subject: [PATCH] security rules: April 2024 Update ``` @ nonfree.audit (+0, -1) - generic.dockerfile.security.last-user-is-root.last-user-is-root @ nonfree.others (+0, -0) @ nonfree.security_noaudit_novuln (+0, -0) @ nonfree.vulns (+4, -0) + javascript.jwt-simple.security.jwt-simple-noverify.jwt-simple-noverify + generic.secrets.gitleaks.facebook-secret.facebook-secret + generic.secrets.gitleaks.facebook-page-access-token.facebook-page-access-token + generic.secrets.gitleaks.facebook-access-token.facebook-access-token @ oss.audit (+1, -0) + trailofbits.python.pandas-eval.pandas-eval @ oss.others (+0, -0) @ oss.security_noaudit_novuln (+0, -0) @ oss.vulns (+4, -1) + trailofbits.python.pickles-in-tensorflow.pickles-in-tensorflow + trailofbits.python.msgpack-numpy.msgpack-numpy + trailofbits.python.pickles-in-keras-deprecation.pickles-in-keras-deprecation + trailofbits.python.pickles-in-keras.pickles-in-keras - trailofbits.go.anonymous-race-condition.anonymous-race-condition ``` --- .../generated/nonfree/audit.yaml | 91 ++--- .../generated/nonfree/vulns.yaml | 193 +++++++++- assets/semgrep_rules/generated/oss/audit.yaml | 76 +++- .../semgrep_rules/generated/oss/others.yaml | 8 +- assets/semgrep_rules/generated/oss/vulns.yaml | 344 +++++++++++------- 5 files changed, 502 insertions(+), 210 deletions(-) diff --git a/assets/semgrep_rules/generated/nonfree/audit.yaml b/assets/semgrep_rules/generated/nonfree/audit.yaml index 731e0b68..89927e71 100644 --- a/assets/semgrep_rules/generated/nonfree/audit.yaml +++ b/assets/semgrep_rules/generated/nonfree/audit.yaml @@ -1558,22 +1558,17 @@ rules: severity: ERROR - id: dockerfile.security.last-user-is-root.last-user-is-root patterns: - - pattern-inside: | - USER $F - ... - USER $X - - pattern-not-inside: | - ... - USER $X - ... - USER $F - - focus-metavariable: "$X" - - metavariable-regex: - metavariable: "$X" - regex: "^(root)$" - - metavariable-regex: - metavariable: "$F" - regex: "(.*(?!root))" + - pattern: USER root + - pattern-not-inside: + patterns: + - pattern: | + USER root + ... + USER $X + - metavariable-pattern: + metavariable: "$X" + patterns: + - pattern-not: root message: The last user in the container is 'root'. This is a security hazard because if an attacker gains control of the container they will have root access. Switch back to another user after running commands as 'root'. @@ -1604,8 +1599,8 @@ rules: semgrep.dev: rule: rule_id: ReU2n5 - version_id: e1T01GL - url: https://semgrep.dev/playground/r/e1T01GL/dockerfile.security.last-user-is-root.last-user-is-root + version_id: kbTw78l + url: https://semgrep.dev/playground/r/kbTw78l/dockerfile.security.last-user-is-root.last-user-is-root origin: community - id: dockerfile.security.missing-user-entrypoint.missing-user-entrypoint patterns: @@ -1774,46 +1769,6 @@ rules: - pattern: 'sh -i ...<...> /dev/tcp/.../... ...<&... 1>&... 2>& ' -- id: generic.dockerfile.security.last-user-is-root.last-user-is-root - patterns: - - pattern: USER root - - pattern-not-inside: | - USER root - ... - USER $ANYTHING - message: The last user in the container is 'root'. This is a security hazard because - if an attacker gains control of the container they will have root access. Switch - back to another user after running commands as 'root'. - severity: ERROR - languages: - - dockerfile - metadata: - cwe: - - 'CWE-269: Improper Privilege Management' - source-rule-url: https://github.com/hadolint/hadolint/wiki/DL3002 - references: - - https://github.com/hadolint/hadolint/wiki/DL3002 - category: security - technology: - - dockerfile - confidence: MEDIUM - owasp: - - A04:2021 - Insecure Design - subcategory: - - audit - likelihood: MEDIUM - impact: MEDIUM - license: Commons Clause License Condition v1.0[LGPL-2.1-only] - vulnerability_class: - - Improper Authorization - source: https://semgrep.dev/r/generic.dockerfile.security.last-user-is-root.last-user-is-root - shortlink: https://sg.run/N461 - semgrep.dev: - rule: - rule_id: L1UyO5 - version_id: qkT2xK0 - url: https://semgrep.dev/playground/r/qkT2xK0/generic.dockerfile.security.last-user-is-root.last-user-is-root - origin: community - id: generic.nginx.security.alias-path-traversal.alias-path-traversal patterns: - pattern: | @@ -3818,7 +3773,7 @@ rules: url: https://semgrep.dev/playground/r/d6TrA5w/generic.secrets.security.detected-square-oauth-secret.detected-square-oauth-secret origin: community - id: generic.secrets.security.detected-ssh-password.detected-ssh-password - pattern-regex: sshpass -p.*['|\\\"] + pattern-regex: sshpass -p\s*['|\\\"][^%] languages: - regex message: SSH Password detected @@ -3850,8 +3805,8 @@ rules: semgrep.dev: rule: rule_id: PeUZ4d - version_id: ZRTQNvQ - url: https://semgrep.dev/playground/r/ZRTQNvQ/generic.secrets.security.detected-ssh-password.detected-ssh-password + version_id: 3ZT6geb + url: https://semgrep.dev/playground/r/3ZT6geb/generic.secrets.security.detected-ssh-password.detected-ssh-password origin: community - id: generic.secrets.security.detected-stripe-api-key.detected-stripe-api-key pattern-regex: sk_live_[0-9a-zA-Z]{24} @@ -6772,8 +6727,8 @@ rules: semgrep.dev: rule: rule_id: L1Uyvp - version_id: qkTbbZp - url: https://semgrep.dev/playground/r/qkTbbZp/java.lang.security.audit.cookie-missing-secure-flag.cookie-missing-secure-flag + version_id: GxTv63G + url: https://semgrep.dev/playground/r/GxTv63G/java.lang.security.audit.cookie-missing-secure-flag.cookie-missing-secure-flag origin: community message: A cookie was detected without setting the 'secure' flag. The 'secure' flag for cookies prevents the client from transmitting the cookie over insecure channels @@ -22728,8 +22683,8 @@ rules: semgrep.dev: rule: rule_id: WAUZz5 - version_id: zyTKDAv - url: https://semgrep.dev/playground/r/zyTKDAv/ruby.jwt.security.jwt-hardcode.ruby-jwt-hardcoded-secret + version_id: ExTq53v + url: https://semgrep.dev/playground/r/ExTq53v/ruby.jwt.security.jwt-hardcode.ruby-jwt-hardcoded-secret origin: community patterns: - pattern-inside: | @@ -22756,6 +22711,12 @@ rules: $SECRET = "..." ... JWT.decode($PAYLOAD,$SECRET,...) + - pattern-not: 'JWT.encode($PAYLOAD, nil, ... , jwks: ..., ...) + + ' + - pattern-not: 'JWT.decode($PAYLOAD, nil, ..., jwks: ..., ...) + + ' languages: - ruby severity: ERROR diff --git a/assets/semgrep_rules/generated/nonfree/vulns.yaml b/assets/semgrep_rules/generated/nonfree/vulns.yaml index 45bbe9c0..e5ec9185 100644 --- a/assets/semgrep_rules/generated/nonfree/vulns.yaml +++ b/assets/semgrep_rules/generated/nonfree/vulns.yaml @@ -3254,6 +3254,129 @@ rules: origin: community patterns: - pattern-regex: (?i)(?:etsy)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{24})(?:['|\"|\n|\r|\s|\x60|;]|$) +- id: generic.secrets.gitleaks.facebook-access-token.facebook-access-token + message: A gitleaks facebook-access-token was detected which attempts to identify + hard-coded credentials. It is not recommended to store credentials in source-code, + as this risks secrets being leaked and used by either an internal or external + malicious adversary. It is recommended to use environment variables to securely + provide credentials or retrieve credentials from a secure vault or HSM (Hardware + Security Module). + languages: + - regex + severity: INFO + metadata: + likelihood: LOW + impact: MEDIUM + confidence: LOW + category: security + cwe: + - 'CWE-798: Use of Hard-coded Credentials' + cwe2021-top25: true + cwe2022-top25: true + owasp: + - A07:2021 - Identification and Authentication Failures + references: + - https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html + source-rule-url: https://github.com/zricethezav/gitleaks/tree/master/cmd/generate/config/rules + subcategory: + - vuln + technology: + - gitleaks + license: Commons Clause License Condition v1.0[LGPL-2.1-only] + vulnerability_class: + - Hard-coded Secrets + source: https://semgrep.dev/r/generic.secrets.gitleaks.facebook-access-token.facebook-access-token + shortlink: https://sg.run/Ab0Pg + semgrep.dev: + rule: + rule_id: 4bUR8vw + version_id: e1TrP21 + url: https://semgrep.dev/playground/r/e1TrP21/generic.secrets.gitleaks.facebook-access-token.facebook-access-token + origin: community + patterns: + - pattern-regex: (?i)\b(\d{15,16}\|[0-9a-z\-_]{27})(?:['|\"|\n|\r|\s|\x60|;]|$) +- id: generic.secrets.gitleaks.facebook-page-access-token.facebook-page-access-token + message: A gitleaks facebook-page-access-token was detected which attempts to identify + hard-coded credentials. It is not recommended to store credentials in source-code, + as this risks secrets being leaked and used by either an internal or external + malicious adversary. It is recommended to use environment variables to securely + provide credentials or retrieve credentials from a secure vault or HSM (Hardware + Security Module). + languages: + - regex + severity: INFO + metadata: + likelihood: LOW + impact: MEDIUM + confidence: LOW + category: security + cwe: + - 'CWE-798: Use of Hard-coded Credentials' + cwe2021-top25: true + cwe2022-top25: true + owasp: + - A07:2021 - Identification and Authentication Failures + references: + - https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html + source-rule-url: https://github.com/zricethezav/gitleaks/tree/master/cmd/generate/config/rules + subcategory: + - vuln + technology: + - gitleaks + license: Commons Clause License Condition v1.0[LGPL-2.1-only] + vulnerability_class: + - Hard-coded Secrets + source: https://semgrep.dev/r/generic.secrets.gitleaks.facebook-page-access-token.facebook-page-access-token + shortlink: https://sg.run/BYK5b + semgrep.dev: + rule: + rule_id: PeUJbAl + version_id: vdT4bA8 + url: https://semgrep.dev/playground/r/vdT4bA8/generic.secrets.gitleaks.facebook-page-access-token.facebook-page-access-token + origin: community + patterns: + - pattern-regex: (?i)\b(EAA[MC][a-z0-9]{20,})(?:['|\"|\n|\r|\s|\x60|;]|$) +- id: generic.secrets.gitleaks.facebook-secret.facebook-secret + message: A gitleaks facebook-secret was detected which attempts to identify hard-coded + credentials. It is not recommended to store credentials in source-code, as this + risks secrets being leaked and used by either an internal or external malicious + adversary. It is recommended to use environment variables to securely provide + credentials or retrieve credentials from a secure vault or HSM (Hardware Security + Module). + languages: + - regex + severity: INFO + metadata: + likelihood: LOW + impact: MEDIUM + confidence: LOW + category: security + cwe: + - 'CWE-798: Use of Hard-coded Credentials' + cwe2021-top25: true + cwe2022-top25: true + owasp: + - A07:2021 - Identification and Authentication Failures + references: + - https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html + source-rule-url: https://github.com/zricethezav/gitleaks/tree/master/cmd/generate/config/rules + subcategory: + - vuln + technology: + - gitleaks + license: Commons Clause License Condition v1.0[LGPL-2.1-only] + vulnerability_class: + - Hard-coded Secrets + source: https://semgrep.dev/r/generic.secrets.gitleaks.facebook-secret.facebook-secret + shortlink: https://sg.run/DblB2 + semgrep.dev: + rule: + rule_id: JDUNK7E + version_id: d6T4N5y + url: https://semgrep.dev/playground/r/d6T4N5y/generic.secrets.gitleaks.facebook-secret.facebook-secret + origin: community + patterns: + - pattern-regex: (?i)(?:facebook)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$) - id: generic.secrets.gitleaks.facebook.facebook message: A gitleaks facebook was detected which attempts to identify hard-coded credentials. It is not recommended to store credentials in source-code, as this @@ -5216,11 +5339,11 @@ rules: semgrep.dev: rule: rule_id: WAUeZl - version_id: 2KTzrnR - url: https://semgrep.dev/playground/r/2KTzrnR/generic.secrets.gitleaks.mailchimp-api-key.mailchimp-api-key + version_id: ZRTGRv2 + url: https://semgrep.dev/playground/r/ZRTGRv2/generic.secrets.gitleaks.mailchimp-api-key.mailchimp-api-key origin: community patterns: - - pattern-regex: (?i)(?:mailchimp)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{32}-us20)(?:['|\"|\n|\r|\s|\x60|;]|$) + - pattern-regex: (?i)(?:MailchimpSDK.initialize|mailchimp)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{32}-us\d\d)(?:['|\"|\n|\r|\s|\x60|;]|$) - id: generic.secrets.gitleaks.mailgun-private-api-token.mailgun-private-api-token message: A gitleaks mailgun-private-api-token was detected which attempts to identify hard-coded credentials. It is not recommended to store credentials in source-code, @@ -7433,11 +7556,11 @@ rules: semgrep.dev: rule: rule_id: WAUePl - version_id: o5TglQp - url: https://semgrep.dev/playground/r/o5TglQp/generic.secrets.gitleaks.square-access-token.square-access-token + version_id: nWTGD1Q + url: https://semgrep.dev/playground/r/nWTGD1Q/generic.secrets.gitleaks.square-access-token.square-access-token origin: community patterns: - - pattern-regex: (?i)\b(sq0atp-[0-9A-Za-z\-_]{22})(?:['|\"|\n|\r|\s|\x60|;]|$) + - pattern-regex: (?i)\b((EAAA|sq0atp-)[0-9A-Za-z\-_]{22,60})(?:['|\"|\n|\r|\s|\x60|;]|$) - id: generic.secrets.gitleaks.squarespace-access-token.squarespace-access-token message: A gitleaks squarespace-access-token was detected which attempts to identify hard-coded credentials. It is not recommended to store credentials in source-code, @@ -19193,6 +19316,64 @@ rules: $JWT = require("jsonwebtoken"); ... - pattern: "$JWT.verify($P, $X, {algorithms:[...,'none',...]},...)" +- id: javascript.jwt-simple.security.jwt-simple-noverify.jwt-simple-noverify + message: Detected the decoding of a JWT token without a verify step. JWT tokens + must be verified before use, otherwise the token's integrity is unknown. This + means a malicious actor could forge a JWT token with any claims. Set 'verify' + to `true` before using the token. + severity: ERROR + metadata: + owasp: + - A05:2021 - Security Misconfiguration + - A07:2021 - Identification and Authentication Failures + cwe: + - 'CWE-287: Improper Authentication' + - 'CWE-345: Insufficient Verification of Data Authenticity' + - 'CWE-347: Improper Verification of Cryptographic Signature' + category: security + subcategory: + - vuln + technology: + - jwt-simple + - jwt + confidence: HIGH + likelihood: MEDIUM + impact: HIGH + references: + - https://www.npmjs.com/package/jwt-simple + - https://cwe.mitre.org/data/definitions/287 + - https://cwe.mitre.org/data/definitions/345 + - https://cwe.mitre.org/data/definitions/347 + license: Commons Clause License Condition v1.0[LGPL-2.1-only] + vulnerability_class: + - Cryptographic Issues + - Improper Authentication + source: https://semgrep.dev/r/javascript.jwt-simple.security.jwt-simple-noverify.jwt-simple-noverify + shortlink: https://sg.run/zdjod + semgrep.dev: + rule: + rule_id: r6UyNLy + version_id: 44TgJGG + url: https://semgrep.dev/playground/r/44TgJGG/javascript.jwt-simple.security.jwt-simple-noverify.jwt-simple-noverify + origin: community + languages: + - javascript + - typescript + patterns: + - pattern-inside: | + $JWT = require('jwt-simple'); + ... + - pattern: "$JWT.decode($TOKEN, $SECRET, $NOVERIFY, ...)" + - metavariable-pattern: + metavariable: "$NOVERIFY" + patterns: + - pattern-either: + - pattern: 'true + + ' + - pattern: '"..." + + ' - id: javascript.lang.security.audit.code-string-concat.code-string-concat message: Found data from an Express or Next web request flowing to `eval`. If this data is user-controllable this can lead to execution of arbitrary system commands diff --git a/assets/semgrep_rules/generated/oss/audit.yaml b/assets/semgrep_rules/generated/oss/audit.yaml index 3b24034e..df7f1b68 100644 --- a/assets/semgrep_rules/generated/oss/audit.yaml +++ b/assets/semgrep_rules/generated/oss/audit.yaml @@ -2244,7 +2244,7 @@ rules: - pattern: recv - pattern: recvfrom - id: trailofbits.generic.container-privileged.container-privileged - message: Found container command with extended privileges + message: Found container command (docker, podman) with extended privileges languages: - generic severity: WARNING @@ -2268,8 +2268,8 @@ rules: semgrep.dev: rule: rule_id: ReUD0BO - version_id: vdT1k6d - url: https://semgrep.dev/playground/r/vdT1k6d/trailofbits.generic.container-privileged.container-privileged + version_id: WrTNkvB + url: https://semgrep.dev/playground/r/WrTNkvB/trailofbits.generic.container-privileged.container-privileged origin: community pattern-either: - pattern: docker ... --privileged @@ -2385,12 +2385,15 @@ rules: semgrep.dev: rule: rule_id: DbU6R39 - version_id: nWTOeLd - url: https://semgrep.dev/playground/r/nWTOeLd/trailofbits.generic.curl-unencrypted-url.curl-unencrypted-url + version_id: 0bTrQAn + url: https://semgrep.dev/playground/r/0bTrQAn/trailofbits.generic.curl-unencrypted-url.curl-unencrypted-url origin: community - pattern-either: - - pattern: curl ... http:// - - pattern: curl ... ftp:// + patterns: + - pattern-either: + - pattern: curl ... http:// + - pattern: curl ... ftp:// + - pattern-not-inside: curl ... http://127.0.0.1 + - pattern-not-inside: curl ... http://localhost - id: trailofbits.generic.gpg-insecure-flags.gpg-insecure-flags message: Found `gpg` command using insecure flags languages: @@ -3163,6 +3166,63 @@ rules: ... - pattern: "$SESSION.register_custom_ops_library(...)" - pattern-not: $SESSION.register_custom_ops_library("...") +- id: trailofbits.python.pandas-eval.pandas-eval + message: Pandas eval() and query() may be dangerous if used to evaluate dynamic + content. If this content can be input from outside the program, this may be a + code injection vulnerability. Ensure evaluated content is not definable by external + sources. + languages: + - python + severity: ERROR + metadata: + category: security + cwe: 'CWE-95: Improper Neutralization of Directives in Dynamically Evaluated Code + (''Eval Injection'')' + subcategory: + - audit + confidence: LOW + likelihood: LOW + impact: HIGH + technology: + - pandas + description: Potential arbitrary code execution from `pandas` functions that evaluate + user-provided expressions + references: + - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ + license: AGPL-3.0 license + vulnerability_class: + - Code Injection + source: https://semgrep.dev/r/trailofbits.python.pandas-eval.pandas-eval + shortlink: https://sg.run/bwJed + semgrep.dev: + rule: + rule_id: ReUDw9J + version_id: 2KTLoZO + url: https://semgrep.dev/playground/r/2KTLoZO/trailofbits.python.pandas-eval.pandas-eval + origin: community + patterns: + - pattern-inside: | + import pandas + ... + - pattern-either: + - patterns: + - pattern: pandas.DataFrame.$FN(...) + - pattern-not: pandas.DataFrame.$FN("...", ...) + - pattern-not: pandas.DataFrame.$FN(f"", ...) + - patterns: + - pattern: pandas.$FN(...) + - pattern-not: pandas.$FN("...", ...) + - pattern-not: pandas.$FN(f"", ...) + - patterns: + - pattern-inside: | + $DF = pandas.DataFrame(...) + ... + - pattern: "$DF.$FN(...)" + - pattern-not: $DF.$FN("...", ...) + - pattern-not: $DF.$FN(f"", ...) + - metavariable-regex: + metavariable: "$FN" + regex: "(eval|query)" - id: trailofbits.python.pytorch-classes-load-library.pytorch-classes-load-library message: Loading custom operator libraries can result in arbitrary code execution languages: diff --git a/assets/semgrep_rules/generated/oss/others.yaml b/assets/semgrep_rules/generated/oss/others.yaml index 9e67956a..a213701d 100644 --- a/assets/semgrep_rules/generated/oss/others.yaml +++ b/assets/semgrep_rules/generated/oss/others.yaml @@ -1890,11 +1890,13 @@ rules: semgrep.dev: rule: rule_id: KxU507 - version_id: yeTRvvY - url: https://semgrep.dev/playground/r/yeTRvvY/trailofbits.python.numpy-in-pytorch-modules.numpy-in-pytorch-modules + version_id: pZTz48j + url: https://semgrep.dev/playground/r/pZTz48j/trailofbits.python.numpy-in-pytorch-modules.numpy-in-pytorch-modules origin: community patterns: - - pattern: "$RESULT = numpy.$FUNCTION(...)" + - pattern-either: + - pattern: numpy.$FN(...) + - pattern: numpy. ... .$FN(...) - pattern-inside: | class $MODULE(torch.nn.Module): ... diff --git a/assets/semgrep_rules/generated/oss/vulns.yaml b/assets/semgrep_rules/generated/oss/vulns.yaml index 39c00736..3e0c9e44 100644 --- a/assets/semgrep_rules/generated/oss/vulns.yaml +++ b/assets/semgrep_rules/generated/oss/vulns.yaml @@ -72,122 +72,6 @@ rules: - pattern: org.apache.commons.lang.StringEscapeUtils.escapeHtml(...) - pattern: org.springframework.web.util.HtmlUtils.htmlEscape(...) - pattern: org.owasp.esapi.ESAPI.encoder().encodeForHTML(...) -- id: trailofbits.go.anonymous-race-condition.anonymous-race-condition - message: Possible race condition due to memory aliasing of variable `$X` - languages: - - go - severity: ERROR - metadata: - category: security - cwe: 'CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization - (''Race Condition'')' - subcategory: - - vuln - confidence: MEDIUM - likelihood: HIGH - impact: MEDIUM - technology: - - "--no-technology--" - description: Race conditions within anonymous goroutines - references: - - https://github.com/golang/go/wiki/CommonMistakes#using-goroutines-on-loop-iterator-variables - license: AGPL-3.0 license - vulnerability_class: - - Other - source: https://semgrep.dev/r/trailofbits.go.anonymous-race-condition.anonymous-race-condition - shortlink: https://sg.run/BL22 - semgrep.dev: - rule: - rule_id: nJUr88 - version_id: LjTq4yK - url: https://semgrep.dev/playground/r/LjTq4yK/trailofbits.go.anonymous-race-condition.anonymous-race-condition - origin: community - patterns: - - pattern-either: - - pattern: | - for $Y, $X := range ... { - ... - go func(...){ - ... - $FOO(..., $X, ...) - ... - }(...) - ... - } - - pattern: | - for $Y, $X := range ... { - ... - go func(...){ - ... - $FOO(..., $Y, ...) - ... - }(...) - ... - } - - pattern: | - for $Y, $X := range ... { - ... - go func(...){ - ... - $X(...) - ... - }(...) - ... - } - - pattern: | - for $X:=...;$Y;$Z { - ... - go func(...) { - ... - $FOO(..., $X,...) - ... - }(...) - ... - } - - pattern: | - for $Y, $X := range ... { - ... - go func(...){ - ... - $X. ... .$M(...) - ... - }(...) - ... - } - - pattern-not: | - for ..., $X := range ... { - ... - ..., $X := ..., $X - ... - go func(...){ - ... - $FOO(..., $X, ...) - ... - }(...) - ... - } - - pattern-not: | - for ..., $X := range ... { - ... - $X, ... := $X, ... - ... - go func(...){ - ... - $FOO(..., $X, ...) - ... - }(...) - ... - } - - pattern-not: | - for $Y, $X := range ... { - ... - $Y, $X := $Y, $X - ... - go func(...){ - ... - }(...) - ... - } - id: trailofbits.go.hanging-goroutine.hanging-goroutine message: Potential goroutine leak due to unbuffered channel send inside loop or unbuffered channel receive in select block @@ -302,7 +186,8 @@ rules: $CHANNEL := make(..., $T) ... - id: trailofbits.go.missing-runlock-on-rwmutex.missing-runlock-on-rwmutex - message: Missing `RUnlock` on an `RWMutex` lock before returning from a function + message: Missing `RUnlock` on an `RWMutex` (`$T` variable) lock before returning + from a function languages: - go severity: ERROR @@ -328,13 +213,17 @@ rules: semgrep.dev: rule: rule_id: 8GUzNK - version_id: BjTDX4z - url: https://semgrep.dev/playground/r/BjTDX4z/trailofbits.go.missing-runlock-on-rwmutex.missing-runlock-on-rwmutex + version_id: YDTAKBW + url: https://semgrep.dev/playground/r/YDTAKBW/trailofbits.go.missing-runlock-on-rwmutex.missing-runlock-on-rwmutex origin: community patterns: - pattern-either: - pattern: panic(...) - pattern: return ... + - metavariable-pattern: + metavariable: "$T" + patterns: + - pattern: "($T : sync.RWMutex)\n" - pattern-inside: | $T.RLock() ... @@ -344,6 +233,13 @@ rules: - pattern-not-inside: | defer $T.RUnlock() ... + - pattern-not-inside: | + defer func(...) { + ... + $T.RUnlock() + ... + }(...) + ... - pattern-not-inside: "$FOO(..., ..., func(...) { \n ... \n})\n" - pattern-not-inside: | return func(...) { @@ -352,8 +248,8 @@ rules: ... } - id: trailofbits.go.missing-unlock-before-return.missing-unlock-before-return - message: Missing mutex unlock before returning from a function. This could result - in panics resulting from double lock operations + message: Missing mutex unlock (`$T` variable) before returning from a function. This + could result in panics resulting from double lock operations languages: - go severity: ERROR @@ -379,13 +275,17 @@ rules: semgrep.dev: rule: rule_id: L1U5Gz - version_id: DkT96BG - url: https://semgrep.dev/playground/r/DkT96BG/trailofbits.go.missing-unlock-before-return.missing-unlock-before-return + version_id: 6xTEwrB + url: https://semgrep.dev/playground/r/6xTEwrB/trailofbits.go.missing-unlock-before-return.missing-unlock-before-return origin: community patterns: - pattern-either: - pattern: panic(...) - pattern: return ... + - metavariable-pattern: + metavariable: "$T" + patterns: + - pattern: "($T : sync.Mutex)\n" - pattern-inside: | $T.Lock() ... @@ -395,6 +295,13 @@ rules: - pattern-not-inside: | defer $T.Unlock() ... + - pattern-not-inside: | + defer func(...) { + ... + $T.Unlock() + ... + }(...) + ... - pattern-not-inside: "$FOO(..., ..., func(...) { \n ... \n})\n" - pattern-not-inside: | return func(...) { @@ -1312,8 +1219,8 @@ rules: semgrep.dev: rule: rule_id: 0oUrdJ - version_id: X0TQZZl - url: https://semgrep.dev/playground/r/X0TQZZl/trailofbits.python.lxml-in-pandas.lxml-in-pandas + version_id: o5T2pO6 + url: https://semgrep.dev/playground/r/o5T2pO6/trailofbits.python.lxml-in-pandas.lxml-in-pandas origin: community pattern-either: - patterns: @@ -1339,6 +1246,149 @@ rules: - pattern: 'pandas.read_html(**$KWARGS) ' +- id: trailofbits.python.msgpack-numpy.msgpack-numpy + message: Found usage of msgpack-numpy unpacking, which relies on pickle to deserialize + numpy arrays containing objects. Functions reliant on pickle can result in arbitrary + code execution. Consider switching to a safer serialization method. + languages: + - python + severity: ERROR + metadata: + category: security + cwe: 'CWE-502: Deserialization of Untrusted Data' + subcategory: + - vuln + confidence: MEDIUM + likelihood: MEDIUM + impact: HIGH + technology: + - numpy + description: Potential arbitrary code execution from functions reliant on pickling + references: + - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ + license: AGPL-3.0 license + vulnerability_class: + - 'Insecure Deserialization ' + source: https://semgrep.dev/r/trailofbits.python.msgpack-numpy.msgpack-numpy + shortlink: https://sg.run/r6pr1 + semgrep.dev: + rule: + rule_id: GdUvWBy + version_id: zyTn0O7 + url: https://semgrep.dev/playground/r/zyTn0O7/trailofbits.python.msgpack-numpy.msgpack-numpy + origin: community + pattern-either: + - patterns: + - pattern: msgpack.$FN(...) + - metavariable-regex: + metavariable: "$FN" + regex: "(loads?|dumps?|packb?|unpackb?)" + - pattern-inside: | + msgpack_numpy.patch() + ... + - patterns: + - pattern: msgpack.$FN(..., object_hook=msgpack_numpy.decode, ...) + - metavariable-regex: + metavariable: "$FN" + regex: unpackb? + - patterns: + - pattern: msgpack.$FN(..., default=msgpack_numpy.encode, ...) + - metavariable-regex: + metavariable: "$FN" + regex: packb? +- id: trailofbits.python.pickles-in-keras-deprecation.pickles-in-keras-deprecation + message: The usage of pickle and hdf5 formats for model files are deprecated in + Keras. The keras.models.load_model function is deprecated as well. Keras is now + embedded in Tensorflow 2 under tensorflow.keras. + languages: + - python + severity: WARNING + metadata: + category: security + cwe: 'CWE-502: Deserialization of Untrusted Data' + subcategory: + - vuln + confidence: MEDIUM + likelihood: MEDIUM + impact: HIGH + technology: + - keras + description: Potential arbitrary code execution from Keras' load_model function + references: + - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ + license: AGPL-3.0 license + vulnerability_class: + - 'Insecure Deserialization ' + source: https://semgrep.dev/r/trailofbits.python.pickles-in-keras-deprecation.pickles-in-keras-deprecation + shortlink: https://sg.run/NbJRG + semgrep.dev: + rule: + rule_id: AbU9npB + version_id: X0Tg01y + url: https://semgrep.dev/playground/r/X0Tg01y/trailofbits.python.pickles-in-keras-deprecation.pickles-in-keras-deprecation + origin: community + patterns: + - pattern-either: + - pattern: keras.models.load_model(...) + - pattern: tensorflow.keras.models.load_model(...) + - pattern: keras.saving.load_model(...) + - pattern: tensorflow.keras.saving.load_model(...) + - pattern-not: + patterns: + - pattern-either: + - pattern: keras.models.load_model($FILE) + - pattern: tensorflow.keras.models.load_model($FILE) + - pattern: keras.saving.load_model($FILE) + - pattern: tensorflow.keras.saving.load_model($FILE) + - metavariable-regex: + metavariable: "$FILE" + regex: ".*\\.keras" +- id: trailofbits.python.pickles-in-keras.pickles-in-keras + message: |- + Keras' load_model function may result in arbitrary code execution: - It can load vulnerable pickled models - It can load an hdf5 model that contains a lambda layer with arbitrary code + that will be executed every time the model is used (loading, training, eval) + Note: Keras loading with the built-in file format should be safe as long as checks are not disabled. + languages: + - python + severity: ERROR + metadata: + category: security + cwe: 'CWE-502: Deserialization of Untrusted Data' + subcategory: + - vuln + confidence: MEDIUM + likelihood: MEDIUM + impact: HIGH + technology: + - keras + description: Potential arbitrary code execution from Keras' load_model function + references: + - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ + license: AGPL-3.0 license + vulnerability_class: + - 'Insecure Deserialization ' + source: https://semgrep.dev/r/trailofbits.python.pickles-in-keras.pickles-in-keras + shortlink: https://sg.run/kxK8o + semgrep.dev: + rule: + rule_id: BYUXGv6 + version_id: jQTQ082 + url: https://semgrep.dev/playground/r/jQTQ082/trailofbits.python.pickles-in-keras.pickles-in-keras + origin: community + patterns: + - pattern-either: + - patterns: + - pattern: keras.models.load_model(...) + - pattern-not: keras.models.load_model("...", ...) + - patterns: + - pattern: tensorflow.keras.models.load_model(...) + - pattern-not: tensorflow.keras.models.load_model("...", ...) + - patterns: + - pattern: keras.saving.load_model(...) + - pattern-not: keras.saving.load_model("...", ...) + - patterns: + - pattern: tensorflow.keras.saving.load_model(...) + - pattern-not: tensorflow.keras.saving.load_model("...", ...) - id: trailofbits.python.pickles-in-numpy.pickles-in-numpy message: Functions reliant on pickle can result in arbitrary code execution. Consider using fickling or switching to a safer serialization method @@ -1495,8 +1545,8 @@ rules: semgrep.dev: rule: rule_id: JDU6WD - version_id: xyTKWWO - url: https://semgrep.dev/playground/r/xyTKWWO/trailofbits.python.pickles-in-pytorch.pickles-in-pytorch + version_id: 1QT5wrl + url: https://semgrep.dev/playground/r/1QT5wrl/trailofbits.python.pickles-in-pytorch.pickles-in-pytorch origin: community patterns: - pattern-either: @@ -1505,7 +1555,45 @@ rules: - pattern-not: torch.load("...") - pattern-not: torch.save(..., "...") - pattern-not: torch.save($M.state_dict(), ...) - - pattern-not-inside: "$M.load_state_dict(torch.load(...))" + - pattern-not-inside: "$M.load_state_dict(...)" + - pattern-not: + patterns: + - pattern: torch.save($STATE_DICT, ...) + - pattern-inside: | + $STATE_DICT = $M.state_dict() + ... +- id: trailofbits.python.pickles-in-tensorflow.pickles-in-tensorflow + message: Tensorflow's low-level load function may result in arbitrary code execution. + languages: + - python + severity: ERROR + metadata: + category: security + cwe: 'CWE-502: Deserialization of Untrusted Data' + subcategory: + - vuln + confidence: MEDIUM + likelihood: MEDIUM + impact: HIGH + technology: + - keras + description: Potential arbitrary code execution from tensorflow's load function + references: + - https://blog.trailofbits.com/2021/03/15/never-a-dill-moment-exploiting-machine-learning-pickle-files/ + license: AGPL-3.0 license + vulnerability_class: + - 'Insecure Deserialization ' + source: https://semgrep.dev/r/trailofbits.python.pickles-in-tensorflow.pickles-in-tensorflow + shortlink: https://sg.run/wd5jn + semgrep.dev: + rule: + rule_id: DbU6e7r + version_id: 9lTZ9vg + url: https://semgrep.dev/playground/r/9lTZ9vg/trailofbits.python.pickles-in-tensorflow.pickles-in-tensorflow + origin: community + patterns: + - pattern: tensorflow.saved_model.load(...) + - pattern-not: tensorflow.saved_model.load("...", ...) - id: trailofbits.python.scikit-joblib-load.scikit-joblib-load message: Scikit `joblib` uses pickle under the hood. Functions reliant on pickle can result in arbitrary code execution. Consider using `skops` instead.