From 5f4bc4197bd8f3862a0907632cdfddfda114f50c Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Tue, 7 May 2024 19:06:02 +0100 Subject: [PATCH 1/5] Add private category to sensitive data heuristics --- .../internal/SensitiveDataHeuristics.qll | 43 ++++++++++++++++++- .../internal/SensitiveDataHeuristics.qll | 43 ++++++++++++++++++- .../internal/SensitiveDataHeuristics.qll | 43 ++++++++++++++++++- .../internal/SensitiveDataHeuristics.qll | 43 ++++++++++++++++++- 4 files changed, 168 insertions(+), 4 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll index 0778aa4d23e2..b7f9b06c9c20 100644 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll @@ -14,13 +14,14 @@ * - id: a user name or other account information; * - password: a password or authorization key; * - certificate: a certificate. + * - private: private data such as credit card numbers * * While classifications are represented as strings, this should not be relied upon. * Instead, use the predicates in `SensitiveDataClassification::` to work with * classifications. */ class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate"] } + SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } } /** @@ -38,6 +39,9 @@ module SensitiveDataClassification { /** Gets the classification for certificates. */ SensitiveDataClassification certificate() { result = "certificate" } + + /** Gets the classification for private data. 
*/ + SensitiveDataClassification private() { result = "private" } } /** @@ -77,6 +81,40 @@ module HeuristicNames { */ string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name|ification)).*" } + /** + * Gets a regular expression that identifies strings that may indicate the presence of + * private data. + */ + string maybePrivate() { + result = + "(?is).*(" + + // Inspired by the list on https://cwe.mitre.org/data/definitions/359.html + // Government identifiers, such as Social Security Numbers + "social.?security|employer.?identification|national.?insurance|resident.?id|" + + "passport.?(num|no)|([_-]|\\b)ssn([_-]|\\b)|" + + // Contact information, such as home addresses + "post.?code|zip.?code|home.?addr|" + + // and telephone numbers + "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + + "emergency.?contact|" + + // Geographic location - where the user is (or was) + "l(atitude|ongitude)|nationality|" + + // Financial data - such as credit card numbers, salary, bank accounts, and debts + "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. + // "e(mail|_mail)|" + // this seems too noisy + // Health - medical conditions, insurance status, prescription records + "birth.?da(te|y)|da(te|y).?(of.?)?birth|" + + "medical|(health|care).?plan|healthkit|appointment|prescription|" + + "blood.?(type|alcohol|glucose|pressure)|heart.?(rate|rhythm)|body.?(mass|fat)|" + + "menstrua|pregnan|insulin|inhaler|" + + // Relationships - work and family + "employ(er|ee)|spouse|maiden.?name" + + // --- + ").*" + } + /** * Gets a regular expression that identifies strings that may indicate the presence * of sensitive data, with `classification` describing the kind of sensitive data involved. 
@@ -90,6 +128,9 @@ module HeuristicNames { or result = maybeCertificate() and classification = SensitiveDataClassification::certificate() + or + result = maybePrivate() and + classification = SensitiveDataClassification::private() } /** diff --git a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll index 0778aa4d23e2..b7f9b06c9c20 100644 --- a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll +++ b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll @@ -14,13 +14,14 @@ * - id: a user name or other account information; * - password: a password or authorization key; * - certificate: a certificate. + * - private: private data such as credit card numbers * * While classifications are represented as strings, this should not be relied upon. * Instead, use the predicates in `SensitiveDataClassification::` to work with * classifications. */ class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate"] } + SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } } /** @@ -38,6 +39,9 @@ module SensitiveDataClassification { /** Gets the classification for certificates. */ SensitiveDataClassification certificate() { result = "certificate" } + + /** Gets the classification for private data. */ + SensitiveDataClassification private() { result = "private" } } /** @@ -77,6 +81,40 @@ module HeuristicNames { */ string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name|ification)).*" } + /** + * Gets a regular expression that identifies strings that may indicate the presence of + * private data. 
+ */ + string maybePrivate() { + result = + "(?is).*(" + + // Inspired by the list on https://cwe.mitre.org/data/definitions/359.html + // Government identifiers, such as Social Security Numbers + "social.?security|employer.?identification|national.?insurance|resident.?id|" + + "passport.?(num|no)|([_-]|\\b)ssn([_-]|\\b)|" + + // Contact information, such as home addresses + "post.?code|zip.?code|home.?addr|" + + // and telephone numbers + "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + + "emergency.?contact|" + + // Geographic location - where the user is (or was) + "l(atitude|ongitude)|nationality|" + + // Financial data - such as credit card numbers, salary, bank accounts, and debts + "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. + // "e(mail|_mail)|" + // this seems too noisy + // Health - medical conditions, insurance status, prescription records + "birth.?da(te|y)|da(te|y).?(of.?)?birth|" + + "medical|(health|care).?plan|healthkit|appointment|prescription|" + + "blood.?(type|alcohol|glucose|pressure)|heart.?(rate|rhythm)|body.?(mass|fat)|" + + "menstrua|pregnan|insulin|inhaler|" + + // Relationships - work and family + "employ(er|ee)|spouse|maiden.?name" + + // --- + ").*" + } + /** * Gets a regular expression that identifies strings that may indicate the presence * of sensitive data, with `classification` describing the kind of sensitive data involved. 
@@ -90,6 +128,9 @@ module HeuristicNames { or result = maybeCertificate() and classification = SensitiveDataClassification::certificate() + or + result = maybePrivate() and + classification = SensitiveDataClassification::private() } /** diff --git a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll index 0778aa4d23e2..b7f9b06c9c20 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll +++ b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll @@ -14,13 +14,14 @@ * - id: a user name or other account information; * - password: a password or authorization key; * - certificate: a certificate. + * - private: private data such as credit card numbers * * While classifications are represented as strings, this should not be relied upon. * Instead, use the predicates in `SensitiveDataClassification::` to work with * classifications. */ class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate"] } + SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } } /** @@ -38,6 +39,9 @@ module SensitiveDataClassification { /** Gets the classification for certificates. */ SensitiveDataClassification certificate() { result = "certificate" } + + /** Gets the classification for private data. */ + SensitiveDataClassification private() { result = "private" } } /** @@ -77,6 +81,40 @@ module HeuristicNames { */ string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name|ification)).*" } + /** + * Gets a regular expression that identifies strings that may indicate the presence of + * private data. 
+ */ + string maybePrivate() { + result = + "(?is).*(" + + // Inspired by the list on https://cwe.mitre.org/data/definitions/359.html + // Government identifiers, such as Social Security Numbers + "social.?security|employer.?identification|national.?insurance|resident.?id|" + + "passport.?(num|no)|([_-]|\\b)ssn([_-]|\\b)|" + + // Contact information, such as home addresses + "post.?code|zip.?code|home.?addr|" + + // and telephone numbers + "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + + "emergency.?contact|" + + // Geographic location - where the user is (or was) + "l(atitude|ongitude)|nationality|" + + // Financial data - such as credit card numbers, salary, bank accounts, and debts + "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. + // "e(mail|_mail)|" + // this seems too noisy + // Health - medical conditions, insurance status, prescription records + "birth.?da(te|y)|da(te|y).?(of.?)?birth|" + + "medical|(health|care).?plan|healthkit|appointment|prescription|" + + "blood.?(type|alcohol|glucose|pressure)|heart.?(rate|rhythm)|body.?(mass|fat)|" + + "menstrua|pregnan|insulin|inhaler|" + + // Relationships - work and family + "employ(er|ee)|spouse|maiden.?name" + + // --- + ").*" + } + /** * Gets a regular expression that identifies strings that may indicate the presence * of sensitive data, with `classification` describing the kind of sensitive data involved. 
@@ -90,6 +128,9 @@ module HeuristicNames { or result = maybeCertificate() and classification = SensitiveDataClassification::certificate() + or + result = maybePrivate() and + classification = SensitiveDataClassification::private() } /** diff --git a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll index 0778aa4d23e2..b7f9b06c9c20 100644 --- a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll +++ b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll @@ -14,13 +14,14 @@ * - id: a user name or other account information; * - password: a password or authorization key; * - certificate: a certificate. + * - private: private data such as credit card numbers * * While classifications are represented as strings, this should not be relied upon. * Instead, use the predicates in `SensitiveDataClassification::` to work with * classifications. */ class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate"] } + SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } } /** @@ -38,6 +39,9 @@ module SensitiveDataClassification { /** Gets the classification for certificates. */ SensitiveDataClassification certificate() { result = "certificate" } + + /** Gets the classification for private data. */ + SensitiveDataClassification private() { result = "private" } } /** @@ -77,6 +81,40 @@ module HeuristicNames { */ string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name|ification)).*" } + /** + * Gets a regular expression that identifies strings that may indicate the presence of + * private data. 
+ */ + string maybePrivate() { + result = + "(?is).*(" + + // Inspired by the list on https://cwe.mitre.org/data/definitions/359.html + // Government identifiers, such as Social Security Numbers + "social.?security|employer.?identification|national.?insurance|resident.?id|" + + "passport.?(num|no)|([_-]|\\b)ssn([_-]|\\b)|" + + // Contact information, such as home addresses + "post.?code|zip.?code|home.?addr|" + + // and telephone numbers + "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + + "emergency.?contact|" + + // Geographic location - where the user is (or was) + "l(atitude|ongitude)|nationality|" + + // Financial data - such as credit card numbers, salary, bank accounts, and debts + "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. + // "e(mail|_mail)|" + // this seems too noisy + // Health - medical conditions, insurance status, prescription records + "birth.?da(te|y)|da(te|y).?(of.?)?birth|" + + "medical|(health|care).?plan|healthkit|appointment|prescription|" + + "blood.?(type|alcohol|glucose|pressure)|heart.?(rate|rhythm)|body.?(mass|fat)|" + + "menstrua|pregnan|insulin|inhaler|" + + // Relationships - work and family + "employ(er|ee)|spouse|maiden.?name" + + // --- + ").*" + } + /** * Gets a regular expression that identifies strings that may indicate the presence * of sensitive data, with `classification` describing the kind of sensitive data involved. 
@@ -90,6 +128,9 @@ module HeuristicNames { or result = maybeCertificate() and classification = SensitiveDataClassification::certificate() + or + result = maybePrivate() and + classification = SensitiveDataClassification::private() } /** From 23fbfcee276ed0c33ecc41d028fe883c494b206e Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Tue, 7 May 2024 19:36:50 +0100 Subject: [PATCH 2/5] Exclude new private heuristics from swift sensitive credential --- swift/ql/lib/codeql/swift/security/SensitiveExprs.qll | 1 + 1 file changed, 1 insertion(+) diff --git a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll index c7c204af9625..0c712b4fbfdf 100644 --- a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll +++ b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll @@ -49,6 +49,7 @@ class SensitiveCredential extends SensitiveDataType, TCredential { exists(SensitiveDataClassification classification | not classification = SensitiveDataClassification::password() and // covered by `SensitivePassword` not classification = SensitiveDataClassification::id() and // not accurate enough + not classification = SensitiveDataClassification::private() and // covered by `SensitivePrivateInfo` result = HeuristicNames::maybeSensitiveRegexp(classification) ) or From 9aff22c6644bc694b98e762b739c124196682568 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 9 May 2024 09:39:03 +0100 Subject: [PATCH 3/5] Fix typos in sensitive data regex --- .../security/internal/SensitiveDataHeuristics.qll | 6 +++--- .../python/security/internal/SensitiveDataHeuristics.qll | 6 +++--- .../ruby/security/internal/SensitiveDataHeuristics.qll | 6 +++--- .../swift/security/internal/SensitiveDataHeuristics.qll | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll 
index b7f9b06c9c20..eb8a0c1fe756 100644 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll @@ -98,10 +98,10 @@ module HeuristicNames { "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + "emergency.?contact|" + // Geographic location - where the user is (or was) - "l(atitude|ongitude)|nationality|" + + "latitude|longitude|nationality|" + // Financial data - such as credit card numbers, salary, bank accounts, and debts - "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + - "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + "(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)|" + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. // "e(mail|_mail)|" + // this seems too noisy // Health - medical conditions, insurance status, prescription records diff --git a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll index b7f9b06c9c20..eb8a0c1fe756 100644 --- a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll +++ b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll @@ -98,10 +98,10 @@ module HeuristicNames { "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + "emergency.?contact|" + // Geographic location - where the user is (or was) - "l(atitude|ongitude)|nationality|" + + "latitude|longitude|nationality|" + // Financial data - such as credit card numbers, salary, bank accounts, and debts - "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + - "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + 
"(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)|" + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. // "e(mail|_mail)|" + // this seems too noisy // Health - medical conditions, insurance status, prescription records diff --git a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll index b7f9b06c9c20..eb8a0c1fe756 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll +++ b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll @@ -98,10 +98,10 @@ module HeuristicNames { "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + "emergency.?contact|" + // Geographic location - where the user is (or was) - "l(atitude|ongitude)|nationality|" + + "latitude|longitude|nationality|" + // Financial data - such as credit card numbers, salary, bank accounts, and debts - "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + - "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + "(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)|" + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. 
// "e(mail|_mail)|" + // this seems too noisy // Health - medical conditions, insurance status, prescription records diff --git a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll index b7f9b06c9c20..eb8a0c1fe756 100644 --- a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll +++ b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll @@ -98,10 +98,10 @@ module HeuristicNames { "(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" + "emergency.?contact|" + // Geographic location - where the user is (or was) - "l(atitude|ongitude)|nationality|" + + "latitude|longitude|nationality|" + // Financial data - such as credit card numbers, salary, bank accounts, and debts - "(credit|debit|bank|visa).?(card|num|no|acc(ou?)nt)|acc(ou)?nt.?(no|num|credit)|" + - "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)" + + "(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|acc(ou)?nt.?(no|num|credit)|" + + "salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)|" + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. 
// "e(mail|_mail)|" + // this seems too noisy // Health - medical conditions, insurance status, prescription records From f1ab3f40f3a748e5dba6c43d923d1a6f17dff27d Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 9 May 2024 09:47:44 +0100 Subject: [PATCH 4/5] Add unit tests --- .../CleartextLogging.expected | 64 +++++++++++++++++-- .../Security/CWE-312-CleartextLogging/test.py | 31 +++++++++ 2 files changed, 89 insertions(+), 6 deletions(-) diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected index f4b5ef932048..1322f5b80e65 100644 --- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected +++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected @@ -7,8 +7,21 @@ edges | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:19:5:19:12 | ControlFlowNode for password | provenance | | | test.py:44:5:44:5 | ControlFlowNode for x | test.py:45:11:45:11 | ControlFlowNode for x | provenance | | | test.py:44:9:44:25 | ControlFlowNode for Attribute() | test.py:44:5:44:5 | ControlFlowNode for x | provenance | | -| test.py:70:5:70:10 | ControlFlowNode for config | test.py:74:11:74:31 | ControlFlowNode for Subscript | provenance | | -| test.py:72:21:72:37 | ControlFlowNode for Attribute | test.py:70:5:70:10 | ControlFlowNode for config | provenance | | +| test.py:48:14:48:35 | ControlFlowNode for social_security_number | test.py:49:15:49:36 | ControlFlowNode for social_security_number | provenance | | +| test.py:48:38:48:40 | ControlFlowNode for ssn | test.py:50:15:50:17 | ControlFlowNode for ssn | provenance | | +| test.py:48:54:48:63 | ControlFlowNode for passportNo | test.py:52:15:52:24 | ControlFlowNode for passportNo | provenance | | +| test.py:54:34:54:45 | ControlFlowNode for home_address | test.py:57:15:57:26 | ControlFlowNode for home_address | 
provenance | | +| test.py:59:14:59:26 | ControlFlowNode for user_latitude | test.py:60:15:60:27 | ControlFlowNode for user_latitude | provenance | | +| test.py:59:29:59:42 | ControlFlowNode for user_longitude | test.py:61:15:61:28 | ControlFlowNode for user_longitude | provenance | | +| test.py:63:14:63:26 | ControlFlowNode for mobile_number | test.py:64:15:64:27 | ControlFlowNode for mobile_number | provenance | | +| test.py:63:29:63:35 | ControlFlowNode for phoneNo | test.py:65:15:65:21 | ControlFlowNode for phoneNo | provenance | | +| test.py:67:14:67:23 | ControlFlowNode for creditcard | test.py:68:15:68:24 | ControlFlowNode for creditcard | provenance | | +| test.py:67:26:67:35 | ControlFlowNode for debit_card | test.py:69:15:69:24 | ControlFlowNode for debit_card | provenance | | +| test.py:67:38:67:48 | ControlFlowNode for bank_number | test.py:70:15:70:25 | ControlFlowNode for bank_number | provenance | | +| test.py:67:76:67:78 | ControlFlowNode for ccn | test.py:73:15:73:17 | ControlFlowNode for ccn | provenance | | +| test.py:67:81:67:88 | ControlFlowNode for user_ccn | test.py:74:15:74:22 | ControlFlowNode for user_ccn | provenance | | +| test.py:101:5:101:10 | ControlFlowNode for config | test.py:105:11:105:31 | ControlFlowNode for Subscript | provenance | | +| test.py:103:21:103:37 | ControlFlowNode for Attribute | test.py:101:5:101:10 | ControlFlowNode for config | provenance | | nodes | test.py:19:5:19:12 | ControlFlowNode for password | semmle.label | ControlFlowNode for password | | test.py:19:16:19:29 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() | @@ -24,9 +37,35 @@ nodes | test.py:44:5:44:5 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | | test.py:44:9:44:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:45:11:45:11 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | -| test.py:70:5:70:10 | ControlFlowNode for config | 
semmle.label | ControlFlowNode for config | -| test.py:72:21:72:37 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| test.py:74:11:74:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:48:14:48:35 | ControlFlowNode for social_security_number | semmle.label | ControlFlowNode for social_security_number | +| test.py:48:38:48:40 | ControlFlowNode for ssn | semmle.label | ControlFlowNode for ssn | +| test.py:48:54:48:63 | ControlFlowNode for passportNo | semmle.label | ControlFlowNode for passportNo | +| test.py:49:15:49:36 | ControlFlowNode for social_security_number | semmle.label | ControlFlowNode for social_security_number | +| test.py:50:15:50:17 | ControlFlowNode for ssn | semmle.label | ControlFlowNode for ssn | +| test.py:52:15:52:24 | ControlFlowNode for passportNo | semmle.label | ControlFlowNode for passportNo | +| test.py:54:34:54:45 | ControlFlowNode for home_address | semmle.label | ControlFlowNode for home_address | +| test.py:57:15:57:26 | ControlFlowNode for home_address | semmle.label | ControlFlowNode for home_address | +| test.py:59:14:59:26 | ControlFlowNode for user_latitude | semmle.label | ControlFlowNode for user_latitude | +| test.py:59:29:59:42 | ControlFlowNode for user_longitude | semmle.label | ControlFlowNode for user_longitude | +| test.py:60:15:60:27 | ControlFlowNode for user_latitude | semmle.label | ControlFlowNode for user_latitude | +| test.py:61:15:61:28 | ControlFlowNode for user_longitude | semmle.label | ControlFlowNode for user_longitude | +| test.py:63:14:63:26 | ControlFlowNode for mobile_number | semmle.label | ControlFlowNode for mobile_number | +| test.py:63:29:63:35 | ControlFlowNode for phoneNo | semmle.label | ControlFlowNode for phoneNo | +| test.py:64:15:64:27 | ControlFlowNode for mobile_number | semmle.label | ControlFlowNode for mobile_number | +| test.py:65:15:65:21 | ControlFlowNode for phoneNo | semmle.label | ControlFlowNode for 
phoneNo | +| test.py:67:14:67:23 | ControlFlowNode for creditcard | semmle.label | ControlFlowNode for creditcard | +| test.py:67:26:67:35 | ControlFlowNode for debit_card | semmle.label | ControlFlowNode for debit_card | +| test.py:67:38:67:48 | ControlFlowNode for bank_number | semmle.label | ControlFlowNode for bank_number | +| test.py:67:76:67:78 | ControlFlowNode for ccn | semmle.label | ControlFlowNode for ccn | +| test.py:67:81:67:88 | ControlFlowNode for user_ccn | semmle.label | ControlFlowNode for user_ccn | +| test.py:68:15:68:24 | ControlFlowNode for creditcard | semmle.label | ControlFlowNode for creditcard | +| test.py:69:15:69:24 | ControlFlowNode for debit_card | semmle.label | ControlFlowNode for debit_card | +| test.py:70:15:70:25 | ControlFlowNode for bank_number | semmle.label | ControlFlowNode for bank_number | +| test.py:73:15:73:17 | ControlFlowNode for ccn | semmle.label | ControlFlowNode for ccn | +| test.py:74:15:74:22 | ControlFlowNode for user_ccn | semmle.label | ControlFlowNode for user_ccn | +| test.py:101:5:101:10 | ControlFlowNode for config | semmle.label | ControlFlowNode for config | +| test.py:103:21:103:37 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:105:11:105:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | subpaths #select | test.py:20:48:20:55 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) | @@ -39,4 +78,17 @@ subpaths | test.py:39:22:39:35 | ControlFlowNode for get_password() | test.py:39:22:39:35 | ControlFlowNode for get_password() | test.py:39:22:39:35 | ControlFlowNode for get_password() | This expression logs $@ as clear text. 
| test.py:39:22:39:35 | ControlFlowNode for get_password() | sensitive data (password) | | test.py:40:22:40:35 | ControlFlowNode for get_password() | test.py:40:22:40:35 | ControlFlowNode for get_password() | test.py:40:22:40:35 | ControlFlowNode for get_password() | This expression logs $@ as clear text. | test.py:40:22:40:35 | ControlFlowNode for get_password() | sensitive data (password) | | test.py:45:11:45:11 | ControlFlowNode for x | test.py:44:9:44:25 | ControlFlowNode for Attribute() | test.py:45:11:45:11 | ControlFlowNode for x | This expression logs $@ as clear text. | test.py:44:9:44:25 | ControlFlowNode for Attribute() | sensitive data (password) | -| test.py:74:11:74:31 | ControlFlowNode for Subscript | test.py:72:21:72:37 | ControlFlowNode for Attribute | test.py:74:11:74:31 | ControlFlowNode for Subscript | This expression logs $@ as clear text. | test.py:72:21:72:37 | ControlFlowNode for Attribute | sensitive data (password) | +| test.py:49:15:49:36 | ControlFlowNode for social_security_number | test.py:48:14:48:35 | ControlFlowNode for social_security_number | test.py:49:15:49:36 | ControlFlowNode for social_security_number | This expression logs $@ as clear text. | test.py:48:14:48:35 | ControlFlowNode for social_security_number | sensitive data (private) | +| test.py:50:15:50:17 | ControlFlowNode for ssn | test.py:48:38:48:40 | ControlFlowNode for ssn | test.py:50:15:50:17 | ControlFlowNode for ssn | This expression logs $@ as clear text. | test.py:48:38:48:40 | ControlFlowNode for ssn | sensitive data (private) | +| test.py:52:15:52:24 | ControlFlowNode for passportNo | test.py:48:54:48:63 | ControlFlowNode for passportNo | test.py:52:15:52:24 | ControlFlowNode for passportNo | This expression logs $@ as clear text. 
| test.py:48:54:48:63 | ControlFlowNode for passportNo | sensitive data (private) | +| test.py:57:15:57:26 | ControlFlowNode for home_address | test.py:54:34:54:45 | ControlFlowNode for home_address | test.py:57:15:57:26 | ControlFlowNode for home_address | This expression logs $@ as clear text. | test.py:54:34:54:45 | ControlFlowNode for home_address | sensitive data (private) | +| test.py:60:15:60:27 | ControlFlowNode for user_latitude | test.py:59:14:59:26 | ControlFlowNode for user_latitude | test.py:60:15:60:27 | ControlFlowNode for user_latitude | This expression logs $@ as clear text. | test.py:59:14:59:26 | ControlFlowNode for user_latitude | sensitive data (private) | +| test.py:61:15:61:28 | ControlFlowNode for user_longitude | test.py:59:29:59:42 | ControlFlowNode for user_longitude | test.py:61:15:61:28 | ControlFlowNode for user_longitude | This expression logs $@ as clear text. | test.py:59:29:59:42 | ControlFlowNode for user_longitude | sensitive data (private) | +| test.py:64:15:64:27 | ControlFlowNode for mobile_number | test.py:63:14:63:26 | ControlFlowNode for mobile_number | test.py:64:15:64:27 | ControlFlowNode for mobile_number | This expression logs $@ as clear text. | test.py:63:14:63:26 | ControlFlowNode for mobile_number | sensitive data (private) | +| test.py:65:15:65:21 | ControlFlowNode for phoneNo | test.py:63:29:63:35 | ControlFlowNode for phoneNo | test.py:65:15:65:21 | ControlFlowNode for phoneNo | This expression logs $@ as clear text. | test.py:63:29:63:35 | ControlFlowNode for phoneNo | sensitive data (private) | +| test.py:68:15:68:24 | ControlFlowNode for creditcard | test.py:67:14:67:23 | ControlFlowNode for creditcard | test.py:68:15:68:24 | ControlFlowNode for creditcard | This expression logs $@ as clear text. 
| test.py:67:14:67:23 | ControlFlowNode for creditcard | sensitive data (private) | +| test.py:69:15:69:24 | ControlFlowNode for debit_card | test.py:67:26:67:35 | ControlFlowNode for debit_card | test.py:69:15:69:24 | ControlFlowNode for debit_card | This expression logs $@ as clear text. | test.py:67:26:67:35 | ControlFlowNode for debit_card | sensitive data (private) | +| test.py:70:15:70:25 | ControlFlowNode for bank_number | test.py:67:38:67:48 | ControlFlowNode for bank_number | test.py:70:15:70:25 | ControlFlowNode for bank_number | This expression logs $@ as clear text. | test.py:67:38:67:48 | ControlFlowNode for bank_number | sensitive data (private) | +| test.py:73:15:73:17 | ControlFlowNode for ccn | test.py:67:76:67:78 | ControlFlowNode for ccn | test.py:73:15:73:17 | ControlFlowNode for ccn | This expression logs $@ as clear text. | test.py:67:76:67:78 | ControlFlowNode for ccn | sensitive data (private) | +| test.py:74:15:74:22 | ControlFlowNode for user_ccn | test.py:67:81:67:88 | ControlFlowNode for user_ccn | test.py:74:15:74:22 | ControlFlowNode for user_ccn | This expression logs $@ as clear text. | test.py:67:81:67:88 | ControlFlowNode for user_ccn | sensitive data (private) | +| test.py:105:11:105:31 | ControlFlowNode for Subscript | test.py:103:21:103:37 | ControlFlowNode for Attribute | test.py:105:11:105:31 | ControlFlowNode for Subscript | This expression logs $@ as clear text. 
| test.py:103:21:103:37 | ControlFlowNode for Attribute | sensitive data (password) | diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py index b5ebe7593bae..94a8c9db836f 100644 --- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py +++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py @@ -44,6 +44,37 @@ def print_password(): x = getpass.getpass() print(x) # NOT OK +def log_private(): + def log1(social_security_number, ssn, className, passportNo): + print(social_security_number) # NOT OK + print(ssn) # NOT OK + print(className) # OK + print(passportNo) # NOT OK + + def log2(post_code, zipCode, home_address): + print(post_code) # NOT OK, but NOT FOUND - "code" is treated as encrypted and thus not sensitive + print(zipCode) # NOT OK, but NOT FOUND - "code" is treated as encrypted and thus not sensitive + print(home_address) # NOT OK + + def log3(user_latitude, user_longitude): + print(user_latitude) # NOT OK + print(user_longitude) # NOT OK + + def log4(mobile_number, phoneNo): + print(mobile_number) # NOT OK + print(phoneNo) # NOT OK + + def log5(creditcard, debit_card, bank_number, bank_account, accountNo, ccn, user_ccn, succNode): + print(creditcard) # NOT OK + print(debit_card) # NOT OK + print(bank_number) # NOT OK + print(bank_account) # NOT OK, but NOT FOUND - "account" is treated as having the "id" classification and thus excluded. + print(accountNo) # NOT OK, but NOT FOUND - "account" is treated as having the "id" classification and thus excluded. 
+ print(ccn) # NOT OK + print(user_ccn) # NOT OK + print(succNode) # OK + + def FPs(account, account_id): # we assume that any account parameter is sensitive (id/username) From da93a08639398117578965b21021f9fefecc768a Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Thu, 9 May 2024 10:03:20 +0100 Subject: [PATCH 5/5] Add change notes No change note is needed for Swift, as the new heuristics are unused and thus should not affect any queries. --- .../ql/lib/change-notes/2024-05-09-sensitive-heuristics.md | 4 ++++ python/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md | 4 ++++ ruby/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md | 4 ++++ 3 files changed, 12 insertions(+) create mode 100644 javascript/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md create mode 100644 python/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md create mode 100644 ruby/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md diff --git a/javascript/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md b/javascript/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md new file mode 100644 index 000000000000..a1b98f691c17 --- /dev/null +++ b/javascript/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Additional heuristics for a new sensitive data classification for private information (e.g. credit card numbers) have been added to the shared `SensitiveDataHeuristics.qll` library. This may result in additional results for queries that use sensitive data such as `js/clear-text-storage-sensitive-data` and `js/clear-text-logging`. 
\ No newline at end of file diff --git a/python/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md b/python/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md new file mode 100644 index 000000000000..e51617a97859 --- /dev/null +++ b/python/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Additional heuristics for a new sensitive data classification for private information (e.g. credit card numbers) have been added to the shared `SensitiveDataHeuristics.qll` library. This may result in additional results for queries that use sensitive data such as `py/clear-text-storage-sensitive-data` and `py/clear-text-logging-sensitive-data`. \ No newline at end of file diff --git a/ruby/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md b/ruby/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md new file mode 100644 index 000000000000..c9db56787092 --- /dev/null +++ b/ruby/ql/lib/change-notes/2024-05-09-sensitive-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Additional heuristics for a new sensitive data classification for private information (e.g. credit card numbers) have been added to the shared `SensitiveDataHeuristics.qll` library. This may result in additional results for queries that use sensitive data such as `rb/sensitive-get-query`. \ No newline at end of file