From 4656d95702e57e8a46d61b4445360e9043d52e93 Mon Sep 17 00:00:00 2001 From: salmanrajz Date: Tue, 31 Mar 2026 19:51:51 +0400 Subject: [PATCH 1/6] fix: handle UnicodeDecodeError on usernames with special characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #2730. Usernames containing non-ASCII characters (e.g. 'Émile') can trigger a UnicodeDecodeError inside the requests library during redirect handling. This exception is not a subclass of requests.exceptions.RequestException, so it escaped all existing except blocks in get_response() and crashed the program. Added a catch for UnicodeError (parent of both UnicodeDecodeError and UnicodeEncodeError) so these sites are gracefully skipped instead of crashing the entire scan. Added regression tests in tests/test_unicode.py. --- sherlock_project/sherlock.py | 3 +++ tests/test_unicode.py | 47 ++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tests/test_unicode.py diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index f78d4b8cac..ab7d993778 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network): except requests.exceptions.RequestException as err: error_context = "Unknown Error" exception_text = str(err) + except UnicodeError as err: + error_context = "Encoding Error" + exception_text = str(err) return response, error_context, exception_text diff --git a/tests/test_unicode.py b/tests/test_unicode.py new file mode 100644 index 0000000000..fa6e3a3038 --- /dev/null +++ b/tests/test_unicode.py @@ -0,0 +1,47 @@ +"""Tests for handling usernames with special/unicode characters.""" + +from concurrent.futures import Future + +from sherlock_project.sherlock import get_response + + +def _make_future_with_exception(exc): + """Create a Future that raises the given exception.""" + future = Future() + future.set_exception(exc) + return future + + +def test_get_response_handles_unicode_decode_error(): + """Regression test for issue #2730. + + Usernames with special characters (e.g. 'Émile') can trigger a + UnicodeDecodeError inside the requests library during redirect + handling. This must not crash the program. + """ + future = _make_future_with_exception( + UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte") + ) + response, error_context, exception_text = get_response( + request_future=future, + error_type=["status_code"], + social_network="TestSite", + ) + assert response is None + assert error_context == "Encoding Error" + assert "utf-8" in exception_text + + +def test_get_response_handles_unicode_encode_error(): + """UnicodeEncodeError should also be caught (subclass of UnicodeError).""" + future = _make_future_with_exception( + UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)") + ) + response, error_context, exception_text = get_response( + request_future=future, + error_type=["status_code"], + social_network="TestSite", + ) + assert response is None + assert error_context == "Encoding Error" + assert "ascii" in exception_text From 32fde9bfc634ca2547b82cfa79c27bf1af1b3150 Mon Sep 17 00:00:00 2001 From: salmanrajz Date: Tue, 31 Mar 2026 20:11:55 +0400 Subject: [PATCH 2/6] fix: update NSFW tests to use sites not in exclusions list Pornhub was added to the remote false_positive_exclusions.txt, causing test_remove_nsfw and test_nsfw_explicit_selection to fail since the site gets filtered out before the test runs. Replaced with Xvideos and Erome which are NSFW-flagged but not excluded. --- tests/test_ux.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_ux.py b/tests/test_ux.py index 3c62463b50..1feaf88a19 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -4,7 +4,7 @@ from sherlock_interactives import InteractivesSubprocessError def test_remove_nsfw(sites_obj): - nsfw_target: str = 'Pornhub' + nsfw_target: str = 'Xvideos' assert nsfw_target in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites() assert nsfw_target not in {site.name: site.information for site in sites_obj} @@ -12,8 +12,8 @@ def test_remove_nsfw(sites_obj): # Parametrized sites should *not* include Motherless, which is acting as the control @pytest.mark.parametrize('nsfwsites', [ - ['Pornhub'], - ['Pornhub', 'Xvideos'], + ['Xvideos'], + ['Xvideos', 'Erome'], ]) def test_nsfw_explicit_selection(sites_obj, nsfwsites): for site in nsfwsites: From d731f715bfc46bb24314a014e1d3c8b4a4781727 Mon Sep 17 00:00:00 2001 From: QuanNguyen Date: Sun, 26 Apr 2026 15:44:27 +0200 Subject: [PATCH 3/6] Fix Cracked Forum false positives Made-with: Cursor --- sherlock_project/resources/data.json | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 47c39438de..4a70564f2e 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -602,10 +602,9 @@ "username_claimed": "blue" }, "Cracked Forum": { - "errorMsg": "The member you specified is either invalid or doesn't exist", - "errorType": "message", - "url": "https://cracked.sh/{}", - "urlMain": "https://cracked.sh/", + "errorType": "status_code", + "url": "https://cracked.ax/{}", + "urlMain": "https://cracked.ax/", "username_claimed": "Blue" }, "Credly": { From a9960ff9a404896ed05313001f70b18ab7871f0b Mon Sep 17 00:00:00 2001 From: QuanNguyen Date: Sun, 26 Apr 2026 16:00:27 +0200 Subject: [PATCH 4/6] Fix akniga false negatives Made-with: Cursor --- sherlock_project/resources/data.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 47c39438de..70c8ab9649 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2827,8 +2827,10 @@ }, "akniga": { "errorType": "status_code", + "errorCode": 404, + "request_method": "GET", "url": "https://akniga.org/profile/{}", - "urlMain": "https://akniga.org/profile/blue/", + "urlMain": "https://akniga.org/", "username_claimed": "blue" }, "authorSTREAM": { From 2e2248a8a695eb147f3ea82d759eb69bb2f6785d Mon Sep 17 00:00:00 2001 From: Mohamed Solaiman Date: Tue, 28 Apr 2026 17:01:37 +0000 Subject: [PATCH 5/6] fix: resolve false positives for ArtStation, GeeksforGeeks, and LushStories - ArtStation: Add urlProbe using the JSON API endpoint (https://www.artstation.com/users/{}.json) which returns proper 404 for non-existing users, instead of the main page which returns 200 for both existing and non-existing profiles. Closes #2714 - GeeksforGeeks: Switch from status_code to message detection. Both existing and non-existing profiles return HTTP 200, but non-existing profiles have "false" in the page title. Closes #2782 - LushStories: Switch from status_code to response_url detection. Non-existing profiles redirect (302) to /login while existing profiles return 200. Closes #2371 --- sherlock_project/resources/data.json | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 47c39438de..6f38a83686 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -159,6 +159,7 @@ "errorType": "status_code", "url": "https://www.artstation.com/{}", "urlMain": "https://www.artstation.com/", + "urlProbe": "https://www.artstation.com/users/{}.json", "username_claimed": "Blue" }, "Asciinema": { @@ -952,7 +953,8 @@ "username_claimed": "blue" }, "GeeksforGeeks": { - "errorType": "status_code", + "errorMsg": "false | GeeksforGeeks Profile", + "errorType": "message", "url": "https://auth.geeksforgeeks.org/user/{}", "urlMain": "https://www.geeksforgeeks.org/", "username_claimed": "adam" @@ -1526,7 +1528,8 @@ "username_claimed": "lottiefiles" }, "LushStories": { - "errorType": "status_code", + "errorType": "response_url", + "errorUrl": "https://www.lushstories.com/login", "isNSFW": true, "url": "https://www.lushstories.com/profile/{}", "urlMain": "https://www.lushstories.com/", From dca64e35d36217335fb45373cde4891226aae182 Mon Sep 17 00:00:00 2001 From: Mohamed Solaiman Date: Tue, 28 Apr 2026 17:03:23 +0000 Subject: [PATCH 6/6] feat: add Carrd, SpaceHey, and Substack as supported sites - Carrd: Simple website builder with profiles at {username}.carrd.co. Uses status_code detection (404 for non-existing profiles). - SpaceHey: Retro social network inspired by MySpace. Uses message detection ("Not Found (Error 404) | SpaceHey" title for non-existing profiles). - Substack: Newsletter/publishing platform with profiles at {username}.substack.com. Uses status_code detection (404 for non-existing publications). --- sherlock_project/resources/data.json | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 47c39438de..f71b6455b7 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -404,6 +404,13 @@ "urlMain": "https://carbonmade.com/", "username_claimed": "jenny" }, + "Carrd": { + "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9_-]{3,50}$", + "url": "https://{}.carrd.co/", + "urlMain": "https://carrd.co/", + "username_claimed": "blue" + }, "Career.habr": { "errorMsg": "

\u041e\u0448\u0438\u0431\u043a\u0430 404

", "errorType": "message", @@ -2279,6 +2286,13 @@ "urlMain": "https://sourceforge.net/", "username_claimed": "blue" }, + "SpaceHey": { + "errorType": "message", + "errorMsg": "Not Found (Error 404) | SpaceHey", + "url": "https://spacehey.com/{}", + "urlMain": "https://spacehey.com/", + "username_claimed": "blue" + }, "SoylentNews": { "errorMsg": "The user you requested does not exist, no matter how much you wish this might be the case.", "errorType": "message", @@ -2376,6 +2390,13 @@ "urlMain": "https://www.strava.com/", "username_claimed": "blue" }, + "Substack": { + "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9][a-zA-Z0-9_-]{1,60}$", + "url": "https://{}.substack.com/", + "urlMain": "https://substack.com/", + "username_claimed": "green" + }, "SublimeForum": { "errorType": "status_code", "url": "https://forum.sublimetext.com/u/{}",