Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions sherlock_project/resources/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@
"errorType": "status_code",
"url": "https://www.artstation.com/{}",
"urlMain": "https://www.artstation.com/",
"urlProbe": "https://www.artstation.com/users/{}.json",
"username_claimed": "Blue"
},
"Asciinema": {
Expand Down Expand Up @@ -404,6 +405,13 @@
"urlMain": "https://carbonmade.com/",
"username_claimed": "jenny"
},
"Carrd": {
"errorType": "status_code",
"regexCheck": "^[a-zA-Z0-9_-]{3,50}$",
"url": "https://{}.carrd.co/",
"urlMain": "https://carrd.co/",
"username_claimed": "blue"
},
"Career.habr": {
"errorMsg": "<h1>\u041e\u0448\u0438\u0431\u043a\u0430 404</h1>",
"errorType": "message",
Expand Down Expand Up @@ -602,10 +610,9 @@
"username_claimed": "blue"
},
"Cracked Forum": {
"errorMsg": "The member you specified is either invalid or doesn't exist",
"errorType": "message",
"url": "https://cracked.sh/{}",
"urlMain": "https://cracked.sh/",
"errorType": "status_code",
"url": "https://cracked.ax/{}",
"urlMain": "https://cracked.ax/",
"username_claimed": "Blue"
},
"Credly": {
Expand Down Expand Up @@ -952,7 +959,8 @@
"username_claimed": "blue"
},
"GeeksforGeeks": {
"errorType": "status_code",
"errorMsg": "false | GeeksforGeeks Profile",
"errorType": "message",
"url": "https://auth.geeksforgeeks.org/user/{}",
"urlMain": "https://www.geeksforgeeks.org/",
"username_claimed": "adam"
Expand Down Expand Up @@ -1526,7 +1534,8 @@
"username_claimed": "lottiefiles"
},
"LushStories": {
"errorType": "status_code",
"errorType": "response_url",
"errorUrl": "https://www.lushstories.com/login",
"isNSFW": true,
"url": "https://www.lushstories.com/profile/{}",
"urlMain": "https://www.lushstories.com/",
Expand Down Expand Up @@ -2279,6 +2288,13 @@
"urlMain": "https://sourceforge.net/",
"username_claimed": "blue"
},
"SpaceHey": {
"errorType": "message",
"errorMsg": "Not Found (Error 404) | SpaceHey",
"url": "https://spacehey.com/{}",
"urlMain": "https://spacehey.com/",
"username_claimed": "blue"
},
"SoylentNews": {
"errorMsg": "The user you requested does not exist, no matter how much you wish this might be the case.",
"errorType": "message",
Expand Down Expand Up @@ -2376,6 +2392,13 @@
"urlMain": "https://www.strava.com/",
"username_claimed": "blue"
},
"Substack": {
"errorType": "status_code",
"regexCheck": "^[a-zA-Z0-9][a-zA-Z0-9_-]{1,60}$",
"url": "https://{}.substack.com/",
"urlMain": "https://substack.com/",
"username_claimed": "green"
},
"SublimeForum": {
"errorType": "status_code",
"url": "https://forum.sublimetext.com/u/{}",
Expand Down Expand Up @@ -2827,8 +2850,10 @@
},
"akniga": {
"errorType": "status_code",
"errorCode": 404,
"request_method": "GET",
"url": "https://akniga.org/profile/{}",
"urlMain": "https://akniga.org/profile/blue/",
"urlMain": "https://akniga.org/",
"username_claimed": "blue"
},
"authorSTREAM": {
Expand Down
3 changes: 3 additions & 0 deletions sherlock_project/sherlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network):
except requests.exceptions.RequestException as err:
error_context = "Unknown Error"
exception_text = str(err)
except UnicodeError as err:
error_context = "Encoding Error"
exception_text = str(err)

return response, error_context, exception_text

Expand Down
47 changes: 47 additions & 0 deletions tests/test_unicode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Tests for handling usernames with special/unicode characters."""

from concurrent.futures import Future

from sherlock_project.sherlock import get_response


def _make_future_with_exception(exc):
"""Create a Future that raises the given exception."""
future = Future()
future.set_exception(exc)
return future


def test_get_response_handles_unicode_decode_error():
    """Regression test for issue #2730.

    Usernames with special characters (e.g. 'Émile') can trigger a
    UnicodeDecodeError inside the requests library during redirect
    handling. get_response must turn that into an error result rather
    than letting it crash the program.
    """
    decode_error = UnicodeDecodeError(
        "utf-8", b"\xe9", 0, 1, "invalid continuation byte"
    )

    outcome = get_response(
        request_future=_make_future_with_exception(decode_error),
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome

    # No response object is produced; the failure is described instead.
    assert response is None
    assert error_context == "Encoding Error"
    # The codec name is part of str(UnicodeDecodeError).
    assert "utf-8" in exception_text


def test_get_response_handles_unicode_encode_error():
    """Check that UnicodeEncodeError is reported like UnicodeDecodeError.

    Both exceptions are subclasses of UnicodeError, so get_response is
    expected to describe either one as an "Encoding Error".
    """
    encode_error = UnicodeEncodeError(
        "ascii", "É", 0, 1, "ordinal not in range(128)"
    )

    outcome = get_response(
        request_future=_make_future_with_exception(encode_error),
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome

    # No response object is produced; the failure is described instead.
    assert response is None
    assert error_context == "Encoding Error"
    # The codec name is part of str(UnicodeEncodeError).
    assert "ascii" in exception_text
6 changes: 3 additions & 3 deletions tests/test_ux.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
from sherlock_interactives import InteractivesSubprocessError

def test_remove_nsfw(sites_obj):
    """Check that remove_nsfw_sites() drops a known NSFW site.

    The target site must be present in ``sites_obj`` before the call and
    absent after it.
    """
    # Single assignment: the earlier 'Pornhub' value was overwritten before
    # it was ever read, so only 'Xvideos' was actually being tested.
    nsfw_target: str = 'Xvideos'
    assert nsfw_target in {site.name: site.information for site in sites_obj}
    sites_obj.remove_nsfw_sites()
    assert nsfw_target not in {site.name: site.information for site in sites_obj}


# Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
['Pornhub'],
['Pornhub', 'Xvideos'],
['Xvideos'],
['Xvideos', 'Erome'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
for site in nsfwsites:
Expand Down
Loading