Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added error handling #18

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions daget/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,15 @@ def main():
# get doi/url and resolve to landing page
try:
url = get_redirect_url(args.url)

if get_file_list_from_repo(url) is None:
raise RepoError(f'Landing page is not supported {url}')
except ResolveError as err:
print(bcolors.FAIL, f'error resolving {args.url}', bcolors.ENDC)
print(bcolors.FAIL, f'Error resolving {args.url}: {err}', bcolors.ENDC)
exit(1)
except Exception as e:
# Catch any other unexpected exceptions, including URLError
print(bcolors.FAIL, f'{e}', bcolors.ENDC)
exit(1)

print(f'landing page: {url}')
Expand All @@ -37,7 +44,7 @@ def main():
if len(os.listdir(desitnation)) != 0:
print(bcolors.FAIL, f'{desitnation} must be a empty directory or new directory path', bcolors.ENDC)
exit(1)

print(f'destination: {desitnation}')

files = get_file_list_from_repo(url)
Expand Down
15 changes: 12 additions & 3 deletions daget/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
class ResolveError(ValueError):
pass
def __init__(self, message, http_response_code=None):
super().__init__(message)
self.http_response_code = http_response_code

class RepoError(Exception):
pass
class RepoError(ValueError):
def __init__(self, message, url, supported_urls=None, http_response_code=None):
super().__init__(message)
self.url = url
self.supported_urls = supported_urls or ["dataverse.harvard.edu", "dataverse.no", "snd.se/catalogue", "su.figshare.com", "figshare.scilifelab.se", "zenodo.org"]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hardcoded list of repository URLs should be removed.
daget should try to get a file list via the schema.org distribution (if it's not figshare or zenodo), and if this fails it should throw the error instead. Keeping a list of all supported URLs in the source code is not a sustainable solution.

self.http_response_code = http_response_code

if url not in self.supported_urls:
raise self
31 changes: 25 additions & 6 deletions daget/utils.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
import re
import socket
import urllib, urllib.error
from daget.exceptions import RepoError, ResolveError


def get_redirect_url(url):
# if url provided is a shorthand doi (TODO: check with regex)
if not url.startswith(('http://', 'https://')):
if not re.match(r'^https?://', url):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

url = 'https://doi.org/' + url

opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'daget')]
urllib.request.install_opener(opener)
try:
r = urllib.request.urlopen(url)
return r.geturl()
except urllib.error.HTTPError:
raise ResolveError(f"{url} not found")

try:
Copy link
Owner

@borsna borsna Nov 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks good, giving more precise errors is good

response = urllib.request.urlopen(url)
return response.geturl()

except urllib.error.HTTPError as e:
# Catch HTTP errors and extract relevant information
error_message = f"HTTPError: {e.code} - {e.reason}"
raise ResolveError(f"{url} not found. {error_message}")

except urllib.error.URLError as e:
# Catch URL errors (e.g., network issues) and provide relevant information
if isinstance(e.reason, str):
error_message = f"URLError: {e.reason}"
else:
error_message = f"URLError: {str(e.reason)}"

# Additional handling for socket.gaierror
if isinstance(e.reason, socket.gaierror):
error_message += f", errno: {e.reason.errno}, strerror: {e.reason.strerror}"

raise ResolveError(f"Error connecting to {url}. {error_message}")

def download_file(url, target):
opener = urllib.request.build_opener()
Expand Down