Added error handling

borsna · Dec 15, 2023 · 8482899 · 8482899
1 parent 96b3229
commit 8482899
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 11 deletions.
diff --git a/daget/__main__.py b/daget/__main__.py
@@ -22,8 +22,15 @@ def main():
   # get doi/url and resolve to landing page
   try:
     url = get_redirect_url(args.url)
+
+    if get_file_list_from_repo(url) is None:
+      raise RepoError(f'Landing page is not supported {url}')
   except ResolveError as err:
-    print(bcolors.FAIL, f'error resolving {args.url}', bcolors.ENDC)
+    print(bcolors.FAIL, f'Error resolving {args.url}: {err}', bcolors.ENDC)
+    exit(1)
+  except Exception as e:
+    # Catch everything else, e.g. the unsupported-landing-page error raised above or a URLError
+    print(bcolors.FAIL, f'{e}', bcolors.ENDC)
     exit(1)
 
   print(f'landing page: {url}')
@@ -37,7 +44,7 @@ def main():
     if len(os.listdir(desitnation)) != 0:
       print(bcolors.FAIL, f'{desitnation} must be a empty directory or new directory path', bcolors.ENDC)
       exit(1)
-
+      
   print(f'destination: {desitnation}')
 
   files = get_file_list_from_repo(url)

diff --git a/daget/exceptions.py b/daget/exceptions.py
@@ -1,5 +1,14 @@
 class ResolveError(ValueError):
-    pass
+    def __init__(self, message, http_response_code=None):
+        super().__init__(message)
+        self.http_response_code = http_response_code
 
-class RepoError(Exception):
-    pass
+class RepoError(ValueError):
+    def __init__(self, message, url, supported_urls=None, http_response_code=None):
+        super().__init__(message)
+        self.url = url
+        self.supported_urls = supported_urls or ["dataverse.harvard.edu", "dataverse.no", "snd.se/catalogue", "su.figshare.com", "figshare.scilifelab.se", "zenodo.org"]
+        self.http_response_code = http_response_code
+
+        if url not in self.supported_urls:
+            raise self
diff --git a/daget/utils.py b/daget/utils.py
@@ -1,20 +1,39 @@
+import re
+import socket
 import urllib, urllib.error
 from daget.exceptions import RepoError, ResolveError
 
 
 def get_redirect_url(url):
   # if url provided is a shorthand doi (TODO: check with regex)
-  if not url.startswith(('http://', 'https://')):
+  if not re.match(r'^https?://', url):
     url = 'https://doi.org/' + url
 
   opener = urllib.request.build_opener()
   opener.addheaders = [('User-Agent', 'daget')]
   urllib.request.install_opener(opener)
-  try:  
-    r = urllib.request.urlopen(url)
-    return r.geturl()
-  except urllib.error.HTTPError:
-    raise ResolveError(f"{url} not found") 
+
+  try:
+    response = urllib.request.urlopen(url)
+    return response.geturl()
+
+  except urllib.error.HTTPError as e:
+      # Catch HTTP errors and extract relevant information
+      error_message = f"HTTPError: {e.code} - {e.reason}"
+      raise ResolveError(f"{url} not found. {error_message}")
+
+  except urllib.error.URLError as e:
+      # Catch URL errors (e.g., network issues) and provide relevant information
+      if isinstance(e.reason, str):
+          error_message = f"URLError: {e.reason}"
+      else:
+          error_message = f"URLError: {str(e.reason)}"
+
+      # Additional handling for socket.gaierror
+      if isinstance(e.reason, socket.gaierror):
+          error_message += f", errno: {e.reason.errno}, strerror: {e.reason.strerror}"
+
+      raise ResolveError(f"Error connecting to {url}. {error_message}")
 
 def download_file(url, target):
   opener = urllib.request.build_opener()