diff --git a/helper.py b/helper.py index 4ad35f6..95b41b8 100644 --- a/helper.py +++ b/helper.py @@ -55,9 +55,11 @@ def parse_arguments(): def normalize_urls(urls): #1. Strip whitespace and keep only the last part in case of spaces - #2. Add protocol in front of urls which are not http or ftp - #3. Remove duplicates from url list + #2. Remove trailing slash from urls + #3. Add protocol in front of urls which are not http or ftp + #4. Remove duplicates from url list urls = [url.strip().split()[-1] for url in urls] + urls = [url.rstrip('/') for url in urls] urls = ['http://' + url if urlparse.urlparse(url).scheme != 'http' and urlparse.urlparse(url).scheme != 'ftp' else url for url in urls] urls = list(set(urls)) @@ -75,5 +77,5 @@ def print_args(args): def print_urls(urls): print "\n{URL list}:" for i, url in enumerate(urls): - print str(i+1) + '. ' + url + print str(i + 1) + '. ' + url print "\nTotal: %d URLs" % len(urls) \ No newline at end of file