Skip to content

Commit

Permalink
Remove trailing slash from urls
Browse files Browse the repository at this point in the history
  • Loading branch information
ikoniaris committed Feb 18, 2014
1 parent ec236f9 commit 5d2b64a
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions helper.py
Expand Up @@ -55,9 +55,11 @@ def parse_arguments():

def normalize_urls(urls):
#1. Strip whitespace and keep only the last part in case of spaces
- #2. Add protocol in front of urls which are not http or ftp
- #3. Remove duplicates from url list
+ #2. Remove trailing slash from urls
+ #3. Add protocol in front of urls which are not http or ftp
+ #4. Remove duplicates from url list
  urls = [url.strip().split()[-1] for url in urls]
+ urls = [url.rstrip('/') for url in urls]
urls = ['http://' + url if urlparse.urlparse(url).scheme != 'http' and urlparse.urlparse(url).scheme != 'ftp'
else url for url in urls]
urls = list(set(urls))
Expand All @@ -75,5 +77,5 @@ def print_args(args):
def print_urls(urls):
print "\n{URL list}:"
for i, url in enumerate(urls):
- print str(i+1) + '. ' + url
+ print str(i + 1) + '. ' + url
print "\nTotal: %d URLs" % len(urls)

0 comments on commit 5d2b64a

Please sign in to comment.