Skip to content
Browse files

use string.punctuation and unicodedata

  • Loading branch information...
1 parent 384c83d commit 7558ff8931a4c10062c39bad29dec8fc64dc837a straup committed
Showing with 21 additions and 16 deletions.
  1. +21 −16 bin/utils.py
View
37 bin/utils.py
@@ -1,5 +1,7 @@
-import os.path
import pprint
+import os.path
+import string
+import unicodedata
def dumper(data):
    """Pretty-print *data* to standard output.

    The value is formatted with ``pprint.pformat`` and written followed by
    a newline. Intended as a debugging aid.
    """
    # Call form of print: with a single argument this behaves identically
    # under the Python 2 print statement and the Python 3 print function.
    print(pprint.pformat(data))
@@ -18,23 +20,26 @@ def id2path(id):
return os.path.join(*parts)
def clean_meta_name(name, allow_punctuation=None):
    """Return a lowercase, ASCII-only, hyphen-separated form of *name*.

    Processing order:

    1. Strip leading/trailing whitespace and lowercase the text.
    2. Reduce accented characters to ASCII via ``remove_accents``.
    3. Delete every character in ``string.punctuation`` that is not listed
       in *allow_punctuation*.
    4. Replace each space with a hyphen and collapse any run of hyphens to
       a single hyphen.

    :param name: the text to clean.
    :param allow_punctuation: container (list, tuple, set or string) of
        single punctuation characters to keep. ``None`` (the default)
        keeps none of them.
    :returns: the cleaned name.
    """
    # None rather than a shared mutable [] default; an empty tuple gives
    # the same "nothing allowed" behaviour.
    allowed = () if allow_punctuation is None else allow_punctuation

    name = name.strip()
    name = name.lower()

    name = remove_accents(name)

    for c in string.punctuation:
        if c in allowed:
            continue

        name = name.replace(c, "")

    name = name.replace(" ", "-")

    # A single replace("--", "-") pass only halves a run of hyphens
    # ("---" -> "--"), so repeat until no doubled hyphen remains. Runs
    # appear whenever punctuation between spaces was removed above.
    while "--" in name:
        name = name.replace("--", "-")

    return name
+
def remove_accents(input_str):
    """Return *input_str* reduced to plain ASCII text.

    The input is NFKD-normalized so accented characters decompose into a
    base character plus combining marks; encoding to ASCII with
    ``'ignore'`` then drops the marks. Characters that have no ASCII
    decomposition are dropped entirely.

    :param input_str: text to convert. Byte strings are decoded as UTF-8
        (assumed encoding); any other non-text object is converted with
        the text type's constructor.
    :returns: a native ``str`` containing only ASCII characters.
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, which
    # avoids referencing the Python 2-only ``unicode`` builtin by name.
    text_type = type(u"")

    if isinstance(input_str, bytes):
        # Decode explicitly: the implicit ASCII decode done by unicode()
        # fails on any non-ASCII byte string.
        text = input_str.decode("utf-8")
    elif isinstance(input_str, text_type):
        text = input_str
    else:
        text = text_type(input_str)

    nfkd_form = unicodedata.normalize("NFKD", text)
    only_ascii = nfkd_form.encode("ascii", "ignore")

    # encode() already yields a native str on Python 2; on Python 3 it
    # yields bytes, which callers cannot mix with str operations.
    if not isinstance(only_ascii, str):
        only_ascii = only_ascii.decode("ascii")

    return only_ascii

0 comments on commit 7558ff8

Please sign in to comment.
Something went wrong with that request. Please try again.