-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Ana Marjanica
committed
Apr 12, 2012
1 parent
6f62dd3
commit 8f23ff6
Showing
4 changed files
with
102 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package hr.element.etb | ||
package slug | ||
|
||
import com.ibm.icu.text.Transliterator | ||
import java.util.regex.Pattern | ||
|
||
|
||
|
||
/**
 * Companion of the [[SMSifier]] case class.
 *
 * Holds the regular-expression fragment describing "unsafe" characters and a
 * ready-made instance, so that callers can simply write `SMSifier(text)`.
 */
object SMSifier {
  /**
   * Regex fragment matching one or more consecutive characters that are NOT
   * in the allowed set: the ASCII ranges `!`-`/`, `0`-`9`, `:`-`@`, `A`-`Z`,
   * `a`-`z`, the symbols `£` and `¥`, and whitespace.
   */
  protected val SMSUnsafeChars: String = """[^!-/0-9:-@A-Za-z£¥\n\s]+"""

  // Shared instance: latin-to-ASCII transliteration rules, "-" as the replacement.
  private val defaultInstance: SMSifier =
    new SMSifier(TransliterateRules.latinToASCII, "-")

  /**
   * Runs `text` through the shared default instance.
   *
   * @param text the text to process
   * @return the result of applying the default [[SMSifier]] to `text`
   */
  def apply(text: String): String =
    defaultInstance.apply(text)
}
|
||
|
||
/**
 * Rewrites a text so that it only contains characters outside of
 * `SMSifier.SMSUnsafeChars`.
 *
 * The text is processed in four steps (see [[apply]]): transliteration,
 * trimming of unsafe characters at both ends, collapsing of whitespace runs
 * and finally replacement of the remaining unsafe runs.
 *
 * @param transRules  rule identifier handed to ICU's `Transliterator.getInstance`
 * @param replacement text substituted for every run of unsafe characters;
 *                    it is always inserted literally, even when it contains
 *                    `$` or `\`
 */
case class SMSifier (transRules: String, replacement: String) {
  private val Trans = Transliterator.getInstance(transRules)

  // A run made of unsafe characters and/or literal occurrences of
  // `replacement`, so that adjacent replacements are merged into one.
  private val SMSUnsafeReplacePattern =
    "(%s|%s)+".format(SMSifier.SMSUnsafeChars, Pattern.quote(replacement)).r

  // Unsafe characters anchored at the very start or the very end of the text
  // (`%1$s` re-uses the first format argument; the trailing `$` is the regex anchor).
  private val SMSUnsafeTrimPattern =
    "^%s|%1$s$".format(SMSifier.SMSUnsafeChars).r

  private val SMSWhiteSpace =
    """\s+""".r

  // `Regex.replaceAllIn` treats `$` and `\` in the replacement string as
  // group references / escapes. The pattern side already quotes `replacement`
  // with `Pattern.quote`, so quote it here too to keep it a literal.
  private val literalReplacement: String =
    java.util.regex.Matcher.quoteReplacement(replacement)

  // convert latin letters to ASCII (ex. đ->d)
  protected val transliterate: String => String =
    Trans.transliterate(_: String)

  // trim all SMS non-safe characters from the beginning and the end
  protected val trim: String => String =
    SMSUnsafeTrimPattern.replaceAllIn(_: String, "")

  // collapse every run of whitespace into a single space
  protected val trimWhiteSpaceToOne: String => String =
    SMSWhiteSpace.replaceAllIn(_: String, " ")

  // replace all SMS non-safe characters
  protected val sanitize: String => String =
    SMSUnsafeReplacePattern.replaceAllIn(_: String, literalReplacement)

  /**
   * Applies the whole pipeline to `text`.
   *
   * @param text the text to process
   * @return `text` after transliteration, trimming, whitespace collapsing and
   *         replacement of unsafe characters, in that order
   */
  def apply(text: String): String =
    (transliterate andThen trim andThen trimWhiteSpaceToOne andThen sanitize)(text)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package hr.element.etb.slug | ||
package test | ||
|
||
import org.scalatest._ | ||
import org.scalatest.matchers._ | ||
|
||
|
||
|
||
/**
 * Feature specification for the default `SMSifier`: every scenario feeds a
 * string through `SMSifier(...)` and compares the result with a fixed
 * expected string.
 */
class SMSSpec extends FeatureSpec with GivenWhenThen with MustMatchers {

  // NOTE(review): the feature title and info text talk about URLs although the
  // expected results below contain spaces - confirm the intended wording.
  feature("SMS URL sanitization") {
    info("SMS must convert every evil character combination into a pretty URL-safe string")

    // A run of characters outside the allowed set is replaced by a single "-".
    scenario("SMS unsafe character removal") {
      val in = "aZ{}[]! a"
      given ("a string containing: %s" format in)
      val res = "aZ-! a"
      then ("it should return a string: %s" format res)
      val out = SMSifier(in)
      out must equal (res)
    }

    // Accented latin letters are transliterated to their ASCII counterparts.
    scenario("SMS unsafe character trimming") {
      val in = "!aćsčš "
      given ("a string containing: %s" format in)
      val res = "!acscs "
      then ("it should return a string: %s" format res)
      val out = SMSifier(in)
      out must equal (res)
    }

    // The input must contain a run of several whitespace characters;
    // otherwise this scenario cannot detect a broken whitespace collapse.
    scenario("SMS whitespace trimming to one character") {
      val in = "Helooooooooooooo     oooo"
      given ("a string containing: %s" format in)
      val res = "Helooooooooooooo oooo"
      then ("it should return a string: %s" format res)
      val out = SMSifier(in)
      out must equal (res)
    }
  }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters