/
string.rb
59 lines (47 loc) · 1.44 KB
/
string.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# coding: UTF-8
require 'iconv' unless RUBY_VERSION >= "1.9"
# Core extensions on String for numeric detection, encoding clean-up and
# company-name normalization.
#
# NOTE(review): #transliterate needs ActiveSupport::Inflector and
# #name_normalization calls #squish, neither of which is defined or required
# in this file -- presumably the host application loads ActiveSupport.
class String
  # Tells whether the whole string can be parsed by Kernel#Float.
  #
  # @return [Boolean] true when Float(self) succeeds, false when it raises
  def number?
    Float(self)
    true
  rescue StandardError
    false
  end

  # Returns a copy converted to ASCII in which every invalid byte sequence
  # and every character without an ASCII equivalent is replaced by
  # +replacement+.
  #
  # On Ruby 1.9+ line endings are additionally normalized to "\n". On older
  # Rubies the Iconv fallback simply drops such characters ("//IGNORE"), so
  # +replacement+ has no effect there.
  #
  # @param replacement [String] text substituted for each offending
  #   character (default "", i.e. remove it)
  # @return [String] the converted copy; the receiver is not modified
  def remove_non_ascii(replacement = "")
    if RUBY_VERSION >= "1.9"
      # The options are written as a brace-less hash on purpose: old Rubies
      # read it as a trailing options hash, while Ruby 3+ reads it as
      # keyword arguments (a positional Hash object is no longer accepted
      # as options there).
      encode(
        Encoding.find('ASCII'),
        :invalid           => :replace,     # Replace invalid byte sequences
        :undef             => :replace,     # Replace anything not defined in ASCII
        :replace           => replacement,  # Text used for those replacements
        :universal_newline => true          # Always break lines with \n
      )
    else
      Iconv.conv('ASCII//IGNORE', 'UTF8', self)
    end
  end

  # Interprets the string's bytes as encoding +from+ and transcodes them to
  # encoding +to+.
  #
  # The re-labelling is done on a copy so the receiver keeps its original
  # encoding (and frozen strings are accepted), matching the non-mutating
  # Iconv branch.
  #
  # @param to [String] name of the target encoding
  # @param from [String] name of the encoding the bytes are actually in
  # @return [String] the transcoded copy
  def encodef(to = 'UTF-8', from = 'iso8859-1')
    if RUBY_VERSION >= "1.9"
      dup.force_encoding(from).encode(to)
    else
      Iconv.conv(to, from, self)
    end
  end

  # Delegates to ActiveSupport::Inflector.transliterate.
  #
  # @return [String] whatever ActiveSupport::Inflector.transliterate returns
  #   for this string
  def transliterate
    ActiveSupport::Inflector.transliterate(self)
  end

  # Legal-form suffixes removed by #remove_company_nature. Every entry is a
  # regular-expression fragment (alternatives separated by "|"), so literal
  # dots are escaped; an unescaped "s.a" would also match ordinary words
  # such as "sea" or "spa".
  CompanyNatureSuffixes = ['s\.a|s/a|sa', 's\.c|s/c|sc', 'ltda', 'ltd', 'inc', 'gmbh']

  # Strips trailing legal-form suffixes (see CompanyNatureSuffixes), matched
  # case-insensitively and with an optional final dot, and trims surrounding
  # whitespace.
  #
  # A name may carry several suffixes in a row ("Foo Ltda S.A"), so passes
  # are repeated until one of them leaves the text unchanged.
  #
  # @return [String] a new string without the trailing suffixes
  def remove_company_nature
    current = self
    loop do
      cleaned = CompanyNatureSuffixes.inject(current) do |name, nature|
        name.gsub(/\b(?:#{nature})\.?$/i, '')
      end.strip
      # Every productive pass shortens the text, so this always terminates.
      return cleaned if cleaned == current
      current = cleaned
    end
  end

  # Removes every character that is not an ASCII letter, a digit or
  # whitespace.
  #
  # @return [String] a new string containing only [a-zA-Z0-9] and whitespace
  def remove_symbols
    gsub(/[^a-z0-9\s]/i, '')
  end

  # Builds a normalized form of a company name by chaining, in order:
  # squish, #remove_company_nature, #transliterate, #remove_symbols and
  # downcase.
  #
  # NOTE(review): #squish is not defined in this file -- presumably
  # ActiveSupport's String#squish; confirm it is loaded by the caller.
  #
  # @return [String] the normalized name
  def name_normalization
    squish.remove_company_nature.transliterate.remove_symbols.downcase
  end
end