-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
Ported the Detector class with tests (failing). UCS2String mimics a String class. Added language profiles copied from http://language-detection.googlecode.com/svn/trunk/profiles.
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,18 @@ | ||
$: << File.expand_path(File.dirname(__FILE__))

require 'iconv'
require 'oniguruma'

# Langusta: Ruby port of the langdetect (language-detection) Java library.
module Langusta
  VERSION = '0.0.1'

  # Lazily load each component on first constant reference.
  # (The original registered :Detector twice; one registration suffices.)
  autoload :UCS2String, 'langusta/ucs2_string'
  autoload :Language, 'langusta/language'
  autoload :LangProfile, 'langusta/lang_profile'
  autoload :Detector, 'langusta/detector'
  autoload :DetectorFactory, 'langusta/detector_factory'
  autoload :JavaPropertyReader, 'langusta/java_property_reader'
  autoload :UnicodeBlock, 'langusta/unicode_block'
  autoload :NGram, 'langusta/n_gram'
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,184 @@ | ||
module Langusta
  # Port of langdetect's Detector: estimates the language of a UCS-2 encoded
  # text by repeatedly updating per-language probabilities from the text's
  # n-gram frequencies (random-walk trials averaged over @n_trial rounds).
  class Detector
    attr_accessor :verbose, :alpha, :max_text_length

    ALPHA_DEFAULT = 0.5
    ALPHA_WIDTH = 0.05
    ITERATION_LIMIT = 1000
    PROB_THRESHOLD = 0.1
    CONV_THRESHOLD = 0.99999
    BASE_FREQ = 10000
    UNKNOWN_LANG = "unknown"

    # Patterns run against the UTF-16BE byte buffers held by UCS2String.
    URL_REGEX = Oniguruma::ORegexp.new("https?://[-_.?&~;+=/#0-9A-Za-z]+", :encoding => Oniguruma::ENCODING_UTF16_BE)
    MAIL_REGEX = Oniguruma::ORegexp.new("[-_.0-9A-Za-z]+@[-_0-9A-Za-z]+[-_.0-9A-Za-z]+", :encoding => Oniguruma::ENCODING_UTF16_BE)

    # factory - DetectorFactory carrying the loaded language profiles.
    def initialize(factory)
      @word_lang_prob_map = factory.word_lang_prob_map
      @lang_list = factory.lang_list
      @text = UCS2String.new('')
      @langprob = nil
      @alpha = ALPHA_DEFAULT
      @n_trial = 7
      @max_text_length = 10000
      @prior_map = nil
      @verbose = false
    end

    # Appends text (a UCS2String) to the detection buffer, blanking URLs and
    # e-mail addresses and collapsing runs of spaces.
    def append(text)
      text.gsub!(URL_REGEX, "\x00\x20")
      text.gsub!(MAIL_REGEX, "\x00\x20")
      text.each_char do |c|
        # NOTE(review): the normalized character is discarded here; the Java
        # original appends the normalized char to the buffer. Confirm once
        # NGram.normalize is ported.
        NGram.normalize(c)
      end
      # "+" (not "*"): collapse one-or-more spaces. "*" also matches the empty
      # string at every position and would insert spaces throughout the text.
      text.gsub!(Oniguruma::ORegexp.new("(\x00\x20)+", :encoding => Oniguruma::ENCODING_UTF16_BE), "\x00\x20")
      # gsub! returns nil when nothing matched, so never assign from its result.
      @text = text
    end

    # Returns the most probable language code, or UNKNOWN_LANG when no
    # candidate clears PROB_THRESHOLD.
    def detect
      probabilities = get_probabilities()
      (probabilities.length > 0) ? probabilities.first.lang : UNKNOWN_LANG
    end

    # Runs @n_trial random-walk trials over the extracted n-grams and stores
    # the averaged probability vector in @langprob.
    def detect_block
      cleaning_text()
      ngrams = extract_ngrams()
      raise "no features in text" if ngrams.empty?
      # Accumulator must start at 0.0 — Array.new(n) yields nils, which would
      # blow up on +=.
      langprob = Array.new(@lang_list.length, 0.0)

      @n_trial.times do
        prob = init_probability()
        alpha = @alpha + next_gaussian() * ALPHA_WIDTH

        i = 0
        Kernel.loop do
          r = Kernel.rand(ngrams.length)
          update_lang_prob(prob, ngrams[r], alpha)
          i += 1
          # Convergence is only checked every 5th iteration (as in the Java
          # original); a bare `i % 5` is always truthy in Ruby.
          if i % 5 == 0
            break if self.class.normalize_prob(prob) > CONV_THRESHOLD || i >= ITERATION_LIMIT
            # verbose
          end
        end
        langprob.length.times do |j|
          langprob[j] += prob[j] / @n_trial
        end
        # verbose
      end
      @langprob = langprob
    end

    # Installs per-language prior probabilities from a Hash of lang => prob.
    # Raises if any prior is negative or all priors are zero.
    def set_prior_map(prior_map)
      @prior_map = Array.new(@lang_list.length, 0.0)
      sump = 0.0
      @prior_map.length.times do |i|
        lang = @lang_list[i]
        # Look keys up in the *argument*, not in the array being built.
        if prior_map.has_key?(lang)
          p = prior_map[lang]
          raise "probability must be non-negative" if p < 0
          @prior_map[i] = p
          sump += p
        end
      end
      raise "more one of prob must be non-zero" if sump <= 0
      @prior_map.map! do |p|
        p / sump
      end
    end

    # Normalizes prob in place so it sums to 1.0; returns the maximum element.
    def self.normalize_prob(prob)
      maxp = 0.0; sump = 0.0
      prob.each do |p|
        sump += p
      end
      prob.map! do |p|
        q = p / sump
        maxp = q if q > maxp
        q
      end
      maxp
    end

    private
    # Strips Latin characters when the text is dominated by a non-Latin
    # script, so that stray Latin words do not skew detection.
    def cleaning_text
      non_latin_count = latin_count = 0
      @text.each_char do |c|
        if c < "\x00z" && c >= "\x00A"
          latin_count += 1
        elsif c > "\x03\x00" && UnicodeBlock.of(c) != UnicodeBlock::LATIN_EXTENDED_ADDITIONAL
          non_latin_count += 1
        end
      end
      if latin_count * 2 < non_latin_count
        text_without_latin = ''
        @text.each_char do |c|
          text_without_latin << c if c > "\x00z" || c < "\x00A"
        end
        # Re-wrap so @text stays a UCS2String (the StringIO#to_s the original
        # used returns an inspect string, not the buffer contents).
        @text = UCS2String.new(text_without_latin)
      end
    end

    # Collects every 1..N_GRAM gram of @text that appears in the profiles.
    def extract_ngrams
      list = []
      ngram = NGram.new
      @text.each_char do |char|
        ngram.add(char)
        # N_GRAM is a constant on NGram, so use :: (NGram.N_GRAM would be a
        # method call).
        (1..NGram::N_GRAM).each do |n|
          w = ngram.get(n)
          list << w if w && @word_lang_prob_map.has_key?(w)
        end
      end
      list
    end

    # Lazily runs detection, then returns Language results sorted by
    # descending probability, filtered to those above PROB_THRESHOLD.
    def get_probabilities
      detect_block() if @langprob.nil?
      sort_probability(@langprob)
    end

    # Initial probability vector: the prior map when set, otherwise uniform.
    def init_probability
      prob = Array.new(@lang_list.length)
      if @prior_map
        prob = @prior_map.clone
      else
        prob.length.times do |i|
          prob[i] = 1.0 / @lang_list.length
        end
      end
      prob
    end

    # Converts the raw probability vector into Language objects, best first.
    # NOTE(review): assumes Language.new(lang, prob) with a #lang reader —
    # confirm against langusta/language once ported.
    def sort_probability(prob)
      prob.zip(@lang_list).
        select { |p, lang| p > PROB_THRESHOLD }.
        sort_by { |p, lang| -p }.
        map { |p, lang| Language.new(lang, p) }
    end

    # Multiplies each language's probability by the word's (smoothed)
    # per-language frequency. Returns false for unknown words.
    def update_lang_prob(prob, word, alpha)
      return false if word.nil? || ! @word_lang_prob_map.has_key?(word)

      lang_prob_map = @word_lang_prob_map[word]
      # verbose
      weight = alpha / BASE_FREQ
      prob.length.times do |i|
        # Profiles may leave a slot nil for languages lacking this word.
        prob[i] *= weight + (lang_prob_map[i] || 0.0)
      end
      true
    end

    # Debug formatting: "lang:prob" pairs for probabilities above 1e-5.
    def word_prob_to_string(prob)
      prob.zip(@lang_list).select do |p, lang|
        p > 0.00001
      end.map do |p, lang|
        # %s takes the language name, %.5f the probability — the original had
        # the arguments swapped, which raises on the float conversion.
        "%s:%.5f" % [lang, p]
      end.join(' ')
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
module Langusta
  # Raised for profile-loading and configuration errors.
  class LangDetectException < StandardError; end

  # Builds Detector instances from a set of loaded language profiles.
  # Holds the shared word => per-language-probability map.
  class DetectorFactory
    attr_reader :word_lang_prob_map, :lang_list

    def initialize
      @word_lang_prob_map = {}
      @lang_list = []
    end

    # Registers a language profile.
    # profile  - object responding to #name, #freq (word => count) and
    #            #n_words (total counts indexed by gram length - 1).
    # index    - this language's slot in every probability vector.
    # langsize - total number of languages that will be registered.
    # Raises LangDetectException when the language was already added.
    def add_profile(profile, index, langsize)
      raise LangDetectException.new("duplicate the same language profile") if @lang_list.include?(profile.name)
      @lang_list << profile.name
      profile.freq.keys.each do |word|
        # Default slots to 0.0 (not nil) so later arithmetic on languages
        # that lack this word is well-defined.
        @word_lang_prob_map[word] ||= Array.new(langsize, 0.0)
        prob = profile.freq[word].to_f / profile.n_words[word.length - 1]
        @word_lang_prob_map[word][index] = prob
      end
    end

    # Creates a Detector, optionally overriding its smoothing alpha.
    def create(alpha=nil)
      detector = create_detector()
      detector.alpha = alpha if alpha
      detector
    end

    # Raises LangDetectException when no profiles have been loaded yet.
    def create_detector
      raise LangDetectException.new("need to load profiles") if @lang_list.empty?
      Detector.new(self)
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
module Langusta
  # Thin wrapper around a UCS-2 (UTF-16BE) byte buffer, mimicking the String
  # operations the detector needs. One character == two bytes.
  class UCS2String
    include Enumerable

    attr_reader :underlying

    def initialize(underlying)
      # Identity conversion validates that the bytes are well-formed UCS-2.
      # NOTE(review): Iconv was removed from Ruby's stdlib after 1.8 —
      # confirm the targeted runtime.
      @underlying = Iconv.conv("ucs-2", "ucs-2", underlying)
    end

    # Returns the 2-byte character at character position index.
    def [](index)
      # Character i lives at byte offset i * 2 (the original divided, which
      # returned the wrong character for every index > 1).
      @underlying[index * 2, 2]
    end

    # Delegates in-place substitution to the Oniguruma regexp.
    # Returns nil when nothing was substituted (ORegexp#gsub! semantics).
    def gsub!(oregexp, subst)
      oregexp.gsub!(@underlying, subst)
    end

    # Concatenation. Returns a new UCS2String; does not mutate the receiver
    # despite the << name (kept for interface compatibility).
    def <<(ucs2string)
      raise TypeError unless ucs2string.is_a?(UCS2String)
      # Instances have no #new — construct via the class.
      self.class.new(@underlying + ucs2string.underlying)
    end

    # Yields each character as its 2-byte substring.
    def each_char(&blk)
      (0..(@underlying.length - 2)).step(2) do |index|
        blk.call(@underlying[index, 2])
      end
    end
    alias :each :each_char
  end
end
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"freq":{"D":3892,"E":3299,"F":3285,"G":3045,"A":6416,"B":4070,"C":6617,"L":3054,"M":5299,"N":2815,"O":2578,"H":2337,"I":4798,"J":1557,"K":2201,"U":1696,"T":4435,"W":1804,"V":1999,"P":5115,"S":7482,"R":3099,"Y":511,"X":864,"Z":322,"f":4104,"g":6365,"d":8971,"e":33474,"b":3763,"c":11043,"a":29207,"n":24208,"o":24326,"l":15315,"m":9717,"j":569,"k":3352,"h":9005,"i":26119,"w":2253,"v":3158,"u":10429,"t":20234,"s":16203,"r":23102,"q":406,"p":6165,"z":1050,"y":5016,"x":1132,"·":3643,"é":335,"α":340," l":537," m":1211," n":447," o":1533," h":426," i":705," k":705," d":1030," e":627," f":736," g":373," a":1340," b":551," c":1312," t":1282," v":337," p":1037," s":1367," r":515," J":1393," K":1837," H":1901," I":2769," N":1730," O":1347," L":1992," M":3624," B":2598," C":4248," A":3943," F":2209," G":2328," D":2552," E":1916," Y":379," X":512,"и":342,"о":346," S":4739," R":2101," P":3310,"а":357," W":1489," V":980," U":1053," T":2804," ·":608,"가가》":1806,"A ":1090,"F ":430,"Da":387,"Co":1100,"Ch":633,"FA":344,"G ":347,"De":463,"Di":385,"Ge":351,"I ":810,"Fr":387,"B ":341,"C ":1323,"BS":411,"Ar":322,"D ":656,"Ba":510,"An":388,"Al":410,"Br":354,"Ca":585,"E ":441,"Be":332,"Bo":326,"Le":342,"Li":387,"N ":341,"La":422,"Lo":321,"Me":417,"Mi":434,"O ":403,"Ma":994,"Mo":474,"Ne":370,"Na":336,"P ":715,"Gr":424,"Ha":385,"He":353,"II":481,"In":800,"Ja":347,"L ":466,"Jo":379,"M ":639,"Un":351,"Tr":351,"Th":790,"Te":335,"V ":541,"St":667,"TV":438,"Wi":435,"Pr":609,"S ":1197,"Pa":670,"R ":385,"Se":487,"Sc":370,"So":342,"U ":356,"Sa":396,"Re":588,"Ro":375,"T ":528,"b ":354,"a ":3271,"i ":1146,"ge":1157,"ga":443,"ff":332,"fi":424,"fo":504,"Int":339,"he":2310,"ha":1401,"gi":481,"gh":427,"gu":409,"gr":369,"g ":1575,"ea":1479,"ec":1049,"ed":1247,"de":1816,"di":1071,"do":684,"dr":338,"ew":384,"ev":385,"h ":1328,"fe":378,"eg":405,"ee":580,"el":1725,"ei":535,"ep":401,"eo":702,"en":3472,"em":983,"et":1358,"es":2573,"er":5897,"ca":1157,"e ":7980,"bo":372,"bl":393,"bi":443,"be":654,"da":619,"f 
":1427,"cu":340,"ct":1002,"co":1189,"ck":563,"ci":913,"ch":1939,"ce":1477,"c ":925,"ay":495,"ba":511,"d ":2871,"at":3499,"as":1398,"ar":3373,"av":444,"au":580,"al":3307,"ai":767,"ap":659,"am":1381,"an":5017,"ac":1106,"ad":861,"ab":469,"ag":721,"ae":329,"nt":2416,"ns":1211,"no":896,"nn":583,"of":1300,"oc":729,"od":611,"ob":397,"om":1422,"on":5415,"ol":1638,"og":510,"ot":772,"os":950,"ov":494,"ou":1250,"op":740,"oo":649,"or":3530,"r ":3525,"ow":507,"pe":982,"pa":741,"pl":372,"po":651,"ph":572,"pi":450,"lo":1098,"ll":1500,"lu":455,"lt":410,"ly":332,"o ":1739,"ma":1374,"mb":376,"me":1635,"mi":890,"mm":492,"mp":610,"mo":691,"mu":332,"p ":731,"na":1797,"nc":1140,"nd":2000,"ne":1878,"ng":2212,"ni":1786,"ke":469,"m ":2197,"km":481,"li":1968,"le":2422,"ld":489,"la":1786,"n ":6058,"ht":434,"hu":339,"hi":1040,"ho":848,"id":750,"ic":2901,"ia":1671,"ig":816,"if":357,"ie":1267,"k ":934,"ir":791,"is":2128,"it":2101,"iu":388,"iv":722,"il":1499,"im":625,"in":4140,"io":2942,"ip":462,"l ":3328,"y ":2799,"wa":428,"vi":761,"ve":1364,"va":525,"x ":549,"ui":350,"ul":813,"ue":443,"ur":1440,"us":1642,"ut":842,"um":757,"un":1114,"up":321,"ty":669,"tu":765,"tt":612,"ua":440,"uc":387,"w ":493,"to":1445,"ts":387,"tr":1222,"te":3099,"ti":3945,"th":1754,"ta":1709,"su":334,"ss":1114,"st":2539,"sp":332,"so":726,"sc":476,"se":1269,"sh":547,"si":1432,"u ":440,"sa":574,"rr":396,"rs":871,"rt":1127,"ru":469,"ry":772,"ro":2074,"rn":743,"rm":593,"rl":533,"rk":352,"ri":2784,"rg":488,"re":2795,"rd":807,"rc":621,"ra":2892,"t ":3426,"s ":5482,"pr":409,"ys":349,"丞丞 ":694,"丞一 ":1816,"》가":1345,"《가":2228,"丞丕 ":551,"Com":345,"アアア":475,"》가 ":1068,"一一一":5909,"一一丞":1736,"一一丕":1309,"丕一 ":1155,"Pro":348," ·가":327,"가·가":2886," 《":2439," 》":343," 〈":356,"あ":978," 가 ":130222,"》":2546,"《":2558,"〉":392,"〈":395,"ア":871,"丕丞 ":472,"一丞丞":570,"丕丕 ":348,"가가 
":783362,"一丞一":1754,"一丞丕":441,"가가·":2619,"一丕一":1043,"一丕丞":389,"丕丞一":339,"両":634,"丞":11346,"丐":1267,"丕":7734,"一":36326,"丞一":4590,"丞丕":1151,"丞丞":1515,"丕丞":1076,"丕丕":711,"丕一":2824,"丐一":586,"一丕":3304,"一丐":489,"一丞":5004,"一一":15752," 丞":3275," 丐":491," 丕":2250," 一":11320,"The":618,"丞一丕":423,"丞一丞":536,"丞一一":1649,"ああ":644," 一 ":651,"ber":375,"ce ":804,"al ":1627,"ant":323,"ang":422,"anc":360,"and":945,"ame":329,"all":348,"an ":1234,"ard":453,"ari":335,"art":327,"ar ":393,"ate":490,"ati":1694,"アア":629,"가》":1921,"丕一一":978,"丕一丞":350,"ity":425,"ist":505,"ive":498,"is ":521,"ion":2383,"가가가":1019221,"》 ":1160," Ge":348," Fr":383," Ha":384," He":345," Gr":419," Ja":344," In":792,"har":332," Jo":377," La":409," Le":335," Li":369," Ma":978," Mi":422," Me":403,"he ":1035," Ne":354," Na":322," Mo":461,"her":322," An":382," Al":401," Ba":502," Be":321," Br":346," Ca":565," Ch":622," Co":1074," Da":378," Di":376," De":454," Wi":415," Pa":649," Ro":366," Re":579," Pr":602," St":642," Th":749," Te":327," Tr":341," Sa":393," Sc":357," Se":468," So":330," Un":342,"ian":427," in":405,"ic ":729,"ia ":540," 丞丞":589," 丞一":1845," of":1120," 丞丕":471,"ich":356," km":461,"ica":619," an":470," 가가":780590,"ine":530,"ing":1131," 丕丞":431," co":491,"in ":695," 丕一":1167,"ill":401," de":521," 一丞":2175," 一丕":1192," 一一":6508," th":680,"est":422,"ess":440,"er ":2043,"es ":1105,"eri":458,"era":435,"et ":385,"ers":564,"ern":454,"en ":605,"ell":389,"enc":362,"ent":998,"el ":452,"ge ":450,"for":417," 《가":2128,"de ":544,"cti":343,"ch ":513,"che":357,"ed ":647,"ect":433,"·가가":3042,"der":419,"rea":453,"re ":598,"rch":365,"rd ":425,"rat":517,"ran":534,"ric":460,"ry ":656,"rt ":353," 가":911452,"se ":376,"st ":511,"ss ":355,"ste":469,"sti":465,"str":431,"te ":613,"가":2734665,"per":399,"ng ":1232,"nce":521,"ne ":683,"nal":636,"nd ":828,"가·":2913,"가 ":919426,"nte":685,"nt ":696,"ns ":350,"m가":495,"of ":1107,"or ":608,"ore":443,"on ":2671,"ona":592,"ons":366,"le ":899,"lan":381,"A가":441,"lli":322,"ll 
":408,"S가":359,"man":401,"丞丕一":331,"men":485,"C가":415,"A가 ":326,"丞丞一":486,"·가":3221,"《가가":2147,"ver":541,"ve ":360,"가》가":891,"us ":933,"um ":349,"ty ":554,"tra":420,"tor":430,"tin":357,"tio":1917,"tic":456,"th ":330,"ter":1180,"the":803,"一丞 ":2036,"一丕 ":1523,"一一 ":6123,"丞 ":3597,"丐 ":360,"丕 ":2805,"가가":1808522,"가》 ":998,"一 ":10399,"あああ":427},"n_words":[3186351,4098367,2871816],"name":"ko"} |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
require 'test/helper' | ||
|
||
# Exercises end-to-end detection over three tiny synthetic corpora.
class DetectorTest < Test::Unit::TestCase
  # Training corpora as UCS-2 big-endian byte strings ("\x00a" is 'a';
  # "\x30\x42" etc. are hiragana codepoints).
  TRAINING_EN = "\x00a \x00a \x00a \x00b \x00b \x00c \x00c \x00d \x00e"
  TRAINING_FR = "\x00a \x00b \x00b \x00c \x00c \x00c \x00d \x00d \x00d"
  TRAINING_JP = "\x30\x42 \x30\x42 \x30\x42 \x30\x44 \x30\x46 \x30\x48 \x30\x48"

  def setup
    @factory = DetectorFactory.new
    corpora = [["en", TRAINING_EN], ["fr", TRAINING_FR], ["jp", TRAINING_JP]]
    corpora.each_with_index do |(name, corpus), index|
      @factory.add_profile(build_profile(name, corpus), index, corpora.length)
    end
  end

  def test_detector1
    detector = @factory.create()
    detector.append(UCS2String.new("\x00a"))
    assert_equal("en", detector.detect())
  end

  def test_detector2
    detector = @factory.create()
    detector.append(UCS2String.new("\x00b\x00\x20\x00d"))
    assert_equal("fr", detector.detect())
  end

  def test_detector3
    detector = @factory.create()
    detector.append(UCS2String.new("\x30\x42\x30\x42\x30\x42\x40\x42\x00a"))
    assert_equal("jp", detector.detect())
  end

  private

  # Builds a LangProfile for name from a space-separated training corpus.
  def build_profile(name, corpus)
    profile = LangProfile.new(name)
    corpus.split(/ /).each do |word|
      profile.add(word)
    end
    profile
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
require 'test/helper' | ||
|
||
class DetectorFactoryTest < Test::Unit::TestCase
  # TODO: placeholder — no DetectorFactory test cases have been ported yet
  # (profile registration, duplicate-profile error, empty-factory error).
end