Permalink
Browse files

convenience script for determining POS of word according to corpus

  • Loading branch information...
1 parent 1dab337 commit e4d93001a35c3eafbe718f9f852eb9a14d3597aa @gkovacs committed May 15, 2011
Showing with 29 additions and 0 deletions.
  1. +29 −0 word-pos-in-corpus.rb
View
29 word-pos-in-corpus.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/ruby1.9
+# encoding: utf-8
+# Usage: word-pos-in-corpus.rb WORD -- prints the most frequent POS tag of WORD in the corpus.
+
+$LOAD_PATH << File.join(File.expand_path(File.dirname(__FILE__)), '.')
+require 'typelists.rb'
+
+word = ARGV[0]
+abort "usage: #{$0} WORD" if word.nil?
+
+# Default of 0 so a tag missing from partsOfSpeech is counted instead of raising on nil.
+posCounts = Hash.new(0)
+# Seed known tags first: ties are broken by insertion order below.
+partsOfSpeech.each { |pos| posCounts[pos] = 0 }
+# File.foreach closes the corpus file when done (File.open(...).each leaked the handle).
+File.foreach("corpus/corpus-allwords-base-pos.txt") { |line|
+  _, baseform, pos = line.split(" ")
+  # Skip other words and malformed lines without a POS column.
+  next if baseform != word || pos.nil?
+  posCounts[pos] += 1
+}
+maxcount = 0
+bestpos = ""
+posCounts.each { |pos, count|
+  # Strict > keeps the earliest tag on ties; bestpos stays "" if the word never occurs.
+  bestpos, maxcount = pos, count if count > maxcount
+}
+puts bestpos
+

0 comments on commit e4d9300

Please sign in to comment.