Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

67 lines (53 sloc) 1.592 kb
require 'rake/clean'
require 'rake/testtask'
task :default => :test
Rake::TestTask.new do |t|
t.warning = true
end
task :samples do
require 'linguist/samples'
require 'yajl'
data = Linguist::Samples.data
json = Yajl::Encoder.encode(data, :pretty => true)
File.open('lib/linguist/samples.json', 'w') { |io| io.write json }
end
namespace :classifier do
LIMIT = 1_000
desc "Run classifier against #{LIMIT} public gists"
task :test do
require 'linguist/classifier'
require 'linguist/samples'
total, correct, incorrect = 0, 0, 0
$stdout.sync = true
each_public_gist do |gist_url, file_url, file_language|
next if file_language.nil? || file_language == 'Text'
begin
data = open(file_url).read
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first
total += 1
guessed_language == file_language ? correct += 1 : incorrect += 1
print "\r\e[0K%d:%d %g%%" % [correct, incorrect, (correct.to_f/total.to_f)*100]
$stdout.flush
rescue URI::InvalidURIError
else
break if total >= LIMIT
end
end
puts ""
end
def each_public_gist
require 'open-uri'
require 'json'
url = "https://api.github.com/gists/public"
loop do
resp = open(url)
url = resp.meta['link'][/<([^>]+)>; rel="next"/, 1]
gists = JSON.parse(resp.read)
for gist in gists
for filename, attrs in gist['files']
yield gist['url'], attrs['raw_url'], attrs['language']
end
end
end
end
end
Jump to Line
Something went wrong with that request. Please try again.