Skip to content

Commit

Permalink
language interpreters and shebang lines
Browse files Browse the repository at this point in the history
Add an interpreter array to each language, and match interpreters found
in the shebang lines of scripts to this array to identify the language
of scripts.

With suggestions from tnm. #687
  • Loading branch information
eschulte committed Nov 9, 2013
1 parent eb5f146 commit 7a6202a
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 0 deletions.
46 changes: 46 additions & 0 deletions lib/linguist/language.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Language
@alias_index = {}

@extension_index = Hash.new { |h,k| h[k] = [] }
@interpreter_index = Hash.new { |h,k| h[k] = [] }
@filename_index = Hash.new { |h,k| h[k] = [] }
@primary_extension_index = {}

Expand Down Expand Up @@ -71,6 +72,10 @@ def self.create(attributes = {})

@primary_extension_index[language.primary_extension] = language

language.interpreters.each do |interpreter|
@interpreter_index[interpreter] << language
end

language.filenames.each do |filename|
@filename_index[filename] << language
end
Expand Down Expand Up @@ -101,6 +106,8 @@ def self.detect(name, data, mode = nil)
data = data.call() if data.respond_to?(:call)
if data.nil? || data == ""
nil
elsif result = find_by_shebang(data)
result.first
elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
Language[result[0]]
end
Expand Down Expand Up @@ -162,6 +169,20 @@ def self.find_by_filename(filename)
langs.compact.uniq
end

# Public: Look up Languages by shebang line.
#
# data - String data to analyze.
#
# Examples
#
#   Language.find_by_shebang("#!/bin/bash\ndate;")
#   # => [#<Language name="Bash">]
#
# Returns an Array of the Languages registered for the interpreter found
# by Linguist.interpreter_from_shebang, or an empty Array when none match.
def self.find_by_shebang(data)
  interpreter = Linguist.interpreter_from_shebang(data)

  # The index is built with a default block that stores a new Array on
  # every miss. Using fetch bypasses that block, so looking up an unknown
  # interpreter (or nil, when no interpreter was found) does not add an
  # entry to the shared index.
  @interpreter_index.fetch(interpreter) { [] }
end

# Public: Look up Language by its name or lexer.
#
# name - The String name of the Language
Expand Down Expand Up @@ -247,6 +268,7 @@ def initialize(attributes = {})

# Set extensions or default to [].
@extensions = attributes[:extensions] || []
@interpreters = attributes[:interpreters] || []
@filenames = attributes[:filenames] || []

unless @primary_extension = attributes[:primary_extension]
Expand Down Expand Up @@ -359,6 +381,15 @@ def initialize(attributes = {})
# Returns the extension String.
attr_reader :primary_extension

# Public: Get interpreters
#
# Examples
#
# # => ['awk', 'gawk', 'mawk' ...]
#
# Returns the interpreters Array
attr_reader :interpreters

# Public: Get filenames
#
# Examples
Expand Down Expand Up @@ -452,11 +483,13 @@ def inspect
end

extensions = Samples::DATA['extnames']
interpreters = Samples::DATA['interpreters']
filenames = Samples::DATA['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))

YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
options['extensions'] ||= []
options['interpreters'] ||= []
options['filenames'] ||= []

if extnames = extensions[name]
Expand All @@ -467,6 +500,18 @@ def inspect
end
end

if interpreters == nil
interpreters = {}
end

if interpreter_names = interpreters[name]
interpreter_names.each do |interpreter|
if !options['interpreters'].include?(interpreter)
options['interpreters'] << interpreter
end
end
end

if fns = filenames[name]
fns.each do |filename|
if !options['filenames'].include?(filename)
Expand All @@ -487,6 +532,7 @@ def inspect
:searchable => options.key?('searchable') ? options['searchable'] : true,
:search_term => options['search_term'],
:extensions => options['extensions'].sort,
:interpreters => options['interpreters'].sort,
:primary_extension => options['primary_extension'],
:filenames => options['filenames'],
:popular => popular.include?(name)
Expand Down
38 changes: 38 additions & 0 deletions lib/linguist/samples.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def self.each(&block)
yield({
:path => File.join(dirname, filename),
:language => category,
:interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
:extname => File.extname(filename)
})
end
Expand All @@ -67,6 +68,7 @@ def self.each(&block)
def self.data
db = {}
db['extnames'] = {}
db['interpreters'] = {}
db['filenames'] = {}

each do |sample|
Expand All @@ -80,6 +82,14 @@ def self.data
end
end

if sample[:interpreter]
db['interpreters'][language_name] ||= []
if !db['interpreters'][language_name].include?(sample[:interpreter])
db['interpreters'][language_name] << sample[:interpreter]
db['interpreters'][language_name].sort!
end
end

if sample[:filename]
db['filenames'][language_name] ||= []
db['filenames'][language_name] << sample[:filename]
Expand All @@ -95,4 +105,32 @@ def self.data
db
end
end

# Used to retrieve the interpreter from the shebang line of a file's
# data.
#
# data - String contents of the file (may be nil or empty).
#
# Examples
#
#   interpreter_from_shebang("#!/usr/bin/env python2.6\nprint 1")
#   # => "python"
#
# Returns the interpreter name String with any version number removed, or
# nil when the first line is not a shebang or names no interpreter.
def self.interpreter_from_shebang(data)
  return nil if data.nil?

  # Only the first line can carry a shebang, so stop reading after it
  # instead of splitting the entire file into lines.
  first_line = data.each_line.first
  return nil unless first_line && first_line.start_with?('#!')

  # Drop the "#!" marker. Splitting on ' ' skips any run of spaces or tabs
  # after the marker and discards the trailing line ending.
  tokens = first_line[2..-1].split(' ')
  return nil if tokens.empty?

  # "/usr/bin/perl" -> "perl"; a bare "perl" stays as it is.
  script = tokens.first.split('/').last

  # "/usr/bin/env ruby" names the real interpreter as env's first argument.
  script = tokens[1] if script == 'env'
  return nil if script.nil? || script.empty?

  # "python2.6" -> "python"
  script = script.sub(/(?:\d+\.?)+/, '')

  script.empty? ? nil : script
end

end

0 comments on commit 7a6202a

Please sign in to comment.