Permalink
Fetching contributors…
Cannot retrieve contributors at this time
559 lines (484 sloc) 14 KB
require 'escape_utils'
require 'yaml'
begin
require 'yajl'
rescue LoadError
end
require 'linguist/classifier'
require 'linguist/heuristics'
require 'linguist/samples'
require 'linguist/file_blob'
require 'linguist/blob_helper'
require 'linguist/strategy/filename'
require 'linguist/strategy/extension'
require 'linguist/strategy/modeline'
require 'linguist/shebang'
module Linguist
# Language names that are recognizable by GitHub. Defined languages
# can be highlighted, searched and listed under the Top Languages page.
#
# Languages are defined in `lib/linguist/languages.yml`.
class Language
@languages = []
@index = {}
@name_index = {}
@alias_index = {}
@language_id_index = {}
@extension_index = Hash.new { |h,k| h[k] = [] }
@interpreter_index = Hash.new { |h,k| h[k] = [] }
@filename_index = Hash.new { |h,k| h[k] = [] }
# Valid Languages types
TYPES = [:data, :markup, :programming, :prose]
# Detect languages by a specific type
#
# type - A symbol that exists within TYPES
#
# Returns an array
def self.by_type(type)
all.select { |h| h.type == type }
end
# Internal: Create a new Language object
#
# attributes - A hash of attributes
#
# Returns a Language object
def self.create(attributes = {})
language = new(attributes)
@languages << language
# All Language names should be unique. Raise if there is a duplicate.
if @name_index.key?(language.name)
raise ArgumentError, "Duplicate language name: #{language.name}"
end
# Language name index
@index[language.name.downcase] = @name_index[language.name.downcase] = language
language.aliases.each do |name|
# All Language aliases should be unique. Raise if there is a duplicate.
if @alias_index.key?(name)
raise ArgumentError, "Duplicate alias: #{name}"
end
@index[name.downcase] = @alias_index[name.downcase] = language
end
language.extensions.each do |extension|
if extension !~ /^\./
raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
end
@extension_index[extension.downcase] << language
end
language.interpreters.each do |interpreter|
@interpreter_index[interpreter] << language
end
language.filenames.each do |filename|
@filename_index[filename] << language
end
@language_id_index[language.language_id] = language
language
end
# Public: Get all Languages
#
# Returns an Array of Languages
def self.all
@languages
end
# Public: Look up Language by its proper name.
#
# name - The String name of the Language
#
# Examples
#
# Language.find_by_name('Ruby')
# # => #<Language name="Ruby">
#
# Returns the Language or nil if none was found.
def self.find_by_name(name)
return nil if !name.is_a?(String) || name.to_s.empty?
name && (@name_index[name.downcase] || @name_index[name.split(',', 2).first.downcase])
end
# Public: Look up Language by one of its aliases.
#
# name - A String alias of the Language
#
# Examples
#
# Language.find_by_alias('cpp')
# # => #<Language name="C++">
#
# Returns the Language or nil if none was found.
def self.find_by_alias(name)
return nil if !name.is_a?(String) || name.to_s.empty?
name && (@alias_index[name.downcase] || @alias_index[name.split(',', 2).first.downcase])
end
# Public: Look up Languages by filename.
#
# The behaviour of this method recently changed.
# See the second example below.
#
# filename - The path String.
#
# Examples
#
# Language.find_by_filename('Cakefile')
# # => [#<Language name="CoffeeScript">]
# Language.find_by_filename('foo.rb')
# # => []
#
# Returns all matching Languages or [] if none were found.
def self.find_by_filename(filename)
basename = File.basename(filename)
@filename_index[basename]
end
# Public: Look up Languages by file extension.
#
# The behaviour of this method recently changed.
# See the second example below.
#
# filename - The path String.
#
# Examples
#
# Language.find_by_extension('dummy.rb')
# # => [#<Language name="Ruby">]
# Language.find_by_extension('rb')
# # => []
#
# Returns all matching Languages or [] if none were found.
def self.find_by_extension(filename)
# find the first extension with language definitions
extname = FileBlob.new(filename.downcase).extensions.detect do |e|
!@extension_index[e].empty?
end
@extension_index[extname]
end
# Public: Look up Languages by interpreter.
#
# interpreter - String of interpreter name
#
# Examples
#
# Language.find_by_interpreter("bash")
# # => [#<Language name="Bash">]
#
# Returns the matching Language
def self.find_by_interpreter(interpreter)
@interpreter_index[interpreter]
end
# Public: Look up Languages by its language_id.
#
# language_id - Integer of language_id
#
# Examples
#
# Language.find_by_id(100)
# # => [#<Language name="Elixir">]
#
# Returns the matching Language
def self.find_by_id(language_id)
@language_id_index[language_id.to_i]
end
# Public: Look up Language by its name.
#
# name - The String name of the Language
#
# Examples
#
# Language['Ruby']
# # => #<Language name="Ruby">
#
# Language['ruby']
# # => #<Language name="Ruby">
#
# Returns the Language or nil if none was found.
def self.[](name)
return nil if !name.is_a?(String) || name.to_s.empty?
lang = @index[name.downcase]
return lang if lang
@index[name.split(',', 2).first.downcase]
end
# Public: A List of popular languages
#
# Popular languages are sorted to the top of language chooser
# dropdowns.
#
# This list is configured in "popular.yml".
#
# Returns an Array of Languages.
def self.popular
@popular ||= all.select(&:popular?).sort_by { |lang| lang.name.downcase }
end
# Public: A List of non-popular languages
#
# Unpopular languages appear below popular ones in language
# chooser dropdowns.
#
# This list is created from all the languages not listed in "popular.yml".
#
# Returns an Array of Languages.
def self.unpopular
@unpopular ||= all.select(&:unpopular?).sort_by { |lang| lang.name.downcase }
end
# Public: A List of languages with assigned colors.
#
# Returns an Array of Languages.
def self.colors
@colors ||= all.select(&:color).sort_by { |lang| lang.name.downcase }
end
# Internal: Initialize a new Language
#
# attributes - A hash of attributes
def initialize(attributes = {})
# @name is required
@name = attributes[:name] || raise(ArgumentError, "missing name")
# Set type
@type = attributes[:type] ? attributes[:type].to_sym : nil
if @type && !TYPES.include?(@type)
raise ArgumentError, "invalid type: #{@type}"
end
@color = attributes[:color]
# Set aliases
@aliases = [default_alias] + (attributes[:aliases] || [])
# Load the TextMate scope name or try to guess one
@tm_scope = attributes[:tm_scope] || begin
context = case @type
when :data, :markup, :prose
'text'
when :programming, nil
'source'
end
"#{context}.#{@name.downcase}"
end
@ace_mode = attributes[:ace_mode]
@codemirror_mode = attributes[:codemirror_mode]
@codemirror_mime_type = attributes[:codemirror_mime_type]
@wrap = attributes[:wrap] || false
# Set the language_id
@language_id = attributes[:language_id]
# Set extensions or default to [].
@extensions = attributes[:extensions] || []
@interpreters = attributes[:interpreters] || []
@filenames = attributes[:filenames] || []
# Set popular, and searchable flags
@popular = attributes.key?(:popular) ? attributes[:popular] : false
@searchable = attributes.key?(:searchable) ? attributes[:searchable] : true
# If group name is set, save the name so we can lazy load it later
if attributes[:group_name]
@group = nil
@group_name = attributes[:group_name]
# Otherwise we can set it to self now
else
@group = self
end
end
# Public: Get proper name
#
# Examples
#
# # => "Ruby"
# # => "Python"
# # => "Perl"
#
# Returns the name String
attr_reader :name
# Public: Get type.
#
# Returns a type Symbol or nil.
attr_reader :type
# Public: Get color.
#
# Returns a hex color String.
attr_reader :color
# Public: Get aliases
#
# Examples
#
# Language['C++'].aliases
# # => ["cpp"]
#
# Returns an Array of String names
attr_reader :aliases
# Public: Get language_id (used in GitHub search)
#
# Examples
#
# # => "1"
# # => "2"
# # => "3"
#
# Returns the integer language_id
attr_reader :language_id
# Public: Get the name of a TextMate-compatible scope
#
# Returns the scope
attr_reader :tm_scope
# Public: Get Ace mode
#
# Examples
#
# # => "text"
# # => "javascript"
# # => "c_cpp"
#
# Returns a String name or nil
attr_reader :ace_mode
# Public: Get CodeMirror mode
#
# Maps to a directory in the `mode/` source code.
# https://github.com/codemirror/CodeMirror/tree/master/mode
#
# Examples
#
# # => "nil"
# # => "javascript"
# # => "clike"
#
# Returns a String name or nil
attr_reader :codemirror_mode
# Public: Get CodeMirror MIME type mode
#
# Examples
#
# # => "nil"
# # => "text/x-javascript"
# # => "text/x-csrc"
#
# Returns a String name or nil
attr_reader :codemirror_mime_type
# Public: Should language lines be wrapped
#
# Returns true or false
attr_reader :wrap
# Public: Get extensions
#
# Examples
#
# # => ['.rb', '.rake', ...]
#
# Returns the extensions Array
attr_reader :extensions
# Public: Get interpreters
#
# Examples
#
# # => ['awk', 'gawk', 'mawk' ...]
#
# Returns the interpreters Array
attr_reader :interpreters
# Public: Get filenames
#
# Examples
#
# # => ['Rakefile', ...]
#
# Returns the extensions Array
attr_reader :filenames
# Public: Get URL escaped name.
#
# Examples
#
# "C%23"
# "C%2B%2B"
# "Common%20Lisp"
#
# Returns the escaped String.
def escaped_name
EscapeUtils.escape_url(name).gsub('+', '%20')
end
# Public: Get default alias name
#
# Returns the alias name String
def default_alias
name.downcase.gsub(/\s/, '-')
end
alias_method :default_alias_name, :default_alias
# Public: Get Language group
#
# Returns a Language
def group
@group ||= Language.find_by_name(@group_name)
end
# Public: Is it popular?
#
# Returns true or false
def popular?
@popular
end
# Public: Is it not popular?
#
# Returns true or false
def unpopular?
!popular?
end
# Public: Is it searchable?
#
# Unsearchable languages won't by indexed by solr and won't show
# up in the code search dropdown.
#
# Returns true or false
def searchable?
@searchable
end
# Public: Return name as String representation
def to_s
name
end
def ==(other)
eql?(other)
end
def eql?(other)
equal?(other)
end
def hash
name.hash
end
def inspect
"#<#{self.class} name=#{name}>"
end
end
extensions = Samples.cache['extnames']
interpreters = Samples.cache['interpreters']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
languages_yml = File.expand_path("../languages.yml", __FILE__)
languages_json = File.expand_path("../languages.json", __FILE__)
if File.exist?(languages_json) && defined?(Yajl)
languages = Yajl.load(File.read(languages_json))
else
languages = YAML.load_file(languages_yml)
end
languages.each do |name, options|
options['extensions'] ||= []
options['interpreters'] ||= []
options['filenames'] ||= []
if extnames = extensions[name]
extnames.each do |extname|
if !options['extensions'].index { |x| x.downcase.end_with? extname.downcase }
warn "#{name} has a sample with extension (#{extname.downcase}) that isn't explicitly defined in languages.yml"
options['extensions'] << extname
end
end
end
interpreters ||= {}
if interpreter_names = interpreters[name]
interpreter_names.each do |interpreter|
if !options['interpreters'].include?(interpreter)
options['interpreters'] << interpreter
end
end
end
Language.create(
:name => name,
:color => options['color'],
:type => options['type'],
:aliases => options['aliases'],
:tm_scope => options['tm_scope'],
:ace_mode => options['ace_mode'],
:codemirror_mode => options['codemirror_mode'],
:codemirror_mime_type => options['codemirror_mime_type'],
:wrap => options['wrap'],
:group_name => options['group'],
:searchable => options.fetch('searchable', true),
:language_id => options['language_id'],
:extensions => Array(options['extensions']),
:interpreters => options['interpreters'].sort,
:filenames => options['filenames'],
:popular => popular.include?(name)
)
end
end