# Languages

In [1]:
import json
import pandas
import requests
import yaml

In [2]:
URL = "https://raw.githubusercontent.com/github/linguist/master/lib/linguist/languages.yml"

# Download the YAML file of language definitions.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of passing an
# error page on to the YAML parser.
response = requests.get(URL, timeout=30)
response.raise_for_status()
response_yaml = response.text
languages = yaml.safe_load(response_yaml)

language_keys = list(languages.keys())

# Each top-level key is a language name mapped to a dict of attributes.
# Store the name inside its own attribute dict so it becomes a regular
# "language" column once the dicts are turned into rows.
for language_key, value in languages.items():
    value["language"] = language_key

# One row per language; insertion order of the parsed mapping is kept.
list_of_languages = list(languages.values())

languages_data_frame = pandas.DataFrame(list_of_languages)

In [3]:
# Tally how many rows carry each distinct value of the "type" column.
types = languages_data_frame.loc[:, "type"].value_counts()
types

type
programming    469
data           159
markup          60
prose           18
Name: count, dtype: int64

In [4]:
# Order the rows by type, then ace_mode, then tm_scope, using the language
# name as the final tie-breaker.
sorted_languages_data_frame = languages_data_frame.sort_values(
    by=["type", "ace_mode", "tm_scope", "language"],
)
sorted_languages_data_frame

Unnamed: 0,type,color,extensions,tm_scope,ace_mode,language_id,language,aliases,codemirror_mode,codemirror_mime_type,interpreters,group,filenames,wrap,fs_name,searchable
28,data,#d12127,"[.apacheconf, .vhost]",source.apache-config,apache_conf,16,ApacheConf,"[aconf, apache]",,,,,"[.htaccess, apache2.conf, httpd.conf]",,,
68,data,,[.c-objdump],objdump.x86asm,assembly_x86,44,C-ObjDump,,,,,,,,,
113,data,,"[.cppobjdump, .c++-objdump, .c++objdump, .cpp-...",objdump.x86asm,assembly_x86,70,Cpp-ObjDump,[c++-objdump],,,,,,,,
126,data,,[.d-objdump],objdump.x86asm,assembly_x86,81,D-ObjDump,,,,,,,,,
409,data,,[.objdump],objdump.x86asm,assembly_x86,256,ObjDump,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,prose,,[.muse],text.muse,text,474864066,Muse,"[amusewiki, emacs muse]",,,,,,True,,
701,prose,#141414,"[.rst, .rest, .rest.txt, .rst.txt]",text.restructuredtext,text,419,reStructuredText,[rst],rst,text/x-rst,,,,True,,
604,prose,,"[.texinfo, .texi, .txi]",text.texinfo,text,988020015,Texinfo,,,,[makeinfo],,,True,,
630,prose,#199f4b,[.txt],text.vim-help,text,508563686,Vim Help File,"[help, vimhelp]",,,,,,,,


In [5]:
# Narrow the sorted frame down to five columns, in this display order.
minified_languages_data_frame = sorted_languages_data_frame.loc[
    :, ["type", "color", "ace_mode", "tm_scope", "language"]
]
minified_languages_data_frame

Unnamed: 0,type,color,ace_mode,tm_scope,language
28,data,#d12127,apache_conf,source.apache-config,ApacheConf
68,data,,assembly_x86,objdump.x86asm,C-ObjDump
113,data,,assembly_x86,objdump.x86asm,Cpp-ObjDump
126,data,,assembly_x86,objdump.x86asm,D-ObjDump
409,data,,assembly_x86,objdump.x86asm,ObjDump
...,...,...,...,...,...
377,prose,,text,text.muse,Muse
701,prose,#141414,text,text.restructuredtext,reStructuredText
604,prose,,text,text.texinfo,Texinfo
630,prose,#199f4b,text,text.vim-help,Vim Help File


In [6]:
# Keep only the rows whose "color" value is not missing.
colored_languages_data_frame = minified_languages_data_frame.dropna(subset=["color"])
colored_languages_data_frame

Unnamed: 0,type,color,ace_mode,tm_scope,language
28,data,#d12127,apache_conf,source.apache-config,ApacheConf
77,data,#244776,coffee,source.coffee,CSON
211,data,#F44D27,gitignore,source.gitattributes,Git Attributes
262,data,#000000,gitignore,source.gitignore,Ignore List
82,data,#483465,haskell,source.cabal,Cabal Config
...,...,...,...,...,...
201,prose,#ff6900,text,source.gemini,Gemini
649,prose,#fc5757,text,text.html.mediawiki,Wikitext
701,prose,#141414,text,text.restructuredtext,reStructuredText
630,prose,#199f4b,text,text.vim-help,Vim Help File


In [7]:
# Select the rows whose "type" is exactly "programming".
is_programming = minified_languages_data_frame["type"].eq("programming")
programming_languages_data_frame = minified_languages_data_frame.loc[is_programming]
programming_languages_data_frame

Unnamed: 0,type,color,ace_mode,tm_scope,language
3,programming,#E8274B,abap,source.abap,ABAP
17,programming,#882B0F,actionscript,source.actionscript.3,ActionScript
18,programming,#02f88c,ada,source.ada,Ada
31,programming,#101F1F,applescript,source.applescript,AppleScript
30,programming,#0B3D91,assembly_x86,source.agc,Apollo Guidance Computer
...,...,...,...,...,...
109,programming,#B5314C,yaml,source.cwl,Common Workflow Language
137,programming,#FBEE96,yaml,source.denizenscript,DenizenScript
299,programming,#773b37,yaml,source.yaml,Kaitai Struct
333,programming,#652B81,yaml,source.yaml,LookML


In [8]:
# Select the rows whose "type" is exactly "data".
is_data = minified_languages_data_frame["type"].eq("data")
data_languages_data_frame = minified_languages_data_frame.loc[is_data]
data_languages_data_frame

Unnamed: 0,type,color,ace_mode,tm_scope,language
28,data,#d12127,apache_conf,source.apache-config,ApacheConf
68,data,,assembly_x86,objdump.x86asm,C-ObjDump
113,data,,assembly_x86,objdump.x86asm,Cpp-ObjDump
126,data,,assembly_x86,objdump.x86asm,D-ObjDump
409,data,,assembly_x86,objdump.x86asm,ObjDump
...,...,...,...,...,...
405,data,#85ea2d,yaml,source.yaml,OASv2-yaml
407,data,#85ea2d,yaml,source.yaml,OASv3-yaml
616,data,#222c37,yaml,source.yaml,Unity3D Asset
672,data,#cb171e,yaml,source.yaml,YAML


In [9]:
# Select the rows whose "type" is exactly "markup".
is_markup = minified_languages_data_frame["type"].eq("markup")
markup_languages_data_frame = minified_languages_data_frame.loc[is_markup]
markup_languages_data_frame

Unnamed: 0,type,color,ace_mode,tm_scope,language
78,markup,#563d7c,css,source.css,CSS
290,markup,#a52a22,django,text.html.django,Jinja
150,markup,#a91e50,ejs,text.html.js,EJS
247,markup,#ece2a9,haml,text.haml,Haml
248,markup,#f7931e,handlebars,text.html.handlebars,Handlebars
36,markup,#ff5a03,html,source.astro,Astro
51,markup,#5562ac,html,source.csswg,Bikeshed
580,markup,#3fb34f,html,source.string-template,StringTemplate
585,markup,#ff3e00,html,source.svelte,Svelte
238,markup,#e34c26,html,text.html.basic,HTML
