# Languages

In [64]:
import json
import os

import pandas
import requests
import yaml

In [65]:
URL = "https://raw.githubusercontent.com/github/linguist/master/lib/linguist/languages.yml"

# Download the YAML document at URL. The timeout keeps the cell from hanging
# indefinitely, and raise_for_status() fails loudly on an HTTP error instead of
# letting an error page be parsed as YAML.
response = requests.get(URL, timeout=30)
response.raise_for_status()
response_yaml = response.text
languages = yaml.safe_load(response_yaml)

language_keys = list(languages.keys())

# Flatten the mapping into a list of records: each entry's own attributes plus
# its key stored under "language" (the entry dict is updated in place).
list_of_languages = []

for language_key, value in languages.items():
    value["language"] = language_key
    list_of_languages.append(value)

languages_data_frame = pandas.DataFrame(list_of_languages)

In [66]:
# Tally how many rows carry each distinct value of the "type" column.
type_column = languages_data_frame["type"]
types = type_column.value_counts()
types

type
programming    469
data           159
markup          60
prose           18
Name: count, dtype: int64

In [67]:
# Order the rows by type, then ace_mode, tm_scope and finally language.
sorted_languages_data_frame = languages_data_frame.sort_values(by=["type", "ace_mode", "tm_scope", "language"])

# to_csv does not create missing parent directories, so make sure ./csv exists
# before writing (a fresh checkout may not have it).
os.makedirs("./csv", exist_ok=True)
sorted_languages_data_frame.to_csv("./csv/all-languages.csv", index=False, header=True)
sorted_languages_data_frame

Unnamed: 0,type,color,extensions,tm_scope,ace_mode,language_id,language,aliases,codemirror_mode,codemirror_mime_type,interpreters,group,filenames,wrap,fs_name,searchable
28,data,#d12127,"[.apacheconf, .vhost]",source.apache-config,apache_conf,16,ApacheConf,"[aconf, apache]",,,,,"[.htaccess, apache2.conf, httpd.conf]",,,
68,data,,[.c-objdump],objdump.x86asm,assembly_x86,44,C-ObjDump,,,,,,,,,
113,data,,"[.cppobjdump, .c++-objdump, .c++objdump, .cpp-...",objdump.x86asm,assembly_x86,70,Cpp-ObjDump,[c++-objdump],,,,,,,,
126,data,,[.d-objdump],objdump.x86asm,assembly_x86,81,D-ObjDump,,,,,,,,,
409,data,,[.objdump],objdump.x86asm,assembly_x86,256,ObjDump,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,prose,,[.muse],text.muse,text,474864066,Muse,"[amusewiki, emacs muse]",,,,,,True,,
701,prose,#141414,"[.rst, .rest, .rest.txt, .rst.txt]",text.restructuredtext,text,419,reStructuredText,[rst],rst,text/x-rst,,,,True,,
604,prose,,"[.texinfo, .texi, .txi]",text.texinfo,text,988020015,Texinfo,,,,[makeinfo],,,True,,
630,prose,#199f4b,[.txt],text.vim-help,text,508563686,Vim Help File,"[help, vimhelp]",,,,,,,,


In [68]:
# Keep only the five columns used by the smaller exports below.
minified_columns = ["type", "color", "ace_mode", "tm_scope", "language"]
minified_languages_data_frame = sorted_languages_data_frame.loc[:, minified_columns]
minified_languages_data_frame

Unnamed: 0,type,color,ace_mode,tm_scope,language
28,data,#d12127,apache_conf,source.apache-config,ApacheConf
68,data,,assembly_x86,objdump.x86asm,C-ObjDump
113,data,,assembly_x86,objdump.x86asm,Cpp-ObjDump
126,data,,assembly_x86,objdump.x86asm,D-ObjDump
409,data,,assembly_x86,objdump.x86asm,ObjDump
...,...,...,...,...,...
377,prose,,text,text.muse,Muse
701,prose,#141414,text,text.restructuredtext,reStructuredText
604,prose,,text,text.texinfo,Texinfo
630,prose,#199f4b,text,text.vim-help,Vim Help File


In [69]:
# Export only the rows whose "color" value is present.
has_color = minified_languages_data_frame["color"].notna()
colored_languages_data_frame = minified_languages_data_frame.loc[has_color]
colored_languages_data_frame.to_csv("./csv/colored-languages.csv", header=True, index=False)

In [70]:
# Export the rows whose "type" is exactly "programming".
is_programming = minified_languages_data_frame["type"].eq("programming")
programming_languages_data_frame = minified_languages_data_frame.loc[is_programming]
programming_languages_data_frame.to_csv("./csv/programming-languages.csv", header=True, index=False)

In [71]:
# Export the rows whose "type" is exactly "data".
is_data = minified_languages_data_frame["type"].eq("data")
data_languages_data_frame = minified_languages_data_frame.loc[is_data]
data_languages_data_frame.to_csv("./csv/data-languages.csv", header=True, index=False)

In [72]:
# Export the rows whose "type" is exactly "markup".
is_markup = minified_languages_data_frame["type"].eq("markup")
markup_languages_data_frame = minified_languages_data_frame.loc[is_markup]
markup_languages_data_frame.to_csv("./csv/markup-languages.csv", header=True, index=False)

In [73]:
# Serialize the list of language records as pretty-printed JSON.
languages_json = json.dumps(list_of_languages, indent=2)

# open() does not create missing parent directories, so make sure ./json exists.
os.makedirs("./json", exist_ok=True)

# The context manager closes (and therefore flushes) the file deterministically;
# a bare open() without close() can leave the write buffered.
with open("./json/languages.json", "w") as languages_json_file:
    languages_json_file.write(languages_json)
languages_json

'[\n  {\n    "type": "programming",\n    "color": "#814CCC",\n    "extensions": [\n      ".bsl",\n      ".os"\n    ],\n    "tm_scope": "source.bsl",\n    "ace_mode": "text",\n    "language_id": 0,\n    "language": "1C Enterprise"\n  },\n  {\n    "type": "data",\n    "color": "#38761D",\n    "extensions": [\n      ".2da"\n    ],\n    "tm_scope": "source.2da",\n    "ace_mode": "text",\n    "language_id": 387204628,\n    "language": "2-Dimensional Array"\n  },\n  {\n    "type": "programming",\n    "color": "#004289",\n    "extensions": [\n      ".4dm"\n    ],\n    "tm_scope": "source.4dm",\n    "ace_mode": "text",\n    "language_id": 577529595,\n    "language": "4D"\n  },\n  {\n    "type": "programming",\n    "color": "#E8274B",\n    "extensions": [\n      ".abap"\n    ],\n    "tm_scope": "source.abap",\n    "ace_mode": "abap",\n    "language_id": 1,\n    "language": "ABAP"\n  },\n  {\n    "type": "programming",\n    "color": "#555e25",\n    "extensions": [\n      ".asddls"\n    ],\n    "