In [2]:
import pytest
import ipytest
import json

ipytest.autoconfig()

In [3]:
delimiter = ','
def unwrap(str_):
    return str_.split(delimiter)

def inverse_dict(dict_):
    _dict = {}
    
    for key, values in dict_.items():
        for value in values:
            _dict[str(value)] = unwrap(key)
    
    return _dict

def flatten(l):
    return [item for sublist in l for item in sublist]

def json_prettify(json_):
    return json.dumps(json_, indent=4);

In [4]:
%%ipytest

# define the tests
def test_unwrap():
    assert unwrap('1,2,3') == ['1','2','3']

def test_inverse_dict():
    assert inverse_dict(
        {'a': [1, 2], 'b': [3, 4]}
    ) == {
            '1': ['a'], 
            '2': ['a'],
            '3': ['b'],
            '4': ['b']
         }
    
def test_inverse_dict():
    assert inverse_dict(
        {'a': [1, 2], 'b': [3, 4]}
    ) == {
            '1': ['a'], 
            '2': ['a'],
            '3': ['b'],
            '4': ['b']
         }

def test_flatten():
    assert flatten([[1, 2], [3, 4]]) == [1, 2, 3, 4]
    
def test_json_prettify():
    assert json_prettify({'a': 1, 'b': 2}) == '{\n    "a": 1,\n    "b": 2\n}'

[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                                         [100%][0m
[32m[32m[1m4 passed[0m[32m in 0.01s[0m[0m


In [7]:
import json

with open('./language_applications.json') as f:
    languages = json.load(f)

print(json_prettify(languages))

{
    "Python": [
        "Web development",
        "scientific computing",
        "data analysis",
        "artificial intelligence",
        "automation",
        "machine learning"
    ],
    "Java": [
        "Enterprise software development",
        "Android app development",
        "web development",
        "big data processing",
        "scientific computing",
        "gaming"
    ],
    "JavaScript": [
        "Web development",
        "front-end development",
        "server-side development",
        "desktop applications",
        "mobile applications",
        "game development"
    ],
    "C++": [
        "Operating systems",
        "gaming",
        "database systems",
        "compilers",
        "finance",
        "scientific computing",
        "artificial intelligence",
        "robotics"
    ],
    "C#": [
        "Windows desktop applications",
        "game development",
        "web development",
        "mobile app development",
        "enterprise softwar

In [8]:
from eule import euler

# Pizza to exclusive ingredient
euler_languages = euler(languages);

print(json_prettify(euler_languages));


{
    "Erlang": [
        "telecoms",
        "messaging systems",
        "Distributed computing",
        "fault-tolerant systems"
    ],
    "Elixir,Erlang": [
        "concurrency"
    ],
    "Awk,Icon": [
        "Text processing"
    ],
    "Common Lisp,Icon,Lisp,OCaml,Objective Caml,Prolog,Scheme": [
        "symbolic computation"
    ],
    "Icon,Processing,Racket,Smalltalk": [
        "education"
    ],
    "Awk,BASIC,Bash,Groovy,Icon,Lua,Nim,PowerShell,REXX,Racket,Ruby,Tcl": [
        "scripting"
    ],
    "SAS": [
        "business intelligence",
        "Data analysis",
        "statistical modeling"
    ],
    "Ada": [
        "Aerospace and defense",
        "financial services",
        "telecommunications",
        "industrial automation"
    ],
    "Ada,Cobol": [
        "healthcare",
        "transportation"
    ],
    "C++": [
        "finance",
        "robotics",
        "Operating systems",
        "database systems"
    ],
    "C++,Parrot": [
        "compilers"

In [29]:
from collections import Counter, OrderedDict

class OrderedCounter(Counter, OrderedDict):
    pass


applications=inverse_dict(languages)

delimiter=' '

categories=list(set([category.lower() for category in flatten(list(languages.values()))]))
category_words=list(flatten([category.split(delimiter) for category in categories]))

sorted(Counter(category_words).items(), key=lambda pair: pair[1], reverse=True)


[('development', 25),
 ('programming', 18),
 ('systems', 13),
 ('and', 8),
 ('desktop', 7),
 ('app', 7),
 ('data', 7),
 ('applications', 6),
 ('computing', 6),
 ('processing', 5),
 ('distributed', 4),
 ('database', 3),
 ('mobile', 3),
 ('web', 3),
 ('software', 3),
 ('analysis', 3),
 ('server-side', 3),
 ('game', 2),
 ('simple', 2),
 ('cross-platform', 2),
 ('windows', 2),
 ('application', 2),
 ('statistical', 2),
 ('networking', 2),
 ('ios', 2),
 ('android', 2),
 ('intelligence', 2),
 ('scripting', 2),
 ('visualization', 2),
 ('dynamic', 2),
 ('languages', 2),
 ('real-time', 2),
 ('scientific', 2),
 ('engineering', 2),
 ('business', 2),
 ('front-end', 2),
 ('management', 2),
 ('language', 2),
 ('concurrent', 2),
 ('automation', 2),
 ('numerical', 2),
 ('graphical', 2),
 ('administration', 2),
 ('embedded', 1),
 ('aerospace', 1),
 ('defense', 1),
 ('planning', 1),
 ('healthcare', 1),
 ('virtual', 1),
 ('machines', 1),
 ('logic', 1),
 ('education', 1),
 ('security', 1),
 ('bioinformatic

In [37]:
category_words=list(set(category_words))

word_to_categories=dict(map(lambda word: [word, []], category_words))

for word in category_words:
    for category in categories:
        if(word in category):
            word_to_categories[word].append(category)
            
print(json_prettify(word_to_categories))

{
    "machines": [
        "virtual machines"
    ],
    "high-performance": [
        "high-performance computing"
    ],
    "sound": [
        "sound and music programming"
    ],
    "hardware": [
        "hardware description language"
    ],
    "app": [
        "mobile applications",
        "windows desktop application development",
        "windows desktop applications",
        "mobile app development for ios and android",
        "desktop applications",
        "cross-platform desktop and web application development",
        "scientific and engineering applications",
        "business applications",
        "apple watch app development",
        "desktop app development",
        "android app development",
        "ios app development",
        "real-time applications",
        "macos app development",
        "mobile app development"
    ],
    "healthcare": [
        "healthcare"
    ],
    "domain-specific": [
        "domain-specific languages"
    ],
    "symbolic": [