In [1]:
all_langs = ['Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'C', 'C++', 'C#',
             'Ruby', 'R', 'Matlab', 'Go', 'Rust', 'Objective-C', 'Swift',
             'Visual Basic', 'Perl', 'Cobol', 'Fortran', 'Lisp', 'Assembly']

# Load data

In [2]:
import csv
from pprint import pprint


with open('../data/Programming language survey.csv', 'r') as file:
    file.readline()
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'other_langs', 'age'))
    langs_by_entry = [line['languages'].split(';') for line in reader]
    
pprint(langs_by_entry)

[['Python', 'Java', 'C', 'C++', 'R', 'Matlab', 'Rust', 'Fortran', 'Assembly'],
 ['Python', 'Java', 'PHP', 'Visual Basic'],
 ['Python',
  'Java',
  'JavaScript',
  'C',
  'C++',
  'Ruby',
  'R',
  'Objective-C',
  'Assembly']]


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12/21 languages known by this class (57%)**

In [3]:
known_langs = {lang for langs in langs_by_entry for lang in langs}

percent = round(len(known_langs)/len(all_langs) * 100)
print(f"{len(known_langs)}/{len(all_langs)} languages known by this class ({percent}%)")

14/21 languages known by this class (67%)


## List languages known and not known

In [4]:
print('Not known:')
print(set(all_langs) - known_langs)

Not known:
{'Go', 'Swift', 'Perl', 'Cobol', 'Lisp', 'C#', 'TypeScript'}


# Rank languages by most commonly known
Print each language as `"{position}: {language} ({count})"`, in order from most to least known

e.g. **1: Python (30)**

In [5]:
rank_langs = {lang: 0 for lang in known_langs}

for langs in langs_by_entry:
    for lang in langs:
        rank_langs[lang] += 1

rank_langs

{'Rust': 1,
 'JavaScript': 1,
 'PHP': 1,
 'Fortran': 1,
 'Assembly': 2,
 'C++': 2,
 'Visual Basic': 1,
 'Java': 3,
 'Objective-C': 1,
 'Matlab': 1,
 'Ruby': 1,
 'Python': 3,
 'R': 2,
 'C': 2}

In [6]:
sorted_langs = sorted(list(rank_langs.items()), key=lambda tup: -tup[1])
sorted_langs

[('Java', 3),
 ('Python', 3),
 ('Assembly', 2),
 ('C++', 2),
 ('R', 2),
 ('C', 2),
 ('Rust', 1),
 ('JavaScript', 1),
 ('PHP', 1),
 ('Fortran', 1),
 ('Visual Basic', 1),
 ('Objective-C', 1),
 ('Matlab', 1),
 ('Ruby', 1)]

In [7]:
for i, (lang, count) in enumerate(sorted_langs, start=1):
    print(f"{i}: {lang} ({count})")

1: Java (3)
2: Python (3)
3: Assembly (2)
4: C++ (2)
5: R (2)
6: C (2)
7: Rust (1)
8: JavaScript (1)
9: PHP (1)
10: Fortran (1)
11: Visual Basic (1)
12: Objective-C (1)
13: Matlab (1)
14: Ruby (1)
