# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA).

In [1]:
# Every language offered as an option in the survey, in the order listed there.
all_langs = [
    'Python', 'Java', 'JavaScript', 'TypeScript',
    'PHP', 'SQL', 'C', 'C++',
    'C#', 'Ruby', 'R', 'Matlab',
    'Go', 'Rust', 'Objective-C', 'Swift',
    'Visual Basic', 'Perl', 'Cobol', 'Fortran',
    'Lisp', 'Assembly', 'Kotlin', 'Dart',
    'Scala', 'Lua', 'Delphi', 'Haskell',
    'Julia', 'Clojure', 'Elixir', 'Pascal',
]

## Load data

In [2]:
import csv
from pprint import pprint

# Build one list of known languages per survey respondent.
#
# newline='' hands line-ending handling to the csv module, which is required
# for quoted fields that contain embedded newlines. The encoding is pinned so
# the result does not depend on the platform default (assumes the export is
# UTF-8, which is what Google Forms/Sheets CSV downloads use).
with open('../data/Programming language survey.csv', newline='', encoding='utf-8') as file:
    reader = csv.DictReader(file, fieldnames=['timestamp', 'langs', 'years'])
    # Skip the header through the csv reader (not a raw readline) so a header
    # cell containing a quoted newline is still consumed as a single row.
    next(reader, None)
    # The 'langs' column is a ', '-separated list of the ticked options.
    # An empty or missing answer becomes [] instead of [''], so the empty
    # string is never counted as a language; the respondent is still counted.
    languages_known_by_person = [
        row['langs'].split(', ') if row['langs'] else []
        for row in reader
    ]

print(len(languages_known_by_person))
print(languages_known_by_person[0])

77
['Python']


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12/21 languages known by this class (57%)**

In [3]:
# Distinct languages ticked by at least one respondent.
langs_known = set().union(*languages_known_by_person)

known_total = len(langs_known)
listed_total = len(all_langs)
percent_known = round(known_total / listed_total * 100)
print(f"{known_total} / {listed_total} languages known by this class (as {percent_known}%)")

31 / 32 languages known by this class (as 97%)


## List languages not known by anyone in the class

In [4]:
# Languages offered in the survey that no respondent ticked.
set(all_langs).difference(langs_known)

{'Scala'}

## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known

E.g. **1: Python (93%)**

In [5]:
from collections import Counter
# Tally how many respondents know each language by flattening the
# per-person lists into a single stream of language names.
langs_count = Counter(
    lang
    for person_langs in languages_known_by_person
    for lang in person_langs
)

print(langs_count)

Counter({'Python': 63, 'SQL': 41, 'JavaScript': 36, 'Java': 36, 'C': 30, 'C++': 23, 'Visual Basic': 19, 'Perl': 15, 'Pascal': 15, 'Assembly': 13, 'Fortran': 10, 'PHP': 10, 'TypeScript': 9, 'C#': 9, 'R': 8, 'Go': 8, 'Swift': 5, 'Ruby': 5, 'Lisp': 5, 'Cobol': 5, 'Delphi': 5, 'Clojure': 4, 'Objective-C': 3, 'Matlab': 2, 'Julia': 2, 'Kotlin': 2, 'Rust': 1, 'Dart': 1, 'Lua': 1, 'Haskell': 1, 'Elixir': 1})


In [6]:
# The number of respondents is the denominator for every percentage.
num_people = len(languages_known_by_person)

# most_common() orders by descending count; ties keep first-seen order.
ranked = langs_count.most_common()
for index, entry in enumerate(ranked):
    lang, count = entry
    percent_known = round(count / num_people * 100)
    print(f"{index + 1}: {lang} ({percent_known}%)")

1: Python (82%)
2: SQL (53%)
3: JavaScript (47%)
4: Java (47%)
5: C (39%)
6: C++ (30%)
7: Visual Basic (25%)
8: Perl (19%)
9: Pascal (19%)
10: Assembly (17%)
11: Fortran (13%)
12: PHP (13%)
13: TypeScript (12%)
14: C# (12%)
15: R (10%)
16: Go (10%)
17: Swift (6%)
18: Ruby (6%)
19: Lisp (6%)
20: Cobol (6%)
21: Delphi (6%)
22: Clojure (5%)
23: Objective-C (4%)
24: Matlab (3%)
25: Julia (3%)
26: Kotlin (3%)
27: Rust (1%)
28: Dart (1%)
29: Lua (1%)
30: Haskell (1%)
31: Elixir (1%)
