# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
all_langs = ['Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'SQL', 'C', 'C++', 'C#',
             'Ruby', 'R', 'Matlab', 'Go', 'Rust', 'Objective-C', 'Swift', 'Visual Basic',
             'Perl', 'Cobol', 'Fortran', 'Lisp', 'Assembly', 'Kotlin', 'Dart', 'Scala',
             'Lua', 'Delphi', 'Haskell', 'Julia', 'Clojure', 'Elixir', 'Pascal']

## Load data

In [2]:
import csv

with open('../data/Programming language - responses.csv') as file:
    file.readline()
    reader = csv.DictReader(file, fieldnames=['timestamp', 'languages', 'years'])
    results = [lang for line in reader for lang in line['languages'].split(", ")]

print(len(results))
print(results)

72
['Python', 'Java', 'JavaScript', 'PHP', 'SQL', 'C', 'Ruby', 'Go', 'Objective-C', 'Perl', 'Python', 'JavaScript', 'SQL', 'Python', 'SQL', 'C', 'Python', 'Java', 'JavaScript', 'TypeScript', 'SQL', 'C#', 'Ruby', 'Go', 'Rust', 'Visual Basic', 'Perl', 'Lisp', 'Assembly', 'Scala', 'Lua', 'Haskell', 'Clojure', 'Python', 'Ruby', 'Go', 'Python', 'Fortran', 'Python', 'Ruby', 'Go', 'Python', 'Python', 'JavaScript', 'SQL', 'Ruby', 'Python', 'Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'SQL', 'C', 'C++', 'C#', 'R', 'Matlab', 'Objective-C', 'Swift', 'Visual Basic', 'Perl', 'Assembly', 'Scala', 'Delphi', 'Pascal', 'Python', 'Python', 'R', 'Matlab', 'Python', 'Ruby']


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12/21 languages known by this class (57%)**

In [3]:
unique_langs = set(results)
num_known = len(unique_langs)
num_all = len(all_langs)
print(f"{num_known} / {num_all} languages known by this class ({round(num_known/num_all * 100)}%)")

27 / 32 languages known by this class (84%)


## List languages not known by anyone in the class

In [4]:
not_known = set(all_langs) - unique_langs
print(not_known)

{'Julia', 'Dart', 'Elixir', 'Cobol', 'Kotlin'}


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known

e.g. **1: Python (93%)**

In [5]:
from collections import Counter

langs_by_count = Counter(results)
langs_by_count.update(not_known)
langs_by_count.subtract(not_known)

In [6]:
for i, (lang, count) in enumerate(langs_by_count.most_common()):
    print(f"{i + 1}: {lang} ({count})")

1: Python (14)
2: SQL (6)
3: Ruby (6)
4: JavaScript (5)
5: Go (4)
6: Java (3)
7: C (3)
8: Perl (3)
9: PHP (2)
10: Objective-C (2)
11: TypeScript (2)
12: C# (2)
13: Visual Basic (2)
14: Assembly (2)
15: Scala (2)
16: R (2)
17: Matlab (2)
18: Rust (1)
19: Lisp (1)
20: Lua (1)
21: Haskell (1)
22: Clojure (1)
23: Fortran (1)
24: C++ (1)
25: Swift (1)
26: Delphi (1)
27: Pascal (1)
28: Julia (0)
29: Dart (0)
30: Elixir (0)
31: Cobol (0)
32: Kotlin (0)
