# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
# Reference list of languages that the class's answers are compared against.
all_langs = [
    'Python', 'Java', 'JavaScript', 'TypeScript',
    'PHP', 'SQL', 'C', 'C++',
    'C#', 'Ruby', 'R', 'Matlab',
    'Go', 'Rust', 'Objective-C', 'Swift',
    'Visual Basic', 'Perl', 'Cobol', 'Fortran',
    'Lisp', 'Assembly', 'Kotlin', 'Dart',
    'Scala', 'Lua', 'Delphi', 'Haskell',
    'Julia', 'Clojure', 'Elixir', 'Pascal',
]

## Load data

In [2]:
import csv

# Read the survey export. Column names are supplied explicitly, so the file's
# own header row comes back as the first record and is discarded.
# newline='' is what the csv module asks for so that it can handle line
# endings (including newlines inside quoted fields) itself.
with open('../data/Survey-2023-Nov.csv', newline='') as file:
    reader = csv.DictReader(file, fieldnames=("timestamp", "languages", "years"))
    next(reader, None)  # drop the header row; the default tolerates an empty file
    # One list of languages per respondent: the 'languages' cell is a
    # ';'-separated list. Blank entries (an unanswered question or a stray ';')
    # are dropped so '' is never counted as a language, and a short row whose
    # 'languages' field is missing (None) is treated as listing no languages.
    responses = [
        [lang.strip() for lang in (response['languages'] or '').split(';') if lang.strip()]
        for response in reader
    ]

# Every row counts as a response, even one that lists no languages.
num_responses = len(responses)
# Flat list of every language mention across all respondents.
langs_known = [lang for langs in responses for lang in langs]

print(num_responses)

48


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12 / 21 languages known by this class (57%)**

In [3]:
# Collapse the flat list of mentions into the distinct languages named.
langs_known_set = {*langs_known}
print(langs_known_set)

{'Go', 'Ruby', 'Matlab', 'Pascal', 'C#', 'Clojure', 'PHP', 'C', 'Objective-C', 'Scala', 'Haskell', 'Visual Basic', 'Kotlin', 'R', 'TypeScript', 'JavaScript', 'Fortran', 'Java', 'Delphi', 'Python', 'SQL', 'Assembly', 'Swift', 'Perl', 'C++', 'Rust', 'Lua', 'Lisp'}


In [4]:
# Number of distinct languages mentioned vs. the size of the reference list,
# with the ratio shown as a whole-number percentage.
known_total = len(langs_known_set)
listed_total = len(all_langs)
coverage_pct = round(known_total / listed_total * 100)
print(f"{known_total} / {listed_total} languages known by this class ({coverage_pct}%)")

28 / 32 languages known by this class (88%)


## List languages not known by anyone in the class

In [5]:
# Languages in the reference list that no respondent mentioned, shown in
# alphabetical order.
not_known = set(all_langs).difference(langs_known_set)
not_known_list = sorted(not_known)
print(not_known_list)

['Cobol', 'Dart', 'Elixir', 'Julia']


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known

e.g. **1: Python (93%)**

In [6]:
from collections import Counter

# Tally how many times each language was mentioned across all responses.
langs_count = Counter(langs_known)

# most_common() yields (language, count) pairs from highest count to lowest.
ranking = langs_count.most_common()
for pos, entry in enumerate(ranking, 1):
    lang, count = entry
    share = count / num_responses  # fraction of respondents, not of mentions
    percent_known = round(share * 100)
    print(f"{pos}: {lang} ({percent_known}%)")

1: Python (85%)
2: C (44%)
3: SQL (40%)
4: Perl (33%)
5: Java (31%)
6: JavaScript (31%)
7: C++ (31%)
8: Pascal (21%)
9: Assembly (19%)
10: Matlab (17%)
11: PHP (15%)
12: Ruby (15%)
13: Go (10%)
14: C# (10%)
15: Lisp (8%)
16: Scala (8%)
17: R (8%)
18: Delphi (8%)
19: Objective-C (6%)
20: TypeScript (6%)
21: Visual Basic (6%)
22: Haskell (6%)
23: Lua (4%)
24: Rust (2%)
25: Clojure (2%)
26: Fortran (2%)
27: Swift (2%)
28: Kotlin (2%)
