# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
# Reference list of languages that the survey responses are compared against.
all_langs = [
    'Python', 'Java', 'JavaScript', 'TypeScript',
    'PHP', 'SQL', 'C', 'C++',
    'C#', 'Ruby', 'R', 'Matlab',
    'Go', 'Rust', 'Objective-C', 'Swift',
    'Visual Basic', 'Perl', 'Cobol', 'Fortran',
    'Lisp', 'Assembly', 'Kotlin', 'Dart',
    'Scala', 'Lua', 'Delphi', 'Haskell',
    'Julia', 'Clojure', 'Elixir', 'Pascal',
]

## Load data

In [2]:
import csv

# Read the survey export: one row per respondent, with the languages they know
# stored as a single ';'-separated string in the second column.
# newline='' lets the csv module handle line endings itself, as its docs require.
with open('../data/Survey-2024-Jan.csv', newline='') as file:
    # The file's own header is replaced by these shorter keys.
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'years'))
    next(reader, None)  # Skip the header row (parsed by csv, so quoted headers are handled)

    # One list of languages per respondent. A blank or missing languages cell
    # yields an empty list rather than [''] (or a crash on None), so the empty
    # string is never counted as a language further down.
    langs_known = [
        [lang.strip() for lang in (row['languages'] or '').split(';') if lang.strip()]
        for row in reader
    ]

# Every data row is one respondent, including those who listed no languages.
num_attendees = len(langs_known)

print(langs_known)
print(num_attendees)

[['Python', 'SQL'], ['Python'], ['Python'], ['Java', 'JavaScript', 'SQL', 'C', 'C++', 'Visual Basic', 'Perl', 'Cobol', 'Fortran', 'Assembly', 'Pascal'], ['Python', 'Perl'], ['JavaScript', 'TypeScript', 'PHP', 'SQL', 'C++', 'Visual Basic', 'Perl'], ['Python', 'Java', 'C++', 'Perl'], ['Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'SQL', 'Ruby', 'R', 'Objective-C', 'Swift', 'Perl', 'Kotlin', 'Dart'], ['Python', 'Java', 'JavaScript', 'SQL', 'C', 'C++', 'Matlab', 'Visual Basic', 'Cobol', 'Fortran', 'Assembly'], ['Java', 'JavaScript'], ['Python', 'Java', 'JavaScript', 'SQL', 'C', 'C++'], ['Python', 'SQL', 'C', 'R'], ['Java', 'C++'], ['SQL', 'R'], ['Python', 'Java'], ['Ruby', 'Fortran', 'Lisp'], ['Python', 'Java', 'C#', 'Ruby', 'Visual Basic', 'Cobol', 'Pascal'], ['SQL'], ['Python', 'Java', 'JavaScript', 'C', 'Rust'], ['Python', 'JavaScript', 'Perl'], ['Java', 'SQL', 'C#'], ['Python', 'Java'], ['Python', 'JavaScript', 'SQL', 'C'], ['Python', 'SQL', 'C', 'C++'], ['Python', 'Java', 'Java

## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12 / 21 languages known by this class (57%)**

In [3]:
# Merge every respondent's list into one set of distinct languages.
unique_langs = set().union(*langs_known)

# Compare the number of distinct languages known with the size of the reference list.
num_all = len(all_langs)
num_unique = len(unique_langs)
print(f"{num_unique} / {num_all} languages known by this class ({round(num_unique / num_all * 100)}%)")

24 / 32 languages known by this class (75%)


## List languages not known by anyone in the class

In [4]:
# Languages in the reference list that no respondent mentioned.
not_known = set(all_langs).difference(unique_langs)

# sorted() accepts any iterable and returns a new alphabetised list.
sorted_not_known = sorted(not_known)
print(f"Nobody knows {', '.join(sorted_not_known)}")

Nobody knows Clojure, Delphi, Elixir, Go, Haskell, Julia, Lua, Scala


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, ordered from most to least known, where `percent_known` is the percentage of respondents who know that language.

e.g. **1: Python (93%)**

In [5]:
from collections import Counter

# One entry per (respondent, language) mention across all responses.
flattened_langs = [lang for langs in langs_known for lang in langs]

# Tally every mention in a single pass instead of rescanning the flattened
# list once per language with list.count().
mention_counts = Counter(flattened_langs)

# Report every language in all_langs, in list order; a Counter returns 0 for
# languages nobody mentioned, so unknown languages still appear in the result.
langs_by_count = {lang: mention_counts[lang] for lang in all_langs}

print(langs_by_count)

{'Python': 17, 'Java': 13, 'JavaScript': 10, 'TypeScript': 2, 'PHP': 2, 'SQL': 12, 'C': 7, 'C++': 7, 'C#': 2, 'Ruby': 3, 'R': 3, 'Matlab': 1, 'Go': 0, 'Rust': 1, 'Objective-C': 1, 'Swift': 1, 'Visual Basic': 4, 'Perl': 6, 'Cobol': 3, 'Fortran': 3, 'Lisp': 1, 'Assembly': 2, 'Kotlin': 1, 'Dart': 1, 'Scala': 0, 'Lua': 0, 'Delphi': 0, 'Haskell': 0, 'Julia': 0, 'Clojure': 0, 'Elixir': 0, 'Pascal': 2}


In [6]:
# Order the (language, count) pairs from most to least known. The sort is
# stable, so languages with equal counts keep the dictionary's order.
langs_by_count_list = sorted(
    langs_by_count.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
langs_by_count_list

[('Python', 17),
 ('Java', 13),
 ('SQL', 12),
 ('JavaScript', 10),
 ('C', 7),
 ('C++', 7),
 ('Perl', 6),
 ('Visual Basic', 4),
 ('Ruby', 3),
 ('R', 3),
 ('Cobol', 3),
 ('Fortran', 3),
 ('TypeScript', 2),
 ('PHP', 2),
 ('C#', 2),
 ('Assembly', 2),
 ('Pascal', 2),
 ('Matlab', 1),
 ('Rust', 1),
 ('Objective-C', 1),
 ('Swift', 1),
 ('Lisp', 1),
 ('Kotlin', 1),
 ('Dart', 1),
 ('Go', 0),
 ('Scala', 0),
 ('Lua', 0),
 ('Delphi', 0),
 ('Haskell', 0),
 ('Julia', 0),
 ('Clojure', 0),
 ('Elixir', 0)]

In [7]:
# Print the ranking, numbering positions from 1. Each percentage is the count
# for that language relative to the number of respondents.
for pos, (lang, count) in enumerate(langs_by_count_list, start=1):
    # With no respondents (header-only file), report 0% instead of dividing by zero.
    percent_known = round(count / num_attendees * 100) if num_attendees else 0
    print(f"{pos}: {lang} ({percent_known}%)")

1: Python (68%)
2: Java (52%)
3: SQL (48%)
4: JavaScript (40%)
5: C (28%)
6: C++ (28%)
7: Perl (24%)
8: Visual Basic (16%)
9: Ruby (12%)
10: R (12%)
11: Cobol (12%)
12: Fortran (12%)
13: TypeScript (8%)
14: PHP (8%)
15: C# (8%)
16: Assembly (8%)
17: Pascal (8%)
18: Matlab (4%)
19: Rust (4%)
20: Objective-C (4%)
21: Swift (4%)
22: Lisp (4%)
23: Kotlin (4%)
24: Dart (4%)
25: Go (0%)
26: Scala (0%)
27: Lua (0%)
28: Delphi (0%)
29: Haskell (0%)
30: Julia (0%)
31: Clojure (0%)
32: Elixir (0%)
