# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
# Reference list of languages that the survey responses are compared against
# (presumably the options offered on the survey form -- confirm).
# Order matters: later cells build a dict from this list, and ties in the
# ranking keep this order.
all_langs = [
    'Python', 'Java', 'JavaScript', 'TypeScript',
    'PHP', 'SQL', 'C', 'C++',
    'C#', 'Ruby', 'R', 'Matlab',
    'Go', 'Rust', 'Objective-C', 'Swift',
    'Visual Basic', 'Perl', 'Cobol', 'Fortran',
    'Lisp', 'Assembly', 'Kotlin', 'Dart',
    'Scala', 'Lua', 'Delphi', 'Haskell',
    'Julia', 'Clojure', 'Elixir', 'Pascal',
]

## Load data

In [2]:
import csv

# Read the survey export and keep, for each respondent, the list of languages
# they selected. The 'languages' column holds the choices joined by ';'.
# newline='' is what the csv module asks for, so that line breaks inside
# quoted fields are parsed as part of the field rather than as a new row.
with open('../data/Survey-2024-Jul.csv', newline='') as file:
    # Column names are supplied explicitly, so the file's own header record is
    # read as data and has to be discarded.
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'years'))
    next(reader, None)  # Skip the header as a CSV record; no error on an empty file
    # Strip stray whitespace and drop empty entries so that a respondent who
    # selected nothing yields [] instead of [''] -- '' is not a language and
    # would otherwise be counted as "known" and break the per-language tally.
    # `or ''` covers short rows where DictReader fills the column with None.
    responses = [
        [lang.strip() for lang in (row['languages'] or '').split(';') if lang.strip()]
        for row in reader
    ]

print(responses)
print(len(responses))

[['Python', 'Java', 'JavaScript', 'SQL', 'C', 'Ruby', 'R', 'Objective-C'], ['Python', 'JavaScript', 'SQL', 'R', 'Visual Basic'], ['Python', 'JavaScript', 'C', 'C++', 'Matlab'], ['Python'], ['Java'], ['Python', 'SQL'], ['Python', 'Java', 'Go', 'Perl', 'Elixir'], ['Python', 'Matlab', 'Perl', 'Lisp', 'Assembly'], ['Python', 'JavaScript', 'PHP', 'SQL', 'Go'], ['Python', 'Java', 'JavaScript', 'TypeScript'], ['Python'], ['Python', 'C', 'C++', 'Matlab'], ['Python', 'Java', 'JavaScript', 'C', 'C++', 'Ruby', 'R', 'Go', 'Perl', 'Fortran', 'Lisp', 'Assembly', 'Lua', 'Haskell', 'Pascal'], ['Python', 'Java', 'C++', 'C#', 'Go', 'Perl', 'Fortran', 'Assembly', 'Scala'], ['Python', 'Java', 'SQL', 'C', 'C++', 'C#', 'Visual Basic', 'Fortran', 'Lisp', 'Pascal'], ['SQL', 'Cobol'], ['Python', 'SQL', 'R'], ['Python', 'Java', 'C', 'Lua'], ['SQL'], ['JavaScript'], ['Python', 'Java'], ['Python', 'Java', 'JavaScript', 'SQL', 'C']]
22


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class ({percent known}%)"`, with the percentage rounded to a whole number.

E.g. **12 / 21 languages known by this class (57%)**

In [3]:
# Union of every respondent's answers: each language that at least one
# person in the class listed.
langs_known = set().union(*responses)

known_total = len(langs_known)
listed_total = len(all_langs)
share_known = round(known_total / listed_total * 100)  # whole-number percentage
print(f"{known_total} / {listed_total} languages known by this class ({share_known}%)")

25 / 32 languages known by this class (78%)


## List languages not known by anyone in the class

In [4]:
# Languages from the reference list that appear in nobody's response,
# printed alphabetically as a bullet list.
not_known = set(all_langs).difference(langs_known)
print("Nobody knows:")
for name in sorted(not_known):
    print(f"- {name}")

Nobody knows:
- Clojure
- Dart
- Delphi
- Julia
- Kotlin
- Rust
- Swift


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known, where `percent_known` is the percentage of respondents who know that language.

e.g. **1: Python (93%)**

In [5]:
# Start every listed language at zero so that languages nobody knows still
# appear in the tally, then add one for each mention across all responses.
# A response naming a language that is not in all_langs raises KeyError.
langs_by_count = dict.fromkeys(all_langs, 0)

for mentioned in (name for answer in responses for name in answer):
    langs_by_count[mentioned] += 1

print(langs_by_count)

{'Python': 18, 'Java': 10, 'JavaScript': 8, 'TypeScript': 1, 'PHP': 1, 'SQL': 9, 'C': 7, 'C++': 5, 'C#': 2, 'Ruby': 2, 'R': 4, 'Matlab': 3, 'Go': 4, 'Rust': 0, 'Objective-C': 1, 'Swift': 0, 'Visual Basic': 2, 'Perl': 4, 'Cobol': 1, 'Fortran': 3, 'Lisp': 3, 'Assembly': 3, 'Kotlin': 0, 'Dart': 0, 'Scala': 1, 'Lua': 2, 'Delphi': 0, 'Haskell': 1, 'Julia': 0, 'Clojure': 0, 'Elixir': 1, 'Pascal': 2}


In [6]:
# (language, count) pairs ordered from most to least known. The sort is
# stable, so languages with equal counts keep their order from langs_by_count.
langs_count_list = sorted(
    langs_by_count.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
print(langs_count_list)

[('Python', 18), ('Java', 10), ('SQL', 9), ('JavaScript', 8), ('C', 7), ('C++', 5), ('R', 4), ('Go', 4), ('Perl', 4), ('Matlab', 3), ('Fortran', 3), ('Lisp', 3), ('Assembly', 3), ('C#', 2), ('Ruby', 2), ('Visual Basic', 2), ('Lua', 2), ('Pascal', 2), ('TypeScript', 1), ('PHP', 1), ('Objective-C', 1), ('Cobol', 1), ('Scala', 1), ('Haskell', 1), ('Elixir', 1), ('Rust', 0), ('Swift', 0), ('Kotlin', 0), ('Dart', 0), ('Delphi', 0), ('Julia', 0), ('Clojure', 0)]


In [7]:
# Print the ranking, numbered from 1. Each percentage is the share of
# respondents who listed the language, rounded to a whole number.
num_responses = len(responses)
for rank, (name, mentions) in enumerate(langs_count_list, 1):
    share = round(mentions / num_responses * 100)
    print(f"{rank}: {name} ({share}%)")

1: Python (82%)
2: Java (45%)
3: SQL (41%)
4: JavaScript (36%)
5: C (32%)
6: C++ (23%)
7: R (18%)
8: Go (18%)
9: Perl (18%)
10: Matlab (14%)
11: Fortran (14%)
12: Lisp (14%)
13: Assembly (14%)
14: C# (9%)
15: Ruby (9%)
16: Visual Basic (9%)
17: Lua (9%)
18: Pascal (9%)
19: TypeScript (5%)
20: PHP (5%)
21: Objective-C (5%)
22: Cobol (5%)
23: Scala (5%)
24: Haskell (5%)
25: Elixir (5%)
26: Rust (0%)
27: Swift (0%)
28: Kotlin (0%)
29: Dart (0%)
30: Delphi (0%)
31: Julia (0%)
32: Clojure (0%)
