# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [62]:
# Every language offered as an option in the survey, in questionnaire order.
all_langs = [
    'Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'SQL', 'C', 'C++',
    'C#', 'Ruby', 'R', 'Matlab', 'Go', 'Rust', 'Objective-C', 'Swift',
    'Visual Basic', 'Perl', 'Cobol', 'Fortran', 'Lisp', 'Assembly', 'Kotlin',
    'Dart', 'Scala', 'Lua', 'Delphi', 'Haskell', 'Julia', 'Clojure', 'Elixir',
    'Pascal',
]

## Load data

In [63]:
import csv
from pprint import pprint

# Read the survey export and keep, for each respondent, only the list of
# languages they ticked (the 'languages' column is a ';'-separated string).
# newline='' lets the csv module do its own line-ending handling, as its
# documentation requires; the explicit encoding keeps the result independent
# of the platform's default locale.
with open('../data/Survey-data.csv', newline='', encoding='utf-8') as file:
    file.readline()  # Ignore first line (header); column names come from fieldnames below
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'years'))  # yields one dict per row, keyed by fieldnames
    responses: list[list[str]] = [
        # Drop empty fragments so a blank answer becomes [] rather than ['']
        [lang for lang in response['languages'].split(';') if lang]
        for response in reader
    ]

# Number of survey rows read; used later as the denominator for percentages.
num_responses = len(responses)
print(num_responses)
responses

49


[['Python', 'Java', 'JavaScript', 'SQL', 'C', 'Ruby', 'R', 'Objective-C'],
 ['Python', 'JavaScript', 'SQL', 'R', 'Visual Basic'],
 ['Python', 'JavaScript', 'C', 'C++', 'Matlab'],
 ['Python'],
 ['Java'],
 ['Python', 'SQL'],
 ['Python', 'Java', 'Go', 'Perl', 'Elixir'],
 ['Python', 'Matlab', 'Perl', 'Lisp', 'Assembly'],
 ['Python', 'JavaScript', 'PHP', 'SQL', 'Go'],
 ['Python', 'Java', 'JavaScript', 'TypeScript'],
 ['Python'],
 ['Python', 'C', 'C++', 'Matlab'],
 ['Python',
  'Java',
  'JavaScript',
  'C',
  'C++',
  'Ruby',
  'R',
  'Go',
  'Perl',
  'Fortran',
  'Lisp',
  'Assembly',
  'Lua',
  'Haskell',
  'Pascal'],
 ['Python', 'Java', 'C++', 'C#', 'Go', 'Perl', 'Fortran', 'Assembly', 'Scala'],
 ['Python',
  'Java',
  'SQL',
  'C',
  'C++',
  'C#',
  'Visual Basic',
  'Fortran',
  'Lisp',
  'Pascal'],
 ['SQL', 'Cobol'],
 ['Python', 'SQL', 'R'],
 ['Python', 'Java', 'C', 'Lua'],
 ['SQL'],
 ['JavaScript'],
 ['Python', 'Java'],
 ['Python', 'Java', 'JavaScript', 'SQL', 'C'],
 ['Python', 'Ja

## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class ({percent known}%)"`, with the percentage rounded to a whole number.

E.g. **12 / 21 languages known by this class (57%)**

In [64]:
# Union of every respondent's answers: the distinct languages known by at
# least one person in the class.
langs_known = set().union(*responses)

# Report the count against the full option list, with a whole-number percentage.
print(f"{len(langs_known)} / {len(all_langs)} languages known by this class ({len(langs_known)/len(all_langs) * 100:.0f}%)")

26 / 32 languages known by this class (81%)


## List languages not known by anyone in the class

In [65]:
# Languages offered in the survey that no respondent selected, alphabetised.
not_known = sorted(set(all_langs).difference(langs_known))
print(not_known)

['Clojure', 'Dart', 'Julia', 'Kotlin', 'Rust', 'Swift']


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, ordered from most to least commonly known.

e.g. **1: Python (93%)**

In [66]:
from collections import Counter

# Count responses
# Flatten all answers into one list, then tally them in a single pass
# (calling list.count() per language would rescan the whole list each time).
response_langs = [lang for response in responses for lang in response]
lang_tally = Counter(response_langs)

# Build the result in alphabetical order over *all* survey options; Counter
# returns 0 for a missing key, so languages nobody picked still get an entry.
lang_counts: dict[str, int] = {lang: lang_tally[lang] for lang in sorted(all_langs)}

lang_counts

{'Assembly': 3,
 'C': 17,
 'C#': 6,
 'C++': 14,
 'Clojure': 0,
 'Cobol': 2,
 'Dart': 0,
 'Delphi': 1,
 'Elixir': 1,
 'Fortran': 4,
 'Go': 6,
 'Haskell': 2,
 'Java': 20,
 'JavaScript': 16,
 'Julia': 0,
 'Kotlin': 0,
 'Lisp': 4,
 'Lua': 2,
 'Matlab': 4,
 'Objective-C': 1,
 'PHP': 6,
 'Pascal': 6,
 'Perl': 11,
 'Python': 40,
 'R': 7,
 'Ruby': 4,
 'Rust': 0,
 'SQL': 22,
 'Scala': 1,
 'Swift': 0,
 'TypeScript': 4,
 'Visual Basic': 6}

In [67]:
def to_percent(num):
    """Return *num* as a percentage of ``num_responses``, rounded to 1 d.p."""
    share = num / num_responses
    return round(share * 100, 1)


# Pair each language with its percentage, then order from most to least known.
# sorted() is stable, so languages with equal percentages keep the order they
# have in lang_counts.
lang_percent_list: list[tuple[str, float]] = sorted(
    ((name, to_percent(n)) for name, n in lang_counts.items()),
    key=lambda pair: pair[1],
    reverse=True,
)
lang_percent_list

[('Python', 81.6),
 ('SQL', 44.9),
 ('Java', 40.8),
 ('C', 34.7),
 ('JavaScript', 32.7),
 ('C++', 28.6),
 ('Perl', 22.4),
 ('R', 14.3),
 ('C#', 12.2),
 ('Go', 12.2),
 ('PHP', 12.2),
 ('Pascal', 12.2),
 ('Visual Basic', 12.2),
 ('Fortran', 8.2),
 ('Lisp', 8.2),
 ('Matlab', 8.2),
 ('Ruby', 8.2),
 ('TypeScript', 8.2),
 ('Assembly', 6.1),
 ('Cobol', 4.1),
 ('Haskell', 4.1),
 ('Lua', 4.1),
 ('Delphi', 2.0),
 ('Elixir', 2.0),
 ('Objective-C', 2.0),
 ('Scala', 2.0),
 ('Clojure', 0.0),
 ('Dart', 0.0),
 ('Julia', 0.0),
 ('Kotlin', 0.0),
 ('Rust', 0.0),
 ('Swift', 0.0)]

In [68]:
# Print the ranking, numbering positions from 1.
# NOTE: percent was already rounded to one decimal place when the list was
# built, so the :.0f below is a second rounding step.
for pos, entry in enumerate(lang_percent_list, start=1):
    lang, percent = entry
    print(f"{pos}: {lang} ({percent:.0f}%)")

1: Python (82%)
2: SQL (45%)
3: Java (41%)
4: C (35%)
5: JavaScript (33%)
6: C++ (29%)
7: Perl (22%)
8: R (14%)
9: C# (12%)
10: Go (12%)
11: PHP (12%)
12: Pascal (12%)
13: Visual Basic (12%)
14: Fortran (8%)
15: Lisp (8%)
16: Matlab (8%)
17: Ruby (8%)
18: TypeScript (8%)
19: Assembly (6%)
20: Cobol (4%)
21: Haskell (4%)
22: Lua (4%)
23: Delphi (2%)
24: Elixir (2%)
25: Objective-C (2%)
26: Scala (2%)
27: Clojure (0%)
28: Dart (0%)
29: Julia (0%)
30: Kotlin (0%)
31: Rust (0%)
32: Swift (0%)
