# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
# Reference list of languages that the survey answers are compared against below.
all_langs = [
    "Python", "Java", "JavaScript", "TypeScript",
    "PHP", "SQL", "C", "C++",
    "C#", "Ruby", "R", "Matlab",
    "Go", "Rust", "Objective-C", "Swift",
    "Visual Basic", "Perl", "Cobol", "Fortran",
    "Lisp", "Assembly", "Kotlin", "Dart",
    "Scala", "Lua", "Delphi", "Haskell",
    "Julia", "Clojure", "Elixir", "Pascal",
]

## Load data

In [15]:
import csv
from collections import Counter
from pprint import pprint

# Read the survey export and build one list of language names per respondent.
# newline='' is what the csv module asks for, so that quoted fields containing
# line breaks are parsed as a single field.
with open('data/Survey-2024-Jan.csv', newline='') as file:
    file.readline()  # Ignore first line (header); column names are supplied via fieldnames instead
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'years'))  # yields one dict per row
    lang_responses = [
        # The 'languages' column holds the selections joined with ';'.
        # A blank field would split into [''] and a short row gives None, so
        # both are normalised to an empty list rather than a phantom '' language.
        [lang.strip() for lang in (response['languages'] or '').split(';') if lang.strip()]
        for response in reader
    ]

pprint(lang_responses)
# Every row counts as a response, including respondents who selected no languages.
num_responses = len(lang_responses)
print(num_responses)


[['Python', 'SQL'],
 ['Python'],
 ['Python'],
 ['Java',
  'JavaScript',
  'SQL',
  'C',
  'C++',
  'Visual Basic',
  'Perl',
  'Cobol',
  'Fortran',
  'Assembly',
  'Pascal'],
 ['Python', 'Perl'],
 ['JavaScript', 'TypeScript', 'PHP', 'SQL', 'C++', 'Visual Basic', 'Perl'],
 ['Python', 'Java', 'C++', 'Perl'],
 ['Python',
  'Java',
  'JavaScript',
  'TypeScript',
  'PHP',
  'SQL',
  'Ruby',
  'R',
  'Objective-C',
  'Swift',
  'Perl',
  'Kotlin',
  'Dart'],
 ['Python',
  'Java',
  'JavaScript',
  'SQL',
  'C',
  'C++',
  'Matlab',
  'Visual Basic',
  'Cobol',
  'Fortran',
  'Assembly'],
 ['Java', 'JavaScript'],
 ['Python', 'Java', 'JavaScript', 'SQL', 'C', 'C++'],
 ['Python', 'SQL', 'C', 'R'],
 ['Java', 'C++'],
 ['SQL', 'R'],
 ['Python', 'Java'],
 ['Ruby', 'Fortran', 'Lisp'],
 ['Python', 'Java', 'C#', 'Ruby', 'Visual Basic', 'Cobol', 'Pascal'],
 ['SQL'],
 ['Python', 'Java', 'JavaScript', 'C', 'Rust'],
 ['Python', 'JavaScript', 'Perl'],
 ['Java', 'SQL', 'C#'],
 ['Python', 'Java'],
 ['Pytho

## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class"`.

E.g. **12 / 21 languages known by this class**

In [16]:
# Union of every respondent's selections: each language appears once,
# however many people chose it.
langs_known = set().union(*lang_responses)

print(f"{len(langs_known)} / {len(all_langs)} languages known by this class")

24 / 32 languages known by this class


## List languages not known by anyone in the class

In [18]:
# Keep the order of all_langs while dropping anything at least one respondent selected.
not_known = list(filter(lambda lang: lang not in langs_known, all_langs))

print(sorted(not_known))
print(len(not_known))

['Clojure', 'Delphi', 'Elixir', 'Go', 'Haskell', 'Julia', 'Lua', 'Scala']
8


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known

e.g. **1: Python (93%)**

In [25]:
# Flatten all responses into one list of selections, then tally them in a single
# pass rather than re-scanning the whole list once per language.
langs_flat = [lang for langs in lang_responses for lang in langs]
lang_tally = Counter(langs_flat)
# (count, language) pairs in all_langs order; a Counter returns 0 for languages nobody chose.
lang_counts = [(lang_tally[lang], lang) for lang in all_langs]
lang_counts

[(17, 'Python'),
 (13, 'Java'),
 (10, 'JavaScript'),
 (2, 'TypeScript'),
 (2, 'PHP'),
 (12, 'SQL'),
 (7, 'C'),
 (7, 'C++'),
 (2, 'C#'),
 (3, 'Ruby'),
 (3, 'R'),
 (1, 'Matlab'),
 (0, 'Go'),
 (1, 'Rust'),
 (1, 'Objective-C'),
 (1, 'Swift'),
 (4, 'Visual Basic'),
 (6, 'Perl'),
 (3, 'Cobol'),
 (3, 'Fortran'),
 (1, 'Lisp'),
 (2, 'Assembly'),
 (1, 'Kotlin'),
 (1, 'Dart'),
 (0, 'Scala'),
 (0, 'Lua'),
 (0, 'Delphi'),
 (0, 'Haskell'),
 (0, 'Julia'),
 (0, 'Clojure'),
 (0, 'Elixir'),
 (2, 'Pascal')]

In [28]:
# Tuples compare by count first and then by language name, so with reverse=True
# the highest counts come first and ties fall in reverse string order.
lang_counts = sorted(lang_counts, reverse=True)

In [29]:
# Print one ranked line per language, following the current order of lang_counts.
for pos, (count, lang) in enumerate(lang_counts, start=1):
    # With zero responses the division would raise ZeroDivisionError; report 0% instead.
    percent_known = round(count / num_responses * 100) if num_responses else 0
    print(f"{pos}: {lang} ({percent_known}%)")

1: Python (68%)
2: Java (52%)
3: SQL (48%)
4: JavaScript (40%)
5: C++ (28%)
6: C (28%)
7: Perl (24%)
8: Visual Basic (16%)
9: Ruby (12%)
10: R (12%)
11: Fortran (12%)
12: Cobol (12%)
13: TypeScript (8%)
14: Pascal (8%)
15: PHP (8%)
16: C# (8%)
17: Assembly (8%)
18: Swift (4%)
19: Rust (4%)
20: Objective-C (4%)
21: Matlab (4%)
22: Lisp (4%)
23: Kotlin (4%)
24: Dart (4%)
25: Scala (0%)
26: Lua (0%)
27: Julia (0%)
28: Haskell (0%)
29: Go (0%)
30: Elixir (0%)
31: Delphi (0%)
32: Clojure (0%)
