# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
# Reference list of languages that the survey answers are compared against.
all_langs = [
    'Python', 'Java', 'JavaScript', 'TypeScript',
    'PHP', 'SQL', 'C', 'C++',
    'C#', 'Ruby', 'R', 'Matlab',
    'Go', 'Rust', 'Objective-C', 'Swift',
    'Visual Basic', 'Perl', 'Cobol', 'Fortran',
    'Lisp', 'Assembly', 'Kotlin', 'Dart',
    'Scala', 'Lua', 'Delphi', 'Haskell',
    'Julia', 'Clojure', 'Elixir', 'Pascal',
]

## Load data

In [2]:
import csv

# Read the survey export and flatten it into a single list of language mentions.
# newline='' is required by the csv module so that line breaks inside quoted
# fields are handled by the parser rather than by the file object.
with open('data/Survey-2023:11:02.csv', newline='') as file:
    reader = csv.DictReader(file, fieldnames=("timestamp", "languages", "years"))
    # Column names are supplied above, so the first parsed row is the header.
    # Skipping it through the reader (not file.readline()) keeps a quoted,
    # multi-line header intact; the default avoids StopIteration on an empty file.
    next(reader, None)
    # One list of languages per respondent. A missing or blank answer yields []
    # instead of [''] so that no empty-string "language" leaks into langs_known.
    responses = [
        [lang.strip() for lang in (response['languages'] or '').split(';') if lang.strip()]
        for response in reader
    ]
    num_responses = len(responses)
    langs_known = [lang for langs in responses for lang in langs]

print(num_responses)

72
['Python', 'Java', 'JavaScript', 'PHP', 'SQL', 'C', 'Ruby', 'Go', 'Objective-C', 'Perl', 'Python', 'JavaScript', 'SQL', 'Python', 'SQL', 'C', 'Python', 'Java', 'JavaScript', 'TypeScript', 'SQL', 'C#', 'Ruby', 'Go', 'Rust', 'Visual Basic', 'Perl', 'Lisp', 'Assembly', 'Scala', 'Lua', 'Haskell', 'Clojure', 'Python', 'Ruby', 'Go', 'Python', 'Fortran', 'Python', 'Ruby', 'Go', 'Python', 'Python', 'JavaScript', 'SQL', 'Ruby', 'Python', 'Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'SQL', 'C', 'C++', 'C#', 'R', 'Matlab', 'Objective-C', 'Swift', 'Visual Basic', 'Perl', 'Assembly', 'Scala', 'Delphi', 'Pascal', 'Python', 'Python', 'R', 'Matlab', 'Python', 'Ruby']


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12 / 21 languages known by this class (57%)**

In [3]:
# Distinct languages mentioned by at least one respondent.
langs_known_set = set(langs_known)
# A set of strings iterates in an order that depends on per-process string hash
# randomization, so print a sorted list to keep the output stable across
# kernel restarts.
print(sorted(langs_known_set))

27 / 32 languages known by this class (84%)


In [None]:
# Summarise how many of the listed languages at least one respondent knows.
known_total = len(langs_known_set)
listed_total = len(all_langs)
known_pct = round(known_total / listed_total * 100)
print(f"{known_total} / {listed_total} languages known by this class ({known_pct}%)")

## List languages not known by anyone in the class

In [4]:
# Languages in the reference list that no respondent mentioned, alphabetically.
not_known = set(all_langs).difference(langs_known_set)
not_known_list = sorted(not_known)
print(not_known_list)

{'Julia', 'Dart', 'Elixir', 'Cobol', 'Kotlin'}


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known.

E.g. **1: Python (93%)**

In [5]:
from collections import Counter

# Tally how many times each language was mentioned across all responses.
langs_count = Counter(langs_known)

# most_common() yields (language, mentions) pairs from most to least mentioned.
ranking = langs_count.most_common()
for rank, entry in enumerate(ranking, 1):
    language, mentions = entry
    share = round(mentions / num_responses * 100)
    print(f"{rank}: {language} ({share}%)")