# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [None]:
# Master list of languages that the survey responses are compared against.
all_langs = [
    "Python", "Java", "JavaScript", "TypeScript",
    "PHP", "SQL", "C", "C++",
    "C#", "Ruby", "R", "Matlab",
    "Go", "Rust", "Objective-C", "Swift",
    "Visual Basic", "Perl", "Cobol", "Fortran",
    "Lisp", "Assembly", "Kotlin", "Dart",
    "Scala", "Lua", "Delphi", "Haskell",
    "Julia", "Clojure", "Elixir", "Pascal",
]

## Load data

In [None]:
import csv

# One entry per respondent: the list of languages that respondent reported.
responses = []

# newline='' is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly; the explicit encoding keeps the result
# independent of the platform's default locale.
with open('../data/Survey-2024-Jul.csv', newline='', encoding='utf-8') as file:
    file.readline()  # Ignore first line (header); column names are given below
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'years'))
    for row in reader:
        # A short row leaves the column as None, and an unanswered question is
        # an empty string: ''.split(';') would yield [''], i.e. a bogus
        # "language" named ''. Drop blanks so such a respondent is recorded as
        # knowing no languages while still counting towards the total.
        answer = row['languages'] or ''
        langs = [name.strip() for name in answer.split(';') if name.strip()]
        responses.append(langs)

print(responses)
print(len(responses))

## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12 / 21 languages known by this class (57%)**

In [None]:
# Union of every respondent's answer: each language at least one person knows.
langs_known = set().union(*responses)

num_known = len(langs_known)
num_listed = len(all_langs)
pct_known = round(num_known / num_listed * 100)
print(f"{num_known} / {num_listed} languages known by this class ({pct_known}%)")

## List languages not known by anyone in the class

In [None]:
# Languages from the master list that no respondent reported knowing.
not_known = set(all_langs).difference(langs_known)

print("Nobody knows:")
for lang in sorted(not_known):  # alphabetical, for stable output
    print(f"- {lang}")

## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, ordered from most to least known, where `percent_known` is the percentage of respondents who know that language.

e.g. **1: Python (93%)**

In [None]:
# Start every listed language at zero so that languages nobody reported are
# still present in the tally.
langs_by_count = dict.fromkeys(all_langs, 0)

# Add one per respondent for each language in their answer.
for answer in responses:
    for lang in answer:
        langs_by_count[lang] += 1

print(langs_by_count)

In [None]:
def tuple_sort(t):
    """Sort key for a ``(language, count)`` pair: return the count (item 1)."""
    return t[1]


# Order the (language, count) pairs from most to least known, using the helper
# above as the key rather than duplicating it in a lambda. The sort is stable,
# so languages with equal counts keep their dictionary insertion order.
langs_count_list = sorted(langs_by_count.items(), key=tuple_sort, reverse=True)
print(langs_count_list)

In [None]:
# Percentages are relative to the number of respondents. Guard the division so
# that a survey with no responses prints 0% for every language instead of
# raising ZeroDivisionError.
total_responses = len(responses)
for pos, (lang, count) in enumerate(langs_count_list, start=1):
    percent_known = round(count / total_responses * 100) if total_responses else 0
    print(f"{pos}: {lang} ({percent_known}%)")