# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
all_langs = ['Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'SQL', 'C', 'C++', 'C#',
             'Ruby', 'R', 'Matlab', 'Go', 'Rust', 'Objective-C', 'Swift', 'Visual Basic',
             'Perl', 'Cobol', 'Fortran', 'Lisp', 'Assembly', 'Kotlin', 'Dart', 'Scala',
             'Lua', 'Delphi', 'Haskell', 'Julia', 'Clojure', 'Elixir', 'Pascal']

## Load data

In [2]:
import csv

langs_known = []
num_people = 0

with open('../data/Survey-2024-May.csv') as file:
    file.readline()  # Ignore first line (header)
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'years'))  # list of dicts with keys passed into fieldnames
    for line in reader:
        langs = line['languages'].split(';')
        langs_known.extend(langs)
        num_people += 1

print(langs_known)
print(num_people)

['Python', 'SQL', 'Python', 'C++', 'Python', 'SQL', 'C++', 'R', 'Matlab', 'Python', 'Java', 'Python', 'SQL', 'Perl', 'Python', 'Java', 'SQL', 'Python', 'Java', 'C', 'C++', 'Go', 'Cobol', 'JavaScript', 'Python', 'Python', 'Java', 'SQL', 'C', 'R', 'Python', 'Java', 'JavaScript', 'TypeScript', 'PHP', 'SQL', 'C', 'C++', 'C#', 'R', 'Go', 'Objective-C', 'Swift', 'Perl', 'Cobol', 'Assembly', 'Kotlin', 'Python', 'Java', 'JavaScript', 'PHP', 'SQL', 'C', 'C++', 'Cobol', 'Python', 'Java', 'JavaScript', 'TypeScript', 'SQL', 'Python', 'Python', 'SQL', 'Python', 'SQL', 'Python']
19


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class (as %)"`.

E.g. **12/21 languages known by this class (57%)**

In [3]:
unique_langs_known = set(langs_known)
unique_langs_known

{'Assembly',
 'C',
 'C#',
 'C++',
 'Cobol',
 'Go',
 'Java',
 'JavaScript',
 'Kotlin',
 'Matlab',
 'Objective-C',
 'PHP',
 'Perl',
 'Python',
 'R',
 'SQL',
 'Swift',
 'TypeScript'}

In [4]:
print(f"{len(unique_langs_known)} / {len(all_langs)} languages known by this class ({len(unique_langs_known) / len(all_langs) * 100}%)")

18 / 32 languages known by this class (56.25%)


## List languages not known by anyone in the class

In [5]:
not_known = set(all_langs) - unique_langs_known
not_known

{'Clojure',
 'Dart',
 'Delphi',
 'Elixir',
 'Fortran',
 'Haskell',
 'Julia',
 'Lisp',
 'Lua',
 'Pascal',
 'Ruby',
 'Rust',
 'Scala',
 'Visual Basic'}

## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known

e.g. **1: Python (93%)**

In [6]:
from pprint import pprint

langs_count = {}
for lang in langs_known:
    if lang not in langs_count:
        langs_count[lang] = 0
    langs_count[lang] += 1

pprint(langs_count)

{'Assembly': 1,
 'C': 4,
 'C#': 1,
 'C++': 5,
 'Cobol': 3,
 'Go': 2,
 'Java': 7,
 'JavaScript': 4,
 'Kotlin': 1,
 'Matlab': 1,
 'Objective-C': 1,
 'PHP': 2,
 'Perl': 2,
 'Python': 16,
 'R': 3,
 'SQL': 10,
 'Swift': 1,
 'TypeScript': 2}


In [7]:
lang_count_list = list(langs_count.items())
lang_count_list.sort(key=lambda l: l[1], reverse=True)
lang_count_list

[('Python', 16),
 ('SQL', 10),
 ('Java', 7),
 ('C++', 5),
 ('C', 4),
 ('JavaScript', 4),
 ('R', 3),
 ('Cobol', 3),
 ('Perl', 2),
 ('Go', 2),
 ('TypeScript', 2),
 ('PHP', 2),
 ('Matlab', 1),
 ('C#', 1),
 ('Objective-C', 1),
 ('Swift', 1),
 ('Assembly', 1),
 ('Kotlin', 1)]

In [8]:
for pos, (lang, count) in enumerate(lang_count_list, start=1):
    print(f"{pos}: {lang} ({round(count / num_people * 100)}%)")

1: Python (84%)
2: SQL (53%)
3: Java (37%)
4: C++ (26%)
5: C (21%)
6: JavaScript (21%)
7: R (16%)
8: Cobol (16%)
9: Perl (11%)
10: Go (11%)
11: TypeScript (11%)
12: PHP (11%)
13: Matlab (5%)
14: C# (5%)
15: Objective-C (5%)
16: Swift (5%)
17: Assembly (5%)
18: Kotlin (5%)
