# Survey analysis
Analyse the results from the [languages survey](https://forms.gle/5b3mZRVcgAsoNG1FA)

In [1]:
# Reference list of languages that the class's answers are measured against
# (used below for the "known / in list" totals and the per-language tally).
all_langs = [
    'Python', 'Java', 'JavaScript', 'TypeScript',
    'PHP', 'SQL', 'C', 'C++',
    'C#', 'Ruby', 'R', 'Matlab',
    'Go', 'Rust', 'Objective-C', 'Swift',
    'Visual Basic', 'Perl', 'Cobol', 'Fortran',
    'Lisp', 'Assembly', 'Kotlin', 'Dart',
    'Scala', 'Lua', 'Delphi', 'Haskell',
    'Julia', 'Clojure', 'Elixir', 'Pascal',
]

## Load data

In [2]:
import csv
from pprint import pprint

# Pool of every language named in any response (repeats kept, one entry per mention).
lang_responses = []
# Number of data rows read, i.e. one per survey submission.
num_responses = 0
# newline='' hands newline handling to the csv module, as its documentation recommends.
with open('data/Survey-2024-Jan.csv', newline='') as file:
    # Column names are supplied explicitly, so the reader would otherwise yield the
    # header row as data; each row comes back as a dict keyed by these names.
    reader = csv.DictReader(file, fieldnames=('timestamp', 'languages', 'years'))
    next(reader, None)  # Skip the header row (no-op on an empty file)
    for response in reader:
        # A short row yields None for 'languages'; treat it like a blank answer.
        raw_langs = response['languages'] or ''
        # Trim stray whitespace and drop empty pieces so a blank answer does not
        # add '' to the pool as if it were a language.
        langs = [lang.strip() for lang in raw_langs.split(';') if lang.strip()]
        lang_responses += langs
        num_responses += 1

print(len(lang_responses))
print(num_responses)


105
25


## Find number of languages known
Print `"{# known by class} / {# in list} languages known by this class"`.

E.g. **12 / 21 languages known by this class**

In [3]:
# Collapse the pooled answers to distinct languages: each one counts once,
# however many respondents named it.
langs_known = {*lang_responses}

print(f"{len(langs_known)} / {len(all_langs)} languages known by this class")

24 / 32 languages known by this class


## List languages not known by anyone in the class

In [4]:
# Listed languages that no respondent named.
not_known = {lang for lang in all_langs if lang not in langs_known}

# Alphabetical listing on the first line, how many there are on the second.
print(sorted(not_known), len(not_known), sep="\n")

['Clojure', 'Delphi', 'Elixir', 'Go', 'Haskell', 'Julia', 'Lua', 'Scala']
8


## Rank languages by most commonly known
Print each language as `"{position}: {language} ({percent_known}%)"`, in order from most to least known

E.g. **1: Python (93%)**

In [5]:
# Seed the tally with every listed language at zero, keyed in alphabetical order,
# so languages nobody named still appear in the results.
lang_counts = dict.fromkeys(sorted(all_langs), 0)

lang_counts

{'Assembly': 0,
 'C': 0,
 'C#': 0,
 'C++': 0,
 'Clojure': 0,
 'Cobol': 0,
 'Dart': 0,
 'Delphi': 0,
 'Elixir': 0,
 'Fortran': 0,
 'Go': 0,
 'Haskell': 0,
 'Java': 0,
 'JavaScript': 0,
 'Julia': 0,
 'Kotlin': 0,
 'Lisp': 0,
 'Lua': 0,
 'Matlab': 0,
 'Objective-C': 0,
 'PHP': 0,
 'Pascal': 0,
 'Perl': 0,
 'Python': 0,
 'R': 0,
 'Ruby': 0,
 'Rust': 0,
 'SQL': 0,
 'Scala': 0,
 'Swift': 0,
 'TypeScript': 0,
 'Visual Basic': 0}

In [6]:
# Reset every counter before tallying so that re-running this cell gives the same
# totals instead of adding a second round of counts on top of the first.
lang_counts = dict.fromkeys(lang_counts, 0)

# One increment per mention. A language that is not already a key raises KeyError,
# which flags a response that falls outside the expected list.
for lang in lang_responses:
    lang_counts[lang] += 1

lang_counts

{'Assembly': 2,
 'C': 7,
 'C#': 2,
 'C++': 7,
 'Clojure': 0,
 'Cobol': 3,
 'Dart': 1,
 'Delphi': 0,
 'Elixir': 0,
 'Fortran': 3,
 'Go': 0,
 'Haskell': 0,
 'Java': 13,
 'JavaScript': 10,
 'Julia': 0,
 'Kotlin': 1,
 'Lisp': 1,
 'Lua': 0,
 'Matlab': 1,
 'Objective-C': 1,
 'PHP': 2,
 'Pascal': 2,
 'Perl': 6,
 'Python': 17,
 'R': 3,
 'Ruby': 3,
 'Rust': 1,
 'SQL': 12,
 'Scala': 0,
 'Swift': 1,
 'TypeScript': 2,
 'Visual Basic': 4}

In [7]:
# (language, count) pairs ordered from highest to lowest count. The sort is stable,
# so languages with equal counts keep the order they have in lang_counts.
lang_counts_list = sorted(lang_counts.items(), key=lambda pair: pair[1], reverse=True)

In [8]:
# Walk the ranking in its sorted order, numbering positions from 1, and show each
# count as a whole-number percentage of all responses.
for pos, entry in enumerate(lang_counts_list, start=1):
    lang, count = entry
    percent_known = round(count / num_responses * 100)
    print(f"{pos}: {lang} ({percent_known}%)")

1: Python (68%)
2: Java (52%)
3: SQL (48%)
4: JavaScript (40%)
5: C (28%)
6: C++ (28%)
7: Perl (24%)
8: Visual Basic (16%)
9: Cobol (12%)
10: Fortran (12%)
11: R (12%)
12: Ruby (12%)
13: Assembly (8%)
14: C# (8%)
15: PHP (8%)
16: Pascal (8%)
17: TypeScript (8%)
18: Dart (4%)
19: Kotlin (4%)
20: Lisp (4%)
21: Matlab (4%)
22: Objective-C (4%)
23: Rust (4%)
24: Swift (4%)
25: Clojure (0%)
26: Delphi (0%)
27: Elixir (0%)
28: Go (0%)
29: Haskell (0%)
30: Julia (0%)
31: Lua (0%)
32: Scala (0%)
