**Purpose**: Analyze the instructor survey

In [1]:
import base64
import matplotlib.pyplot as plt
import pandas as pd
import yaml

from dateutil import relativedelta

In [2]:
# Google Sheets CSV export link (format=csv, one specific sheet via gid) that
# is read as the survey data in the next cell.
url = "https://docs.google.com/spreadsheets/d/1DUC7yKxMJRHFfKYn8aWLYdenAWdRsG6c0xvIQVW_KD0/export?format=csv&gid=1780897126"

In [3]:
# Load the survey answers and turn them into an analysis-ready frame in one
# chain, so that re-running the cell always starts again from the raw CSV:
#   1. drop the free-text questions (not analysed in this notebook),
#   2. shorten the remaining question texts to compact column names,
#   3. map "Yes"/"No" to booleans and fill unanswered questions with 0,
#   4. set explicit dtypes for the numeric and yes/no columns.
df = (
    pd.read_csv(url)
    .drop(columns=[
        'Your most recent training event was within the past...',
        'Which other training materials did you use?',
        'What did you need to prepare, ahead of these events? (E.g. materials, websites, servers, certificates)',
        'How long did you need to prepare for these events? Which portions took the most time?',
        'What did you like about the preparation process?',
        'What could have been easier or better?',
        'Which tutorial(s) did you use? (No need for an exhaustive list.)',
        'What did you like about them?',
        'What did you not like about them or what could be improved?',
        'What hinders you to contribute? What could we improve?',
        'Any comments?',
    ])
    .rename(columns={
        'Did you use Galaxy for teaching or training in the last 3 years?': 'Use in last 3 years',
        'How many events (approximately)?': 'Number of training events',
        'Did you use GTN training materials?': 'GTN usage',
        'For how many of those events did you use the materials from the Galaxy Training Network?': 'GTN events',
        'How would you rate these materials?': 'GTN rate',
        'Would you recommend them to others?': 'GTN recommendation',
        'Which features did you use?': 'Features',
        'Which Galaxy servers(s) have you already used for training?': 'Servers',
        'Have you ever used TIaaS (Training Infrastructure as a Service, https://galaxyproject.eu/tiaas)?': 'TiaaS usage',
        'How did you like it?': 'TiaaS rate',
        'Would you recommend it to other trainers?': 'TiaaS recommendation',
        'Have you already contributed to GTN material?': 'GTN contributor',
        'If no, do you plan to do so?': 'GTN future contributor',
    })
    .replace("Yes", True)
    .replace("No", False)
    # Unanswered questions become 0, which the casts below turn into 0 / False
    .fillna(0)
    .astype({
        # This column is used as a boolean row mask further down; without the
        # cast a single blank answer (filled with 0) would leave a mixed
        # object column that can no longer be used as a mask.
        'Use in last 3 years': bool,
        'Number of training events': int,
        'GTN usage': bool,
        'GTN events': int,
        'GTN rate': int,
        'GTN recommendation': int,
        'TiaaS usage': bool,
        'TiaaS rate': int,
        'TiaaS recommendation': int,
        'GTN contributor': bool,
        # 'GTN future contributor' is not cast: it keeps True/False plus the
        # 0 fill value for respondents who left the question blank.
    })
    # Blank feature lists were filled with 0 above; use an empty string so the
    # column stays usable with the .str accessor.
    .assign(Features=lambda frame: frame['Features'].replace(0, ''))
)
df.head()

Unnamed: 0,Tidsmerke,Use in last 3 years,Number of training events,GTN usage,GTN events,GTN rate,GTN recommendation,Features,Servers,TiaaS usage,TiaaS rate,TiaaS recommendation,GTN contributor,GTN future contributor
0,07.11.2019 kl. 13.30.19,True,1,True,2,5,5,,European Galaxy server (https://usegalaxy.eu/),True,5,5,True,0
1,07.11.2019 kl. 16.25.57,True,25,True,25,5,5,,European Galaxy server (https://usegalaxy.eu/),True,5,5,True,0
2,21.11.2019 kl. 17.48.58,True,5,True,3,4,4,Hands-on tutorials,European Galaxy server (https://usegalaxy.eu/)...,True,5,5,True,0
3,29.11.2019 kl. 09.16.54,True,3,True,3,5,5,Hands-on tutorials,European Galaxy server (https://usegalaxy.eu/),True,5,5,True,0
4,10.01.2020 kl. 13.08.57,True,10,True,10,4,4,"Hands-on tutorials, Slide decks",Private Server,False,0,0,True,0


Number of answers

In [4]:
# Total number of survey responses (one row per response)
len(df.index)

33

# Use of Galaxy for teaching or training in the last 3 years

Percentage of respondents who used Galaxy for teaching or training in the last 3 years

In [5]:
# Keep only the respondents flagged in 'Use in last 3 years', then report
# their share of all responses as a percentage.
taught_recently = df['Use in last 3 years']
did_training_df = df[taught_recently]
len(did_training_df) / len(df) * 100

87.87878787878788

Percentage that gave at most 1 training session in the last 3 years

In [6]:
# Percentage of trainers who reported at most one training event
at_most_one_event = did_training_df['Number of training events'] <= 1
sum(at_most_one_event) / len(did_training_df) * 100

24.137931034482758

Percentage that gave 10 or more training sessions in the last 3 years

In [7]:
# Percentage of trainers who reported ten or more training events
ten_or_more_events = did_training_df['Number of training events'] >= 10
sum(ten_or_more_events) / len(did_training_df) * 100

34.48275862068966

Percentage that gave 25 or more training sessions in the last 3 years

In [8]:
# Percentage of trainers who reported twenty-five or more training events
twenty_five_or_more_events = did_training_df['Number of training events'] >= 25
sum(twenty_five_or_more_events) / len(did_training_df) * 100

10.344827586206897

# Use of GTN

In [9]:
# Trainers whose 'GTN usage' flag is set
use_gtn = did_training_df.loc[did_training_df['GTN usage']]

Percentage of use of GTN

In [10]:
# GTN users as a percentage of all trainers
gtn_user_count = len(use_gtn)
gtn_user_count / len(did_training_df) * 100

79.3103448275862

Rate

In [11]:
# Individual 'GTN rate' values given by the GTN users
use_gtn.loc[:, 'GTN rate']

0     5
1     5
2     4
3     5
4     4
5     5
6     4
7     5
8     4
9     5
12    5
13    5
16    5
17    5
19    5
20    5
23    5
24    5
25    5
26    5
29    5
30    5
32    5
Name: GTN rate, dtype: int64

Mean rate

In [12]:
# Average 'GTN rate' across the GTN users
gtn_ratings = use_gtn['GTN rate']
gtn_ratings.mean()

4.826086956521739

Percentage that gave 5 stars for recommendation

In [13]:
# Percentage of GTN users with a 'GTN recommendation' score of 5 or above
top_gtn_recommendation = use_gtn['GTN recommendation'] >= 5
sum(top_gtn_recommendation) / len(use_gtn) * 100

91.30434782608695

In [14]:
# Feature lists of the GTN users, skipping those with an empty 'Features' value
gtn_features = use_gtn['Features']
use_gtn_features = gtn_features[gtn_features != '']
use_gtn_features

2                                    Hands-on tutorials
3                                    Hands-on tutorials
4                       Hands-on tutorials, Slide decks
5     Hands-on tutorials, Slide decks, List of tools...
6     Hands-on tutorials, Slide decks, List of tools...
7     Hands-on tutorials, Slide decks, Data librarie...
8     Hands-on tutorials, Slide decks, Data librarie...
9                                    Hands-on tutorials
12    Hands-on tutorials, Slide decks, Data librarie...
13    Hands-on tutorials, Slide decks, List of tools...
16                 Hands-on tutorials, Slide decks, FAQ
17    Hands-on tutorials, Slide decks, Training Phil...
19    Hands-on tutorials, Slide decks, Data librarie...
20    Hands-on tutorials, Material on teaching and h...
23                 Hands-on tutorials, Slide decks, FAQ
24    Hands-on tutorials, Slide decks, List of tools...
25                      Hands-on tutorials, Slide decks
26                      Hands-on tutorials, Slid

Percentage that uses Hands-on tutorials

In [15]:
# Percentage of feature lists that mention 'Hands-on tutorial'
mentions_hands_on = use_gtn_features.str.contains('Hands-on tutorial')
sum(mentions_hands_on) / len(use_gtn_features) * 100

100.0

Percentage that uses Slides

In [16]:
# Percentage of feature lists that mention 'Slide'
mentions_slides = use_gtn_features.str.contains('Slide')
sum(mentions_slides) / len(use_gtn_features) * 100

71.42857142857143

Percentage that uses tools

In [17]:
# Percentage of feature lists that mention 'tools'
mentions_tools = use_gtn_features.str.contains('tools')
sum(mentions_tools) / len(use_gtn_features) * 100

28.57142857142857

Percentage that uses data libraries

In [18]:
# Percentage of feature lists that mention 'Data libraries'
mentions_data_libraries = use_gtn_features.str.contains('Data libraries')
sum(mentions_data_libraries) / len(use_gtn_features) * 100

38.095238095238095

Percentage that uses workflows

In [19]:
# Percentage of feature lists that mention 'workflows'
mentions_workflows = use_gtn_features.str.contains('workflows')
sum(mentions_workflows) / len(use_gtn_features) * 100

4.761904761904762

Percentage that uses "Material on teaching and hosting Galaxy training"

In [20]:
# Percentage of feature lists that mention the teaching/hosting material
mentions_teaching_material = use_gtn_features.str.contains('Material on teaching and hosting Galaxy training')
sum(mentions_teaching_material) / len(use_gtn_features) * 100

23.809523809523807

Percentage that uses "Training Philosophies"

In [21]:
# Percentage of feature lists that mention 'Training Philosophies'
mentions_philosophies = use_gtn_features.str.contains('Training Philosophies')
sum(mentions_philosophies) / len(use_gtn_features) * 100

9.523809523809524

# Use of TIaaS

In [22]:
# Trainers whose 'TiaaS usage' flag is set
use_tiaas = did_training_df.loc[did_training_df['TiaaS usage']]

Percentage of use of TIaaS

In [23]:
# TIaaS users as a percentage of all trainers
tiaas_user_count = len(use_tiaas)
tiaas_user_count / len(did_training_df) * 100

58.620689655172406

Rate

In [24]:
# Individual 'TiaaS rate' values given by the TIaaS users
use_tiaas.loc[:, 'TiaaS rate']

0     5
1     5
2     5
3     5
6     5
7     5
12    5
17    5
19    5
20    5
23    5
24    5
25    5
26    5
29    5
30    5
31    3
Name: TiaaS rate, dtype: int64

Percentage that gave 5 stars for recommendation

In [25]:
# Percentage of TIaaS users with a 'TiaaS recommendation' score of 5 or above
top_tiaas_recommendation = use_tiaas['TiaaS recommendation'] >= 5
sum(top_tiaas_recommendation) / len(use_tiaas) * 100

94.11764705882352

# Contributors

Percentage of contributors

In [26]:
# Percentage of trainers whose 'GTN contributor' flag is set
contributor_flags = did_training_df['GTN contributor']
sum(contributor_flags) / len(did_training_df) * 100

68.96551724137932

In [32]:
# Trainers whose 'GTN contributor' flag is not set
is_contributor = did_training_df['GTN contributor']
not_gtn_contributors = did_training_df[~is_contributor]
not_gtn_contributors

Unnamed: 0,Tidsmerke,Use in last 3 years,Number of training events,GTN usage,GTN events,GTN rate,GTN recommendation,Features,Servers,TiaaS usage,TiaaS rate,TiaaS recommendation,GTN contributor,GTN future contributor
10,29.01.2020 kl. 14.25.43,True,6,False,0,0,0,,Private Server,False,0,0,False,True
11,29.01.2020 kl. 15.50.10,True,3,False,0,0,0,,Private Server,False,0,0,False,True
12,30.01.2020 kl. 07.47.29,True,25,True,20,5,5,"Hands-on tutorials, Slide decks, Data librarie...",Australian Galaxy server (https://usegalaxy.or...,True,5,5,False,True
14,02.02.2020 kl. 16.50.36,True,2,False,0,0,0,,European Galaxy server (https://usegalaxy.eu/)...,False,0,0,False,True
17,10.02.2020 kl. 12.54.05,True,7,True,3,5,5,"Hands-on tutorials, Slide decks, Training Phil...",European Galaxy server (https://usegalaxy.eu/)...,True,5,5,False,False
20,18.02.2021 kl. 16.46.25,True,3,True,3,5,5,"Hands-on tutorials, Material on teaching and h...",European Galaxy server (https://usegalaxy.eu/)...,True,5,5,False,False
27,30.09.2021 kl. 15.47.55,True,1,False,0,0,0,,Private Server,False,0,0,False,True
28,30.09.2021 kl. 16.43.01,True,0,False,0,0,0,,"Main Galaxy server (https://usegalaxy.org/), E...",False,0,0,False,False
31,18.10.2021 kl. 12.09.53,True,1,False,0,0,0,,European Galaxy server (https://usegalaxy.eu/),True,3,3,False,False


In [31]:
# Percentage of non-contributors whose 'GTN future contributor' answer is truthy
future_contributor_flags = not_gtn_contributors['GTN future contributor']
sum(future_contributor_flags) / len(not_gtn_contributors) * 100

55.55555555555556