In [1]:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go


In [97]:
# Raw survey export. It still contains e-mail addresses and other fields that
# cannot be shared, so the following cells strip them before anything is saved.
raw_survey_path = "survey_results.csv"
df = pd.read_csv(raw_survey_path)

In [98]:
# Display the column headers of the loaded survey export.
df.columns

Index(['Zeitstempel',
       'I use the following databases for my scientific illustrations:',
       'Regarding BioIcons', 'Is it easy to use the current BioIcons website?',
       'If you know about BioIcons, have you ever submitted or thought about submitting illustrations using the webform or via PullRequest?',
       'I use the following tools to draw scientific illustrations or to assemble icons from BioIcons in illustrations',
       'Which proposed features do you think are most useful to you: [Extension of the icon catalog based on community requests]',
       'Which proposed features do you think are most useful to you: [Collaborative web editor]',
       'Which proposed features do you think are most useful to you: [Templating mechanism to share graphics created using BioIcons for others to reuse]',
       'Which proposed features do you think are most useful to you: [Workshops on illustration licensing and creating illustrations using BioIcons]',
       'Any software featur

In [99]:
# Free-text and personal columns that are removed before the data is saved.
# The first header is matched exactly, including its trailing '\r\n'.
# drop() raises KeyError for a missing column, so a header mismatch cannot
# pass silently (and re-running this cell on the reduced frame will fail).
private_columns = [
    'Any software features you miss in your current workflow that would help you? (in your Editor or in the Database)\r\n',
    'Have you ever used BioIcons in a publication?',
    'Please provide links to the publications, if yes in preceding question.',
    'Name',
    'Affiliation',
    'Email (optional)',
    'Other Comments',
]
df = df.drop(columns=private_columns)

In [100]:
# Persist the reduced survey frame. The row index is written too, as an
# unnamed first column of the CSV.
processed_survey_path = "processed_user_survey.csv"
df.to_csv(processed_survey_path)

In [4]:
# Frequency of each raw answer string; multi-select answers are still one
# comma-separated string per respondent at this point.
database_question = 'I use the following databases for my scientific illustrations:'
df[database_question].value_counts()

BioIcons                                                            165
BioIcons, BioRender                                                 103
BioRender                                                            66
BioIcons, Wikimedia Commons                                          31
BioIcons, BioRender, SciDraw                                         19
                                                                   ... 
BioIcons, BioRender, Wikimedia Commons, SVGRepo                       1
BioRender, SciDraw, FreePik, Other Stocklibrary such as Vecteezy      1
Protein                                                               1
BioRender, Open ClipArt                                               1
BioRender, Adobe Stock, Other Stocklibrary such as Vecteezy           1
Name: I use the following databases for my scientific illustrations:, Length: 169, dtype: int64

In [40]:
database_question = 'I use the following databases for my scientific illustrations:'

# Step 1: Split every multi-select answer on commas
df_split = df[database_question].str.split(',')

# Step 2: One row per selected database, surrounding whitespace removed
df_exploded = df_split.explode().str.strip()

# Step 3: Tally how often each database was named
value_counts = df_exploded.value_counts().reset_index()
value_counts.columns = ['Database', 'Count']

# Step 4: Databases that were named exactly once are relabelled "Other"
named_once = value_counts['Count'] == 1
value_counts['Database'] = value_counts['Database'].mask(named_once, 'Other')

# Step 5: Re-aggregate so that all "Other" rows collapse into a single row
final_counts = value_counts.groupby('Database')['Count'].sum().reset_index()

# Rows with an empty label are dropped; the rest is written most frequent first
non_empty = final_counts[final_counts['Database'] != ""]
non_empty.sort_values(by="Count", ascending=False).to_csv("database.csv", index=False)

# Number of respondents who answered the question at all
print("Total answers", df[database_question].notna().sum())

Total answers 696


In [33]:

# Distribution of the answers to the "Regarding BioIcons" question.
value_counts = df['Regarding BioIcons'].value_counts().reset_index()
# The rows are survey answers, not database names, so the column is labelled
# 'Answer', matching the other single-choice tallies in this notebook.
# This label becomes the first header of bioicons.csv.
value_counts.columns = ['Answer', 'Count']

# Share of each answer among all given answers, formatted like "12.34%".
share = value_counts['Count'] / value_counts['Count'].sum() * 100
value_counts['Percentage'] = share.round(2).astype(str) + '%'

value_counts.to_csv("bioicons.csv", index=False)
print("Total answers", value_counts['Count'].sum())

Total answers 755


In [42]:
submission_question = 'If you know about BioIcons, have you ever submitted or thought about submitting illustrations using the webform or via PullRequest?'

# One row per distinct answer together with how often it was given
value_counts = df[submission_question].value_counts().reset_index()
value_counts.columns = ['Answer', 'Count']

# Share of each answer among all given answers, formatted like "12.34%"
total_answers = value_counts['Count'].sum()
share = (value_counts['Count'] / total_answers * 100).round(2)
value_counts['Percentage'] = share.astype(str) + '%'

value_counts.to_csv("submitting_bioicons.csv", index=False)
print("Total answers", total_answers)

Total answers 672


In [59]:
tool_question = 'I use the following tools to draw scientific illustrations or to assemble icons from BioIcons in illustrations'

# Step 1: Split every multi-select answer on commas
df_split = df[tool_question].str.split(',')

# Canonical spelling for free-text tool names. Entries are matched against the
# whole (stripped) string. A few values contain a comma on purpose: they expand
# one free-text answer that names two tools into two entries.
rename = {
    "Affinity designer": "Affinity Designer",
    "affinity designer": "Affinity Designer",
    "Affinity designer 2": "Affinity Designer",
    "Affinity Designer": "Affinity Designer",
    "AffinityDesigner": "Affinity Designer",
    "Affinity": "Affinity Designer",
    "CorelDraw": "Corel Draw",
    "GIMP Only office":"GIMP, Only Office",
    "Draw.io and Visio": "Draw.io, Microsoft Visio",
    "GIMP Only Office": "GIMP, Only Office",
    "Gimp": "GIMP",
    "gimp":"GIMP",
    "google slides": "Google Slides",
    "Google Drawing": "Google Draw",
    "canva":"Canva",
    "Draw.io":"draw.io",
    "Krita":"KDE Krita",
    "Adobe Photoshop and Microsoft Visio": "Adobe Photoshop,Microsoft Visio"
}

# Step 2: One row per tool, whitespace stripped, spellings normalised
df_exploded = df_split.explode().str.strip()
df_exploded = df_exploded.replace(rename)

# Step 3: Split again, because some replacements above introduced commas
df_split = df_exploded.str.split(',')
df_exploded = df_split.explode().str.strip()

# Step 4: Normalise once more. The second split can produce names that still
# need mapping: "Draw.io and Visio" expands to "Draw.io", which has to end up
# as "draw.io" instead of being counted as a separate tool.
df_exploded = df_exploded.replace(rename)

# Step 5: Count the occurrences of each unique tool
value_counts = df_exploded.value_counts().reset_index()
value_counts.columns = ['Tool', 'Count']

# Step 6: Aggregate per tool (no "Other" bucket is built for this question)
final_counts = value_counts.groupby('Tool')['Count'].sum().reset_index()

# Rows with an empty label are dropped; the rest is written most frequent first
final_counts[final_counts['Tool']!=""].sort_values(by="Count", ascending=False).to_csv("tool.csv", index=False)

# Number of respondents who answered the question at all
print("Total answers", len(df[df[tool_question].notna()]))

Total answers 662


In [79]:
# Short feature label -> full survey column header of the matching rating question
keys = {
    "Extension of the icon catalog based on community requests": "Which proposed features do you think are most useful to you: [Extension of the icon catalog based on community requests]",
    "Collaborative web editor": "Which proposed features do you think are most useful to you: [Collaborative web editor]",
    "Templating mechanism to share graphics created using BioIcons for others to reuse": "Which proposed features do you think are most useful to you: [Templating mechanism to share graphics created using BioIcons for others to reuse]",
    "Workshops on illustration licensing and creating illustrations using BioIcons": "Which proposed features do you think are most useful to you: [Workshops on illustration licensing and creating illustrations using BioIcons]",
}

# Build one long-format table (Answer, Count, Percentage, key) per feature
per_feature = []
for key, value in keys.items():
    value_counts = df[value].value_counts().reset_index()
    value_counts.columns = ['Answer', 'Count']
    answered = value_counts['Count'].sum()
    share = (value_counts['Count'] / answered * 100).round(2)
    value_counts['Percentage'] = share.astype(str) + '%'
    value_counts['key'] = key
    print("Total answers", key, answered)
    per_feature.append(value_counts)

dfs = pd.concat(per_feature)

# Wide format: one row per answer, one count column per feature.
# Only 'Count' is carried over; the percentage strings are not part of the pivot.
dfs = dfs.pivot(index='Answer', columns='key', values='Count').reset_index()

# Put "Answer" first, then the features in the order they are declared in `keys`
key_index = ["Answer"] + list(keys)
ordered_columns = sorted(dfs.columns, key=key_index.index)
dfs = dfs.reindex(columns=ordered_columns)

# Despite the file name, this CSV holds the counts for all four features
dfs.to_csv("icon_catalog_extension.csv", index=False)


Total answers Extension of the icon catalog based on community requests 631
Total answers Collaborative web editor 621
Total answers Templating mechanism to share graphics created using BioIcons for others to reuse 627
Total answers Workshops on illustration licensing and creating illustrations using BioIcons 621


In [85]:
# Respondents per country; rows without a country are left out
country_counts = df['Country'].dropna().value_counts()
country_counts.to_csv("countries.csv")


In [86]:
# count() only counts the non-missing entries of the column
print("Total responses", df['Country'].count())

Total responses 368


In [96]:

# Free-text "Position" answers, grouped by the category they are folded into.
# Answers are matched as exact strings, so trailing spaces and case variants
# are listed exactly as they were typed.
position_groups = {
    "PostDoc": [
        # NOTE(review): 研究生 normally means "graduate student", not postdoc;
        # confirm that this mapping is intended.
        "研究生",
    ],
    "academic staff": [
        "Scientist ",
        "Government Research Entomologist",
        "Researcher",
        "Biotech research staff",
        "Academic Technician",
        "permanent academic staff",
        "Temporary academic staff",
        "Senior Bioinformatician",
        "Senior Scientist ",
        "Principal Scientist, Computational Sciences and Engineering",
        "Research Associate",
        "researcher",
        "Staff",
        "Science Technician",
        "Lab Staff",
        "Data Outputs Manager",
        "non-permanent academic staff",
        "not-permanent academic staff",
        "Research associate",
        "research manager",
        "Research Assistant",
        "Scientist",
        "Research Scientist",
    ],
    "Undergraduate": [
        "M.S. Biology Candidate",
        "Masters graduate",
        "MSc serving as a non-military service member in the University of Helsinki.",
        "master's",
        "Master ",
        "Faculty of Medicine, 5th.",
        "Masters Student",
        "MD Student",
        "MSc student",
        "Msc student",
    ],
    "Professor": [
        "Assistant professor in pharmacology",
    ],
    "Industry": [
        "research support company",
        "Pharma Consultant",
        "I have been a professor but now work in industry ",
        "industry/biotech dept head",
        "Industry bioinfo",
        "Computational Biologist (industry)",
        "Company",
        "Consultant",
        "CEO",
    ],
    "Teacher": [
        "Biology Teacher",
        "High school research science teacher",
        "Teaching support",
    ],
    "Other": [
        "Self-study",
        "Independent Scientist",
        "At present looking for roles",
        "Graduate",
        "Postgraduate",
    ],
    "Group Leader": [
        "Principal investigator",
        "Group leader",
        "Lecturer",
    ],
}

# Flat lookup: raw answer -> category
replace = {
    raw_answer: category
    for category, raw_answers in position_groups.items()
    for raw_answer in raw_answers
}

# Only respondents who gave a position; answers not listed above stay unchanged
df_pos = df['Position'].dropna()
df_pos = df_pos.replace(replace)

df_pos.value_counts().to_csv("position.csv")
print("Total responses", len(df_pos))

Total responses 567


All plots were made with Datawrapper and are available at https://blog.bioicons.com/post/user_survey/