In [1]:
import textwrap
from collections import Counter

import altair as alt
import pandas as pd

In [2]:
def format_percentage(row, col_name):
    count = int(row["counts"])
    percentage = row["percentage"] * 100
    w = "response"
    if count > 1:
        w += "s"
    return f"{percentage:.0f}% ({count} {w})"

In [3]:
def add_formatted_column(data):
    total = data["counts"].sum()
    data["percentage"] = data["counts"] / total
    data["formatted"] = data.apply(format_percentage, axis=1, col_name="Time")
    return data

In [4]:
def wrap(text, width):
    return textwrap.wrap(text, width)

In [5]:
def alt_text(title, options, counts, order):
    text = []
    text.append(f"{title}")
    if order == "-x":
        pairs = zip(options, counts)
        sorted_pairs = sorted(pairs, key=lambda x: x[1], reverse=True)
        for o, c in sorted_pairs:
            text.append(f"- {o}: {c} responses")
    else:
        for o, c in zip(order, counts):
            text.append(f"- {o}: {c} responses")
    print("\n".join(text))
    with open("alt-text.md", "a") as f:
        f.write("\n\n")
        f.write("\n".join(text))

In [6]:
def plot(data, title, order, output_file, subtitle=[]):
    c = Counter(data[question].dropna().tolist())
    options, counts = zip(*c.items())

    alt_text(title, options, counts, order)

    data_question = pd.DataFrame(
        {
            "options": options,
            "counts": counts,
        }
    )

    _data = add_formatted_column(data_question)

    bars = (
        alt.Chart(
            _data,
            title=alt.Title(
                wrap(title, width=40),
                subtitle=subtitle,
            ),
        )
        .mark_bar()
        .encode(
            x=alt.X("percentage:Q", axis=alt.Axis(format=".0%", title=None)),
            y=alt.Y("options:N", axis=alt.Axis(title=None, labels=True), sort=order),
            color=alt.Color("options:N", legend=None),
        )
    )

    text = (
        alt.Chart(_data)
        .mark_text(align="left", baseline="middle", dx=3)
        .encode(
            x=alt.X("percentage:Q"),
            y=alt.Y("options:N", sort=order),
            text=alt.Text("formatted"),
        )
    )

    chart = bars + text

    # chart = chart.properties(width=600, height=250)

    chart = chart.configure_title(
        fontSize=15,
        subtitleFontSize=15,
        anchor="start",
        orient="bottom",
        offset=15,
        subtitleColor="gray",
    )

    chart.show()
    chart.save(output_file, scale_factor=2)

In [7]:
data = pd.read_csv("data.txt", sep=";")

In [8]:
question = "In your estimate, how much time per month have you saved as a result of attending a CodeRefinery workshop?"
order = ["No time saved", "Minutes", "Hours", "Days"]

plot(data, question, order, "time-saved.png")

In your estimate, how much time per month have you saved as a result of attending a CodeRefinery workshop?
- No time saved: 59 responses
- Minutes: 17 responses
- Hours: 32 responses
- Days: 20 responses


In [9]:
question = "After attending the workshop, would you judge your code to be more reusable or not more reusable?"
order = ["My code is more reusable", "My code is not more reusable", "Not sure"]

plot(data, question, order, "reusable.png")

After attending the workshop, would you judge your code to be more reusable or not more reusable?
- My code is more reusable: 90 responses
- My code is not more reusable: 30 responses
- Not sure: 9 responses


In [10]:
question = "After attending the workshop, has it become easier or not for you to collaborate on software development with your colleagues and collaborators?"
order = ["Collaboration is easier", "Collaboration is not easier", "Not sure"]

plot(data, question, order, "collaboration.png")

After attending the workshop, has it become easier or not for you to collaborate on software development with your colleagues and collaborators?
- Collaboration is easier: 32 responses
- Collaboration is not easier: 91 responses
- Not sure: 6 responses


In [11]:
question = "Have you introduced one or more of your colleagues to new tools or practices as a result of the workshop?"
order = [
    "I have introduced one or more of my colleagues to new tools or practices",
    "I have not introduced one or more of my colleagues to new tools or practices",
    "Not sure",
]

plot(data, question, order, "colleagues.png")

Have you introduced one or more of your colleagues to new tools or practices as a result of the workshop?
- I have introduced one or more of my colleagues to new tools or practices: 79 responses
- I have not introduced one or more of my colleagues to new tools or practices: 16 responses
- Not sure: 33 responses


In [12]:
question = "How likely is it that you would recommend CodeRefinery workshop to a friend or colleague?"
order = [
    "I have introduced one or more of my colleagues to new tools or practices",
    "I have not introduced one or more of my colleagues to new tools or practices",
    "Not sure",
]
order = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

plot(
    data,
    question,
    order,
    "recommending.png",
    subtitle="0 means definitely not. 10 means definitely yes.",
)

How likely is it that you would recommend CodeRefinery workshop to a friend or colleague?
- 10: 64 responses
- 9: 11 responses
- 8: 21 responses
- 7: 3 responses
- 6: 24 responses
- 5: 3 responses
- 4: 1 responses
- 3: 1 responses
- 2: 1 responses


In [13]:
question = "Would you prefer pre-recorded lectures, live online teaching, or in-person teaching?"
order = [
    "I would prefer pre-recorded lectures combined with live discussions",
    "I would prefer online teaching (lectures are live)",
    "I would prefer in-person teaching",
]

plot(data, question, order, "pre-recorded-or-live-or-in-person.png")

Would you prefer pre-recorded lectures, live online teaching, or in-person teaching?
- I would prefer pre-recorded lectures combined with live discussions: 36 responses
- I would prefer online teaching (lectures are live): 62 responses
- I would prefer in-person teaching: 30 responses


In [14]:
question = "What would be the ideal format for you?"
order = [
    "One week with half days (3 or 4 half-days)",
    "One week with full days (3 or 4 full days)",
    "Two weeks (6 half-days)",
    "Two weeks (6 half-days) but split into two separate logical courses",
    "Lesson series over 6 weeks with one session per week",
    "Lessons are available in a catalogue of videos and can be followed anytime",
]

plot(data, question, order, "format.png")

What would be the ideal format for you?
- One week with half days (3 or 4 half-days): 18 responses
- One week with full days (3 or 4 full days): 17 responses
- Two weeks (6 half-days): 24 responses
- Two weeks (6 half-days) but split into two separate logical courses: 32 responses
- Lesson series over 6 weeks with one session per week: 13 responses
- Lessons are available in a catalogue of videos and can be followed anytime: 25 responses


In [15]:
question = "Participation style"
order = [
    "Individual learner",
    "Learner in a team",
    "Team leader/ helper (online)",
    "Team leader/ helper (in person)",
]

plot(data, question, order, "participation-style.png")

Participation style
- Individual learner: 95 responses
- Learner in a team: 8 responses
- Team leader/ helper (online): 9 responses
- Team leader/ helper (in person): 15 responses


In [16]:
question = "Career stage"
order = [
    "Undergraduate student",
    "Graduate student/ PhD student",
    "Postdoc",
    "Researcher",
    "Professor",
    "Research software engineer/ Scientific programmer",
    "Industry",
    "Other",
]

plot(data, question, order, "career-stage.png")

Career stage
- Undergraduate student: 12 responses
- Graduate student/ PhD student: 19 responses
- Postdoc: 4 responses
- Researcher: 4 responses
- Professor: 63 responses
- Research software engineer/ Scientific programmer: 18 responses
- Industry: 3 responses
- Other: 6 responses


In [17]:
question = "Academic discipline"
order = "-x"

plot(data, question, order, "academic-discipline.png")

Academic discipline
- Physical Sciences: 30 responses
- Earth and Related Environmental Sciences: 18 responses
- Biological Sciences: 13 responses
- Chemical Sciences: 11 responses
- Computer and Information Sciences: 11 responses
- Other Engineering and Technologies: 6 responses
- Health Sciences: 5 responses
- Psychology: 4 responses
- Mathematics: 4 responses
- Mechanical Engineering: 3 responses
- Civil Engineering: 3 responses
- Environmental Engineering: 3 responses
- Medical Engineering: 2 responses
- Medical Biotechnology: 2 responses
- Clinical Medicine: 2 responses
- Other Medical and Health Sciences: 2 responses
- Other: 2 responses
- Other Social Sciences: 2 responses
- Chemical Engineering: 1 responses
- Electrical Engineering, Electronic Engineering, Information Engineering: 1 responses
- Economics and Business: 1 responses
- History and Archaeology: 1 responses
- Nano-technology: 1 responses
- Other Natural Sciences: 1 responses
