In [1]:
import textwrap
from collections import Counter

import altair as alt
import pandas as pd

In [2]:
def format_percentage(row, col_name):
    """Build the bar label for one answer option, e.g. ``"25% (3 responses)"``.

    Parameters
    ----------
    row : mapping
        Must provide ``"counts"`` (number of responses) and ``"percentage"``
        (fraction of all responses, between 0 and 1 when produced by
        ``add_formatted_column``).
    col_name : object
        Not used by this function; kept so existing callers that pass it
        keep working.

    Returns
    -------
    str
        The percentage rounded to a whole number, followed by the response
        count in parentheses.
    """
    count = int(row["counts"])
    percentage = row["percentage"] * 100
    # English uses the singular only for exactly one; zero takes the plural
    # ("0 responses"), which the previous `count > 1` check got wrong.
    noun = "response" if count == 1 else "responses"
    return f"{percentage:.0f}% ({count} {noun})"


def add_formatted_column(data):
    """Add ``"percentage"`` and ``"formatted"`` columns to ``data`` and return it.

    ``data`` needs a ``"counts"`` column. ``"percentage"`` is each count
    divided by the sum of all counts, and ``"formatted"`` is the label that
    ``format_percentage`` produces for each row. The frame passed in is
    modified in place; the same object is returned.
    """
    data["percentage"] = data["counts"] / data["counts"].sum()
    data["formatted"] = data.apply(
        lambda row: format_percentage(row, col_name="Time"),
        axis=1,
    )
    return data

In [3]:
def wrap(text, width):
    """Break ``text`` into a list of lines, each at most ``width`` characters."""
    wrapper = textwrap.TextWrapper(width=width)
    return wrapper.wrap(text)

In [4]:
def plot(data, title, order, output_file, subtitle=None):
    """Show and save a horizontal bar chart of the answers in one column.

    The column of ``data`` named ``title`` is tallied (missing values are
    dropped). Each distinct answer becomes one bar whose length is its share
    of all answers, labelled with the text from ``add_formatted_column``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing a column named ``title``.
    title : str
        Name of the column to tally. It is also used as the chart title,
        wrapped at 40 characters.
    order : list or None
        Order of the answer options on the y axis. A falsy value passes
        ``sort=None`` to Altair instead.
    output_file : str
        Path handed to ``chart.save``.
    subtitle : str or list of str, optional
        Subtitle shown with the title. Defaults to an empty list.

    Raises
    ------
    ValueError
        If the column has no non-missing answers.
    """
    # None stands in for "no subtitle" so the default is not a shared mutable list.
    if subtitle is None:
        subtitle = []

    # Read the column through the `title` parameter rather than a notebook-level
    # global, so the function does not depend on hidden kernel state.
    tally = Counter(data[title].dropna().tolist())
    if not tally:
        raise ValueError(f"no answers found in column {title!r}")

    data_question = pd.DataFrame(
        {
            "options": list(tally.keys()),
            "counts": list(tally.values()),
        }
    )

    sort = order if order else None

    _data = add_formatted_column(data_question)

    bars = (
        alt.Chart(
            _data,
            title=alt.Title(
                wrap(title, width=40),
                subtitle=subtitle,
            ),
        )
        .mark_bar()
        .encode(
            x=alt.X("percentage:Q", axis=alt.Axis(format=".0%", title=None)),
            y=alt.Y("options:N", axis=alt.Axis(title=None, labels=True), sort=sort),
            color=alt.Color("options:N", legend=None),
        )
    )

    # Text layer: the formatted label is placed just right of each bar's end.
    text = (
        alt.Chart(_data)
        .mark_text(align="left", baseline="middle", dx=3)
        .encode(
            x=alt.X("percentage:Q"),
            y=alt.Y("options:N", sort=sort),
            text=alt.Text("formatted"),
        )
    )

    chart = bars + text

    chart = chart.configure_title(
        fontSize=15,
        subtitleFontSize=15,
        anchor="start",
        orient="bottom",
        offset=15,
        subtitleColor="gray",
    )

    chart.show()
    chart.save(output_file, scale_factor=2)

In [5]:
# Load the semicolon-separated file; the plotting cells below read its columns.
data = pd.read_csv("data.txt", delimiter=";")

In [6]:
# Column to plot; the same text is used as the chart title.
question = (
    "In your estimate, how much time per month have you saved "
    "as a result of attending a CodeRefinery workshop?"
)
# Order of the answer options on the y axis.
order = ["No time saved", "Minutes", "Hours", "Days"]

plot(data=data, title=question, order=order, output_file="time-saved.png")

In [7]:
# Column to plot; the same text is used as the chart title.
question = (
    "After attending the workshop, would you judge your code "
    "to be more reusable or not more reusable?"
)
# Order of the answer options on the y axis.
order = [
    "My code is more reusable",
    "My code is not more reusable",
    "Not sure",
]

plot(data=data, title=question, order=order, output_file="reusable.png")

In [8]:
# Column to plot; the same text is used as the chart title.
question = (
    "After attending the workshop, has it become easier or not for you "
    "to collaborate on software development with your colleagues "
    "and collaborators?"
)
# Order of the answer options on the y axis.
order = [
    "Collaboration is easier",
    "Collaboration is not easier",
    "Not sure",
]

plot(data=data, title=question, order=order, output_file="collaboration.png")

In [9]:
# Column to plot; the same text is used as the chart title.
question = (
    "Have you introduced one or more of your colleagues "
    "to new tools or practices as a result of the workshop?"
)
# Order of the answer options on the y axis.
order = [
    "I have introduced one or more of my colleagues to new tools or practices",
    "I have not introduced one or more of my colleagues to new tools or practices",
    "Not sure",
]

plot(data=data, title=question, order=order, output_file="colleagues.png")

In [10]:
# Column to plot; the same text is used as the chart title.
question = "How likely is it that you would recommend CodeRefinery workshop to a friend or colleague?"
# Scores are listed from highest to lowest on the y axis. (A list of answer
# strings copied from the previous cell used to be assigned here and was
# immediately overwritten; that dead assignment has been removed.)
order = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

plot(
    data,
    question,
    order,
    "recommending.png",
    subtitle="0 means definitely not. 10 means definitely yes.",
)

In [11]:
# Column to plot; the same text is used as the chart title.
question = (
    "Would you prefer pre-recorded lectures, live online teaching, "
    "or in-person teaching?"
)
# Order of the answer options on the y axis.
order = [
    "I would prefer pre-recorded lectures combined with live discussions",
    "I would prefer online teaching (lectures are live)",
    "I would prefer in-person teaching",
]

plot(
    data=data,
    title=question,
    order=order,
    output_file="pre-recorded-or-live-or-in-person.png",
)

In [12]:
# Column to plot; the same text is used as the chart title.
question = "What would be the ideal format for you?"
# Order of the answer options on the y axis.
order = [
    "One week with half days (3 or 4 half-days)",
    "One week with full days (3 or 4 full days)",
    "Two weeks (6 half-days)",
    "Two weeks (6 half-days) but split into two separate logical courses",
    "Lesson series over 6 weeks with one session per week",
    "Lessons are available in a catalogue of videos and can be followed anytime",
]

plot(data=data, title=question, order=order, output_file="format.png")