This notebook visualizes data retrieved from the GitHub API, which must be provided in serialized form (the notebook loads it from `commits.json`).

Requirement: It must be possible to run this notebook using JupyterLite.

For an exemplary use case, please see the project's `README.md`.

Important aspects of the individual cells and how to use them are explained in Markdown comments above them.

### Troubleshooting:

#### Even after reloading (deep-refreshing) the page, a notebook is not updated

This likely occurred because the backend and frontend of JupyterLite are out of sync.
I am not yet sure why this happens even after a deep-refresh of the page, as this should update the frontend according to the JupyterLite documentation.
As a workaround, delete the cache/cookies for the page and reload it.
Note that this will reset all notebooks to their versions saved on GitHub, so download your notebooks first if necessary.
You will also have to re-run the notebooks.

An additional necessary step may be to "delete" the files in question in JupyterLite, which should refresh them with the version on GitHub.

#### The `commits.json` cannot be found when it is being loaded

Same reasoning & workaround as above.

In [None]:
# We need to install dependencies for the frontend separately in JupyterLite
%pip install ipywidgets plotly pandas tabulate

In [None]:
import json
from datetime import datetime
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets
from IPython import display
import tabulate #https://pypi.org/project/tabulate/

In [None]:
# Load the serialized commits. The encoding is passed explicitly so that
# non-ASCII characters in the JSON are decoded the same way on every platform
# instead of depending on the locale default.
with open('commits.json', encoding='utf-8') as f:
    data = json.load(f)
# Flatten nested objects into dotted column names such as 'commit.author.date'.
df = pd.json_normalize(data)

# Derived helper columns (prefixed with '_'). Both are computed column-wise
# rather than with a row-wise DataFrame.apply.
df['_date'] = pd.to_datetime(df['commit.author.date'], format='%Y-%m-%dT%H:%M:%SZ')
df['_merge_commit'] = df['parents'].map(len) >= 2  # two or more parents => merge commit

is_merge = df['_merge_commit']
has_login = df['author.login'].notna()

# Qualifying commits: not a merge commit and linked to a GitHub login.
cs = df[~is_merge & has_login].copy()

display.display(tabulate.tabulate(
    [["Retrieved commits from repository:", len(df)],
     ["Merge commits that are excluded:", int(is_merge.sum())],
     ["Non-merge commits missing GitHub user:", int((~is_merge & ~has_login).sum())],
     ["Amount of qualifying commits:", len(cs)],
     ["Unique authors in qualifying commits:", cs['author.login'].nunique()],
     ["Earliest commit:", cs['_date'].min().date()],
     ["Latest commit:", cs['_date'].max().date()]
    ], tablefmt='html'))

In [None]:
# Non-merge commits without a GitHub login, grouped by the author/committer
# name and e-mail recorded in the commit. Each row of the result is one
# distinct identity; the 'size' column holds its number of commits.
not_assigned = df[~df['_merge_commit'] & df['author.login'].isna()] \
    .groupby(['commit.author.name','commit.author.email','commit.committer.name','commit.committer.email'], as_index=False) \
    .size()

out = ipywidgets.Output()
out.append_display_data(not_assigned.style.set_caption("Information of commits not assigned to GitHub users"))
# The title announces a number of *commits*, so sum the per-identity sizes;
# len(not_assigned) would report the number of identity groups instead.
ipywidgets.Accordion(children=[out], titles=(f"{int(not_assigned['size'].sum())} commits not assigned to GitHub users",))

In [None]:
# Sprints
# Table of the configured sprints; it starts without rows and is refilled by
# parse_sprints whenever an input widget changes.
sprints = pd.DataFrame({column: [] for column in ('name', 'start', 'end', 'length')})
# Container that receives one HBox of input widgets per sprint.
sprint_inputs = ipywidgets.VBox()

# Output area in which the current sprint table is rendered.
sprint_out = ipywidgets.Output()
def parse_sprints(button_param):
    """Rebuild the `sprints` table from the sprint input widgets and show it.

    The function is registered as an observer on the sprint input widgets and
    is also called directly with ``None``. Rows with an empty sprint name are
    skipped.

    Args:
        button_param: Value handed over by the caller (widget change event or
            ``None``); it is not used.
    """
    sprints.drop(sprints.index, inplace=True)  # Clear rows of dataframe
    for hbox in sprint_inputs.children:
        name, start, end = (wdgt.value for wdgt in hbox.children)
        if name == '':  # Don't include sprints with empty name
            continue
        sprints.loc[len(sprints)] = [name, start, end, None]
    # A date picker without a selected date reports None. Compute the length
    # per row and leave it empty in that case, so that one incomplete sprint
    # does not raise and prevent the table below from being refreshed.
    sprints['length'] = [
        end - start if pd.notna(start) and pd.notna(end) else None
        for start, end in zip(sprints['start'], sprints['end'])
    ]
    with sprint_out:
        sprint_out.clear_output(wait=True)
        display.display(sprints)

def add_s_input_row(param):
    """Append one row of sprint input widgets and refresh the sprint table.

    The new row holds a name field plus start and end date pickers, which
    default to the dates of the earliest and latest qualifying commit. Any
    change to one of the three widgets triggers parse_sprints.

    Args:
        param: Value handed over by the caller (the clicked button or
            ``None``); it is not used.
    """
    row_number = len(sprint_inputs.children) + 1
    name_box = ipywidgets.Text(
        value=f"Sprint {row_number}",
        placeholder='Sprint Name',
        description='Sprint Name:',
    )
    start_picker = ipywidgets.DatePicker(description='Start Date', value=cs['_date'].min().date())
    end_picker = ipywidgets.DatePicker(description='End Date', value=cs['_date'].max().date())

    new_row = ipywidgets.HBox([name_box, start_picker, end_picker])
    sprint_inputs.children = sprint_inputs.children + (new_row,)

    for widget in (name_box, start_picker, end_picker):
        widget.observe(parse_sprints, names='value')
    parse_sprints(None)

# Button that appends another sprint input row when clicked.
add_s_btn = ipywidgets.Button(
    description='Add Sprint',
    button_style='success',
    icon='plus',
)
add_s_btn.on_click(add_s_input_row)
add_s_input_row(None)  # create the first sprint row right away

# Render heading, button and input rows, in this order.
display.display(
    ipywidgets.Label(value='Sprints to be used in analyses:'),
    add_s_btn,
    sprint_inputs,
)

# Teams
# Table mapping team names to GitHub logins; it starts without rows and is
# refilled by parse_teams whenever an input widget changes.
teams = pd.DataFrame({column: [] for column in ('_team.name', 'author.login')})
# Container that receives one HBox of input widgets per team.
team_inputs = ipywidgets.VBox()

# Output area in which the current team table is rendered.
teams_out = ipywidgets.Output()
def parse_teams(button_param):
    """Rebuild the `teams` table from the team input widgets and show it.

    Every team row contributes one table row per member login. The members
    field is split on commas; surrounding whitespace is removed, empty entries
    are ignored and a login repeated within the same team is only added once.
    Teams with an empty name are skipped.

    Args:
        button_param: Value handed over by the caller (widget change event or
            ``None``); it is not used.
    """
    teams.drop(teams.index, inplace=True)  # Clear rows of dataframe
    for hbox in team_inputs.children:
        team_name = hbox.children[0].value
        if team_name == '':  # Don't include teams with empty name
            continue
        # dict.fromkeys removes duplicates while keeping the entered order.
        # Stripping matters because the logins are later matched exactly
        # against 'author.login', so " user" would never match "user".
        logins = dict.fromkeys(part.strip() for part in hbox.children[1].value.split(','))
        for login in logins:
            if login:  # skip blanks from an empty field or a trailing comma
                teams.loc[len(teams)] = [team_name, login]
    with teams_out:
        teams_out.clear_output(wait=True)
        display.display(teams)

def add_t_input_row(param):
    """Append one row of team input widgets and refresh the team table.

    The new row holds a team name field and a field for the comma-separated
    member logins. Any change to one of the two widgets triggers parse_teams.

    Args:
        param: Value handed over by the caller (the clicked button or
            ``None``); it is not used.
    """
    row_number = len(team_inputs.children) + 1
    name_box = ipywidgets.Text(
        value=f"Team {row_number}",
        placeholder='Team Name',
        description='Team Name:',
    )
    members_box = ipywidgets.Text(placeholder='Comma-separated GitHub users', description='Members:')

    new_row = ipywidgets.HBox([name_box, members_box])
    team_inputs.children = team_inputs.children + (new_row,)

    for widget in (name_box, members_box):
        widget.observe(parse_teams, names='value')
    parse_teams(None)

# Button that appends another team input row when clicked.
add_t_btn = ipywidgets.Button(
    description='Add Team',
    button_style='success',
    icon='plus',
)
add_t_btn.on_click(add_t_input_row)
add_t_input_row(None)  # create the first team row right away

# Render heading, button and input rows, followed by the parsed team and
# sprint tables side by side.
display.display(
    ipywidgets.Label(value='Teams to be used in analyses:'),
    add_t_btn,
    team_inputs,
    ipywidgets.HBox([teams_out, sprint_out]),
)

#CL DefineDan,janiswehen,johannaschlimme,konrad-gerlach,richartkeil,TimRiedel,TonyBodo
#FN gwauge,Arkinul,MatthiasCr,MaxSpeer,Pungitius,Glitterrosie,simon-weissmueller

In [None]:
def assign_to_sprint(row, sprint_table=None):
    """Return the sprint a commit falls into.

    A commit belongs to the first sprint for which
    ``start <= commit date < end`` holds, i.e. the end date itself is
    excluded.

    Args:
        row: Mapping-like commit record providing the commit time as
            ``row['_date']``.
        sprint_table: DataFrame with the columns 'name', 'start' and 'end'.
            Defaults to the module-level `sprints` table.

    Returns:
        A tuple ``(name, start, end)`` of the matching sprint as stored in
        the table, or ``(nan, nan, nan)`` when no sprint matches.
    """
    if sprint_table is None:
        sprint_table = sprints
    commit_time = row['_date']
    for _, sprint in sprint_table.iterrows():
        start, end = sprint['start'], sprint['end']
        # A sprint without a start or end date cannot contain any commit.
        if pd.isna(start) or pd.isna(end):
            continue
        # The table may hold plain datetime.date values. Convert the bounds
        # to Timestamps so they can be ordered against the commit Timestamp;
        # newer pandas versions refuse to order a Timestamp against a date.
        if pd.Timestamp(start) <= commit_time < pd.Timestamp(end):
            return sprint['name'], start, end
    return np.nan, np.nan, np.nan

# Annotate every qualifying commit with the sprint it falls into, then attach
# the team of its author (left join: commits without a team are kept).
sprint_columns = ['_sprint.name', '_sprint.start', '_sprint.end']
cs[sprint_columns] = cs.apply(assign_to_sprint, axis=1, result_type='expand')
cst = cs.merge(teams, how='left', on='author.login')

# Number of commits per sprint, drawn as one timeline bar per sprint.
sprint_commits = cst.groupby(sprint_columns).size().reset_index(name='count')
fig = px.timeline(
    sprint_commits,
    x_start="_sprint.start",
    x_end="_sprint.end",
    y="_sprint.name",
    color="_sprint.name",
    text='count',
    title='Iteration timeline and total number of commits',
)
fig.show()

In [None]:
# Commit count per author, coloured by team.
# outer join includes authors w/o commits
counts = pd.merge(cs, teams, how='outer', on='author.login') \
    .fillna({'_team.name':'unassigned'}) \
    .groupby(['_team.name','author.login'], as_index=False) \
    .count().rename(columns = {'sha':'count'}) \
    .sort_values(by=['count'], ascending=False)
# counts[['_team.name','author.login','count']]

fig = px.bar(counts, x='count', y='author.login', color='_team.name', orientation='h', title='Commit counts by teams', color_discrete_map={'unassigned': 'lightgrey'})
# Give every author a 20 px row, plus room for the title, x axis and margins.
# With only the per-author share, a chart with few authors would be shorter
# than its own margins; 450 px is kept as the lower bound for the figure.
author_count = counts['author.login'].nunique()
fig.update_layout(
    xaxis_title=f"Total non-merge commits by authors from {cst['_date'].min().date()} to {cst['_date'].max().date()}",
    yaxis_title="GitHub Login",
    height=max(450, 180 + 20 * author_count)
)
fig.show()

In [None]:
# Commit counts per team, aggregated by sprint, by week and by day.
# The chart titles read 'Non-merge' (previously misspelled 'None-merge'),
# matching the wording used for the other commit statistics.
by_sprint = cst.groupby(['_team.name', '_sprint.name']).size().reset_index(name='count')
fig = px.bar(by_sprint, x="_sprint.name", y="count", color="_team.name", title='Non-merge commit counts by iteration', text_auto=True)
fig.show()

# Weekly bins anchored on Monday; 'W-MON' is the canonical upper-case spelling
# of the frequency alias.
by_week = cst.groupby('_team.name').resample('W-MON', on='_date').size().reset_index(name='count')
fig = px.bar(by_week, x="_date", y="count", color="_team.name", title='Non-merge commit counts by week')
fig.show()

by_date = cst.groupby('_team.name').resample('D', on='_date').size().reset_index(name='count')
fig = px.bar(by_date, x="_date", y="count", color="_team.name", title='Non-merge commit counts by day')
fig.show()

In [None]:
def time_of_day(row):
    """Return a label for the part of the day in which a commit was made.

    The buckets are half-open hour ranges that match their labels:
    night 23:00-06:59, morning 07:00-11:59, afternoon 12:00-16:59 and
    evening 17:00-22:59.

    Args:
        row: Mapping-like commit record; ``row['_date'].hour`` is read.

    Returns:
        One of 'Night (23-7)', 'Morning (7-12)', 'Afternoon (12-17)' or
        'Evening (17-23)'.
    """
    hour = row['_date'].hour
    # Strict upper bounds: hour 7 is already morning, 12 already afternoon
    # and 17 already evening, as the labels state.
    if hour < 7 or hour >= 23:
        return 'Night (23-7)'
    if hour < 12:
        return 'Morning (7-12)'
    if hour < 17:
        return 'Afternoon (12-17)'
    return 'Evening (17-23)'

# Store the part-of-day label of every commit in the '_time.desc' column.
cst['_time.desc'] = cst.apply(time_of_day, axis=1)

In [None]:
def time_of_day(date):
    """Return the index of the part of the day a point in time falls into.

    The buckets are half-open hour ranges: night 23:00-06:59, morning
    07:00-11:59, afternoon 12:00-16:59 and evening 17:00-22:59.

    Args:
        date: Object exposing the hour of the day as ``date.hour``.

    Returns:
        0 for night, 1 for morning, 2 for afternoon, 3 for evening.
    """
    hour = date.hour
    # Strict upper bounds: hour 7 is already morning, 12 already afternoon
    # and 17 already evening.
    if hour < 7 or hour >= 23:
        return 0  # "night"
    if hour < 12:
        return 1  # "morning"
    if hour < 17:
        return 2  # "afternoon"
    return 3  # "evening"

def commit_heatmap(team=None, sprint=None):
    """Display weekday/time commit heatmaps for a team and sprint selection.

    Two heatmaps are rendered into separate tabs: one with the hour of the
    day on the x axis and one with the part of the day. Both use the day of
    the week on the y axis and show marginal histograms.

    Args:
        team: Team name to restrict the commits to; a falsy value keeps the
            commits of all teams.
        sprint: Sprint name to restrict the commits to; a falsy value keeps
            the commits of all sprints.
    """
    # Only commits that have a date can be placed on the axes.
    selection = cst[cst['_date'].notna()]
    if team:
        selection = selection.loc[selection['_team.name'] == team]
    if sprint:
        selection = selection.loc[selection['_sprint.name'] == sprint]

    team_label = team if team else 'All'
    sprint_label = sprint if sprint else 'All'
    weekdays = [d.weekday() for d in selection['_date']]  # 0 = Monday
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # One entry per tab: the x values plus the matching axis configuration.
    hour_view = {
        'x': selection['_date'].dt.hour,
        'nbinsx': 24,
        'xaxis_title': 'Hour',
        'xaxis_tickvals': np.arange(0, 24, 1),
        'xaxis_ticktext': np.arange(0, 24, 1),
    }
    part_of_day_view = {
        'x': [time_of_day(d) for d in selection['_date']],
        'nbinsx': 4,
        'xaxis_title': 'Time of Day',
        'xaxis_tickvals': np.arange(0, 4),
        'xaxis_ticktext': ['Night (23-7)', 'Morning (7-12)', 'Afternoon (12-17)', 'Evening (17-23)'],
    }

    tab_outputs = []
    for view in (hour_view, part_of_day_view):
        heatmap = px.density_heatmap(
            x=view['x'],
            y=weekdays,
            nbinsx=view['nbinsx'],
            nbinsy=7,
            histfunc='count',
            text_auto=True,
            marginal_x='histogram',
            marginal_y='histogram',
        )
        x_axis = dict(
            title=view['xaxis_title'],
            tickvals=view['xaxis_tickvals'],
            ticktext=view['xaxis_ticktext'],
        )
        y_axis = dict(
            title='Day of the week',
            tickvals=np.arange(0, 7),
            ticktext=weekday_names,
        )
        heatmap.update_layout(
            title=f"Commit heatmap of '{team_label}' for time '{sprint_label}'",
            xaxis=x_axis,
            yaxis=y_axis,
        )
        # Render the figure into its own Output widget so it can be a tab.
        tab_output = ipywidgets.Output()
        with tab_output:
            heatmap.show()
            tab_outputs.append(tab_output)

    tabs = ipywidgets.Tab(children=tab_outputs, titles=['By hour', 'By time of day'])
    display.display(tabs)

# Selection options as (label, value) pairs: 'All' maps to None, followed by
# every team / sprint name that occurs in the commits.
team_options = [('All', None)]
team_options += [(name, name) for name in cst['_team.name'].dropna().unique()]
sprint_options = [('All', None)]
sprint_options += [(name, name) for name in cst['_sprint.name'].dropna().unique()]

# Re-render the heatmaps whenever the team or sprint selection changes.
out = ipywidgets.interactive(commit_heatmap, team=team_options, sprint=sprint_options)
display.display(out)