This notebook is responsible for visualizing the data from the GitHub API, which should be provided in a serialized format.

Requirement: It must be possible to run this notebook using JupyterLite.

For an exemplary use case, please see the project's `README.md`.

Important aspects of the different cells, and how to use them, are explained in the markdown comments above them.

### Troubleshooting:

#### Even after reloading (deep-refreshing) the page, a notebook is not updated

This likely occurred because the backend and frontend of JupyterLite are out of sync.
I am not yet sure why this happens even after a deep-refresh of the page, as this should update the frontend according to the JupyterLite documentation.
As a workaround, delete the cache/cookies for the page and reload it.
Note that this will reset all notebooks to their versions saved on GitHub, so download your notebooks first if necessary.
You will also have to re-run the notebooks.

An additional necessary step may be to "delete" the files in question in JupyterLite, which should refresh them with the version on GitHub.

#### The `commits.json` cannot be found when it is being loaded

Same reasoning & workaround as above.

In [None]:
# We need to install dependencies for the frontend separately in JupyterLite
%pip install ipywidgets plotly pandas tabulate

In [None]:
import json
from datetime import datetime
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets
from IPython import display
import tabulate #https://pypi.org/project/tabulate/

In [None]:
# Load the serialized commit list and flatten the nested JSON objects into
# dotted column names such as 'commit.author.date'.
with open('commits.json', encoding='utf-8') as f:
    data = json.load(f)
df = pd.json_normalize(data)

# Parse all author timestamps in one vectorized call instead of row by row.
df['_date'] = pd.to_datetime(df['commit.author.date'], format='%Y-%m-%dT%H:%M:%SZ')
# A commit with two or more parents is a merge commit.
df['_merge_commit'] = df['parents'].map(len) >= 2

# Qualifying commits: non-merge commits that are linked to a GitHub login.
# An explicit copy is taken because a later cell adds the sprint columns to
# `cs`; writing into a filtered slice of `df` triggers SettingWithCopyWarning.
cs = df[~df['_merge_commit'] & df['author.login'].notna()].copy()

In [None]:
# Summarize how many commits were loaded and how many remain after filtering.
is_merge = df['_merge_commit']
lacks_login = df['author.login'].isna()
first_commit = cs['_date'].min()
last_commit = cs['_date'].max()

summary_rows = [
    ["Retrieved commits from repository:", len(df)],
    ["Merge commits that are excluded:", int(is_merge.sum())],
    ["Non-merge commits missing GitHub user:", int((~is_merge & lacks_login).sum())],
    ["Amount of qualifying commits:", len(cs)],
    ["Unique authors in qualifying commits:", len(cs['author.login'].unique())],
    ["Earliest commit:", first_commit.date()],
    ["Latest commit:", last_commit.date()],
]
display.display(tabulate.tabulate(summary_rows, tablefmt='html'))

In [None]:
# Show which author/committer identities appear on non-merge commits that are
# not linked to a GitHub login, together with the number of such commits.
identity_columns = [
    'commit.author.name',
    'commit.author.email',
    'commit.committer.name',
    'commit.committer.email',
]
orphaned = df[~df['_merge_commit'] & df['author.login'].isna()]
orphaned_counts = orphaned.groupby(identity_columns, as_index=False).size()
# Last expression of the cell, so the styled table is rendered as its output.
orphaned_counts.style.set_caption("Information of commits not assigned to GitHub users")

In [None]:
#
# Input date ranges of sprints
#
sprints_out = ipywidgets.Output()
num_sprints = ipywidgets.BoundedIntText(
    value=1,
    min=1,
    step=1,
    description='Number of sprints:',
    style=dict(description_width='initial'),
)

# Parallel lists holding one widget per sprint. The first sprint exists from
# the start and defaults to the full range of the qualifying commits.
sprint_names = [ipywidgets.Text(value="All time", placeholder='Name to be displayed', description='Sprint Name:')]
start_dates = [ipywidgets.DatePicker(value=cs['_date'].min().date(), description='Start Date')]
end_dates = [ipywidgets.DatePicker(value=cs['_date'].max().date(), description='End Date')]

# Append everything to the output widget in the order it should be shown.
for widget in (
    ipywidgets.Label(value="Set the number of sprints to be used as the basis of analysis:"),
    num_sprints,
    ipywidgets.Label(value="Set start and end dates for each sprint:"),
    sprint_names[0],
    start_dates[0],
    end_dates[0],
):
    sprints_out.append_display_data(widget)

# Automatically add/remove the widgets based on the number of sprints
def update_sprints(change):
    """Keep the displayed sprint widgets in sync with the sprint counter.

    Registered as an observer of ``num_sprints``. ``change`` is the change
    dictionary passed to the observer; its ``'old'`` and ``'new'`` entries
    are the previous and the current number of sprints.

    The counter can move by more than one at a time (for example when a
    number is typed in), so every sprint between the old and the new value
    is handled, not just the last one.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return

    old, new = change['old'], change['new']
    if new > old:
        for number in range(old + 1, new + 1):
            # Only create new widgets if we do not have ones with this "number" already;
            # widgets of previously removed sprints are kept in the lists and re-used.
            if len(sprint_names) < number:
                sprint_names.append(ipywidgets.Text(value=f'Sprint {number}', placeholder='Set the name for this Sprint', description='Sprint Name:'))
                start_dates.append(ipywidgets.DatePicker(description='Start Date'))
                end_dates.append(ipywidgets.DatePicker(description='End Date'))
            # Display the necessary widgets for this sprint
            sprints_out.append_display_data(sprint_names[number - 1])
            sprints_out.append_display_data(start_dates[number - 1])
            sprints_out.append_display_data(end_dates[number - 1])
    elif new < old:
        # Remove the three outputs (name, start, end) of every removed sprint
        # (workaround, as clear_output() will not work here, as we are using
        # append_display_data() instead of the 'with out:' syntax)
        sprints_out.outputs = sprints_out.outputs[:-3 * (old - new)]


num_sprints.observe(update_sprints, names='value')

display.display(sprints_out)

In [None]:
# Collect name and date range of every sprint that is currently selected.
# Widgets of sprints that were removed again by lowering the sprint counter
# stay in the lists, so only the first `num_sprints.value` entries count.
sprint_data = []
for i, name in enumerate(sprint_names[:num_sprints.value]):
    start_day = start_dates[i].value
    end_day = end_dates[i].value
    # A date picker that was never filled in has the value None; fail with a
    # readable message instead of a TypeError from datetime.combine().
    if start_day is None or end_day is None:
        raise ValueError(f"Sprint '{name.value}' needs both a start date and an end date.")
    sprint_data.append({
        'name': name.value,
        # A sprint covers its first day from midnight and its last day until
        # the last representable moment of that day.
        'start': datetime.combine(start_day, datetime.min.time()),
        'end': datetime.combine(end_day, datetime.max.time()),
    })

#sprints = pd.DataFrame(sprint_data)
#bins = sprints['_sprint.start'].to_list() + [sprints.iloc[-1]['_sprint.end']]
#pd.cut(cs['_date'], bins, labels=sprints['_sprint.name'], include_lowest=True)

def assign_to_sprint(row):
    """Return ``(name, start, end)`` of the first sprint containing the commit.

    ``row`` is a commit row providing a ``'_date'`` entry. The sprints are
    checked in the order of ``sprint_data``; the start of a sprint is
    inclusive, its end exclusive. If no sprint matches, three NaN values
    are returned.
    """
    commit_time = row['_date']
    match = next(
        (s for s in sprint_data if s['start'] <= commit_time < s['end']),
        None,
    )
    if match is None:
        return np.nan, np.nan, np.nan
    return match['name'], match['start'], match['end']


# Expand the returned tuples into three new columns of the commit table.
sprint_columns = ['_sprint.name', '_sprint.start', '_sprint.end']
cs[sprint_columns] = cs.apply(assign_to_sprint, axis='columns', result_type="expand")

In [None]:
#
# Input teams and team members
#
teams_output = ipywidgets.Output()
teams_output.append_display_data(ipywidgets.Label(value="Set the number of teams using the slider, and the text fields to set names for each team."))
num_teams = ipywidgets.IntSlider(min=1, max=10, step=1, description='No. of teams')
teams_output.append_display_data(num_teams)
# The uploaded files are parsed with json.load(), which only accepts
# double-quoted strings, so the example shown to the user uses them as well.
teams_output.append_display_data(ipywidgets.Label(value='Upload a JSON file with a list of team members in the format `["githubUsername", "anotherUsername"]`'))

# Parallel lists, indexed by team number - 1
team_names = []    # Text widgets holding the team names
team_files = []    # FileUpload widgets, one per team
team_members = []  # member lists loaded from the uploaded files

# Handler for the file upload widgets
def handle_upload(change):
    """Load the member list of a team from a freshly uploaded JSON file.

    Registered as an observer of every team's FileUpload widget. ``change``
    is the change dictionary passed to the observer; ``change['owner']`` is
    the widget that received the upload. The parsed file content is stored
    in ``team_members`` at the index of that team and echoed via ``print``.
    """
    # Ignore changes that leave the widget without any uploaded file
    if not change['new']:
        return

    # Get the team index from the description of the widget ("Upload Team <n>")
    team_index = int(change['owner'].description.split(' ')[2]) - 1
    # Get the first uploaded file of that team's widget
    uploaded_file = next(iter(team_files[team_index].value))
    # Save the upload under its own name, then parse the saved file
    with open(uploaded_file.name, 'wb') as f:
        f.write(uploaded_file.content)
    with open(uploaded_file.name, encoding='utf-8') as f:
        team_members[team_index] = json.load(f)
    # Look the team name up by `team_index`; there is no local `i` here.
    print(f'Uploaded file {uploaded_file.name} for team {team_names[team_index].value}')
    # Print the team members
    print('Team members:')
    print("\n".join(team_members[team_index]))
    print()

# Create the widgets and the (still empty) member list of the first team
team_names.append(ipywidgets.Text(value='Team 1', placeholder='Set the name for this Team', description='Team Name:'))
team_files.append(ipywidgets.FileUpload(description='Upload Team 1', accept='.json'))
team_members.append([])

# Show the name field first, then the upload button
for widget in (team_names[0], team_files[0]):
    teams_output.append_display_data(widget)

# React to uploads for the first team
team_files[0].observe(handle_upload, names='value')

# Automatically add/remove the widgets based on the number of teams
def update_teams(change):
    """Keep the displayed team widgets in sync with the team slider.

    Registered as an observer of ``num_teams``. ``change`` is the change
    dictionary passed to the observer; its ``'old'`` and ``'new'`` entries
    are the previous and the current number of teams.

    The slider can move by more than one step at a time, so every team
    between the old and the new value is handled, not just the last one.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return

    old, new = change['old'], change['new']
    if new > old:
        for number in range(old + 1, new + 1):
            # Only create new widgets if we do not have ones with this "number" already;
            # widgets of previously removed teams are kept in the lists and re-used.
            if len(team_names) < number:
                team_names.append(ipywidgets.Text(value=f'Team {number}', placeholder='Set the name for this Team', description='Team Name:'))
                team_files.append(ipywidgets.FileUpload(description=f'Upload Team {number}', accept='.json'))
                team_members.append([])
                # Register the upload handler for the new team
                team_files[-1].observe(handle_upload, names='value')
            # Display the necessary widgets for this team
            teams_output.append_display_data(team_names[number - 1])
            teams_output.append_display_data(team_files[number - 1])
    elif new < old:
        # Remove the two outputs (name, upload) of every removed team
        # (workaround, as clear_output() will not work here, as we are using
        # append_display_data() instead of the 'with out:' syntax)
        teams_output.outputs = teams_output.outputs[:-2 * (old - new)]


num_teams.observe(update_teams, names='value')

display.display(teams_output)

In [None]:
# Build one (login, team name) record per uploaded team member. Widgets of
# teams that were removed again by lowering the slider stay in the lists, so
# only the first `num_teams.value` teams are taken into account.
active_teams = num_teams.value
team_data = []
for name_widget, members in zip(team_names[:active_teams], team_members[:active_teams]):
    team_data.extend(
        {'author.login': login, '_team.name': name_widget.value}
        for login in members
    )
# Name the columns explicitly: without any uploaded member the frame would
# otherwise have no columns at all and the merge below would fail on the
# missing 'author.login' key.
teams = pd.DataFrame(team_data, columns=['author.login', '_team.name'])
# teams.style.hide(axis='index').set_caption('Teams and members')

# Outer join: keeps commits of authors without a team as well as team
# members without any commit.
cst = pd.merge(cs, teams, how='outer', on='author.login')

In [None]:
# Count the rows per GitHub login that has no team, largest count first.
without_team = cst[cst['_team.name'].isna()]
unassigned = without_team.groupby(['author.login'], as_index=False).size()
unassigned = unassigned.sort_values(['size'], ascending=False)
# unassigned.style.hide(axis='index').set_caption("GitHub users w/o team")

In [None]:
# Number of commits per author; authors without a team are grouped under
# the pseudo team 'unassigned'. `count()` counts the non-null values per
# column, so the 'sha' column gives the number of commits.
counts = cst.fillna({'_team.name': 'unassigned'}) \
    .groupby(['_team.name', 'author.login'], as_index=False) \
    .count().rename(columns={'sha': 'count'}) \
    .sort_values(by=['count'])
# counts[['_team.name','author.login','count']]

# Horizontal bar chart with one trace (and thus one colour) per team. The
# hidden secondary y axis is only used to draw the average line.
fig = make_subplots(specs=[[{"secondary_y": True}]])
for team in counts['_team.name'].unique():
    team_counts = counts.loc[counts['_team.name'] == team]
    fig.add_trace(go.Bar(
        x=team_counts['count'],
        y=team_counts['author.login'],
        name=team,
        orientation='h'
    ))

# 20 px per author keeps long author lists readable, but with only a handful
# of authors the figure would be too small to show anything, hence the floor.
MIN_CHART_HEIGHT = 400
fig.update_layout(
    xaxis_title=f"Total non-merge commits by authors from {cst['_date'].min().date()} to {cst['_date'].max().date()}",
    yaxis_title="GitHub Login",
    height=max(MIN_CHART_HEIGHT, 20 * len(counts['author.login'].unique())),
    yaxis2=dict(range=[0, 1], visible=False)
)
# https://community.plotly.com/t/i-want-to-draw-line-markers-on-a-bar-plot/31020
# Vertical line at the mean: it spans the full [0, 1] range of the secondary axis.
mean = np.round(np.mean(counts['count']), decimals=1)
line = go.Scatter(x=[mean, mean], y=[0, 1], name='avg.')
fig.add_trace(line, 1, 1, secondary_y=True)

fig.show()

In [None]:
def time_of_day(date):
    """Map a timestamp to the index of its time-of-day bucket.

    ``date`` is any object with an ``hour`` attribute (0-23). The buckets
    are half-open intervals that match the axis labels of the heatmap:

    * 0 - night:     22:00 up to, but not including, 07:00
    * 1 - morning:   07:00 up to, but not including, 12:00
    * 2 - afternoon: 12:00 up to, but not including, 17:00
    * 3 - evening:   17:00 up to, but not including, 22:00
    """
    hour = date.hour
    # Strict upper bounds: e.g. 07:30 has hour == 7 and belongs to the morning.
    if hour < 7 or hour >= 22:
        return 0  # "night"
    if hour < 12:
        return 1  # "morning"
    if hour < 17:
        return 2  # "afternoon"
    return 3  # "evening"

def commit_heatmap(team=None, sprint=None):
    """Display commit-count heatmaps (weekday vs. time) in two tabs.

    ``team`` and ``sprint`` optionally restrict the commits to one team name
    or one sprint name; a falsy value means no restriction. The first tab
    bins the commits by hour, the second one by the bucket returned from
    ``time_of_day``.
    """
    # Rows without a date carry no commit and cannot be placed on the map.
    selection = cst[cst['_date'].notna()]
    if team:
        selection = selection[selection['_team.name'] == team]
    if sprint:
        selection = selection[selection['_sprint.name'] == sprint]

    dates = list(selection['_date'])
    weekdays = [d.weekday() for d in dates]
    heading = f"Commit heatmap of '{team or 'All'}' for time '{sprint or 'All'}'"

    # One entry per tab: the x values and how the x axis should be labelled.
    views = [
        dict(
            x=[d.hour for d in dates],
            nbinsx=24,
            xaxis_title='Hour',
            xaxis_tickvals=np.arange(0, 24, 1),
            xaxis_ticktext=np.arange(0, 24, 1),
        ),
        dict(
            x=[time_of_day(d) for d in dates],
            nbinsx=4,
            xaxis_title='Time of Day',
            xaxis_tickvals=np.arange(0, 4),
            xaxis_ticktext=['Night (22-7)', 'Morning (7-12)', 'Afternoon (12-17)', 'Evening (17-22)'],
        ),
    ]

    tab_outputs = []
    for view in views:
        fig = px.density_heatmap(
            x=view['x'],
            y=weekdays,
            nbinsx=view['nbinsx'],
            nbinsy=7,
            histfunc='count',
            text_auto=True,
        )
        x_axis = dict(
            title=view['xaxis_title'],
            tickvals=view['xaxis_tickvals'],
            ticktext=view['xaxis_ticktext'],
        )
        y_axis = dict(
            title='Day of the week',
            tickvals=np.arange(0, 7),
            ticktext=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
        )
        fig.update_layout(title=heading, xaxis=x_axis, yaxis=y_axis)

        # Capture the rendered figure in its own output widget so that it
        # can be used as the content of a tab.
        out = ipywidgets.Output()
        with out:
            fig.show()
        tab_outputs.append(out)

    display.display(ipywidgets.Tab(children=tab_outputs, titles=['By hour', 'By time of day']))

# Offer every known team and sprint as a filter option; the leading 'All'
# entry maps to None, which commit_heatmap treats as "no restriction".
team_options = [('All', None)]
team_options.extend((n, n) for n in cst['_team.name'].dropna().unique())
sprint_options = [('All', None)]
sprint_options.extend((n, n) for n in cst['_sprint.name'].dropna().unique())

interactive_plot = ipywidgets.interactive(commit_heatmap, team=team_options, sprint=sprint_options)
display.display(interactive_plot)