In [None]:
from IPython.display import display, Markdown
from dotenv import find_dotenv, load_dotenv

import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns

# load env vars
load_dotenv(find_dotenv())

# NOTE: the GitHub PAT must already exist as an env var before importing
# srcopsmetrics; this is a known bug in that library
from srcopsmetrics.entities.issue import Issue  # noqa: E402
from srcopsmetrics.entities.pull_request import PullRequest  # noqa: E402

In [None]:
# Apply seaborn's default theme to every figure drawn below.
# (set_theme is the current name for sns.set; both exist since seaborn 0.11,
# which this notebook already needs for histplot.)
sns.set_theme()

In [None]:
# Build the Issue entity for the support repo and pull its stored knowledge
# into a DataFrame (is_local=True -- presumably reads from local storage
# rather than a remote store; confirm against srcopsmetrics).
issue_entity = Issue("operate-first/support")

# reset_index() moves the loaded index into a regular column and leaves a
# default integer index behind.
issues_df = issue_entity.load_previous_knowledge(is_local=True).reset_index()
issues_df.head()

In [None]:
# Keep only the issues that carry at least one onboarding label.
# Each entry of the "labels" column is a mapping keyed by label name.
onboard_labels = {'onboarding', 'kind/onboarding'}
onboard_filter = issues_df["labels"].apply(
    lambda issue_labels: not onboard_labels.isdisjoint(issue_labels.keys())
)
onboard_issues_df = issues_df[onboard_filter]
onboard_issues_df.head()

In [None]:
# Time to close = closed_at - created_at
# (presumably NaT for issues that are still open -- TODO confirm).
#
# .assign() returns a new frame instead of writing a column in place:
# onboard_issues_df is a filtered selection of issues_df, and item assignment
# on such a selection triggers pandas' SettingWithCopyWarning.
onboard_issues_df = onboard_issues_df.assign(
    time_to_close=lambda df: df['closed_at'] - df['created_at']
)

# summary stats
onboard_issues_df['time_to_close'].describe()

In [None]:
# Distribution of closing times, expressed in hours.
hours_to_close = onboard_issues_df['time_to_close'].dt.total_seconds() / 3600

fig, ax = plt.subplots(figsize=(20, 8))
# stat="probability" normalises bar heights so they sum to 1
sns.histplot(hours_to_close, bins=50, stat="probability", ax=ax)
ax.set_ylabel("Proportion of Issues")
ax.set_xlabel("Time to Close Issue (hours)")
ax.set_title("Distribution of time taken to close issue")
plt.show()

In [None]:
# Issues that took longer than ~6 months (6 * 30 days) to close.
took_over_six_months = onboard_issues_df['time_to_close'] > pd.Timedelta(days=6 * 30)
onboard_issues_df[took_over_six_months].head()

In [None]:
# Running (expanding) mean of time to close, taken in issue-creation order.
# Computed in seconds first, then converted to days.
mttr_till_now = (
    onboard_issues_df.sort_values(by='created_at')['time_to_close']
    .dt.total_seconds()
    .expanding()
    .mean()
    .rename('mttr_till_now')
)
mttr_till_now_days = mttr_till_now / (24 * 60 * 60)

# Attach the running mean (in days) as column 'mttr_till_now'.
# .assign() aligns the series on the row index (unique here, since it comes
# from reset_index() when the data was loaded) and overwrites the column on
# re-run. The previous index-on-index merge was not idempotent: a second run
# of the cell produced 'mttr_till_now_x' / 'mttr_till_now_y' and broke the
# plotting cell that reads 'mttr_till_now'.
onboard_issues_df = onboard_issues_df.assign(mttr_till_now=mttr_till_now_days)
onboard_issues_df.head()

In [None]:
# How does the mean time to close (aggregated up to each issue) evolve over time?
# The 'mttr_till_now' column is expressed in days.
fig, ax = plt.subplots(figsize=(20, 8))
# x / y must be passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises TypeError otherwise. ax=ax draws on the figure
# created above rather than relying on the implicit "current axes".
sns.lineplot(data=onboard_issues_df, x='created_at', y='mttr_till_now', ax=ax)
ax.set_ylabel("Mean Time to Resolve (agg until now)")
ax.set_xlabel("Date")
ax.set_title("Distribution of overall MTTR over time")
plt.show()

In [None]:
# What if we only consider the MTTR within each sprint (~14 days)?
# Index the closing times by creation date and express them in days
# (timedelta -> seconds -> days).
ttr = (
    onboard_issues_df[["created_at", "time_to_close"]]
    .set_index("created_at")
    .assign(time_to_close=lambda df: df["time_to_close"].dt.total_seconds() / (24 * 3600))
)

# mean time to close per two-week bin of creation date
ttr.resample("2W").mean()

In [None]:
# Spot-check the mean time to close for issues created in a single sprint.
# The original filter used "2022-04-17" for both the lower and the upper bound
# (created_at > d and created_at < d), which can never match any row.
# NOTE(review): the window start below assumes the intended range is the 14
# days ending on 2022-04-17 -- confirm the dates.
sprint_start, sprint_end = "2022-04-03", "2022-04-17"
in_sprint = (
    (onboard_issues_df["created_at"] > sprint_start)
    & (onboard_issues_df["created_at"] <= sprint_end)
)
# Average only the closing-time column: a frame-wide .mean() also covers
# non-numeric columns such as "labels", which raises on recent pandas.
onboard_issues_df.loc[in_sprint, "time_to_close"].mean()