In [1]:
%run _utils.ipynb

In [5]:
def copy_reset(df):
    """Return an independent copy of ``df`` whose index has been reset.

    The previous index is dropped rather than kept as a column, and the
    frame passed in is left unmodified.
    """
    duplicate = df.copy()
    return duplicate.reset_index(drop=True)

## Load Dataset

In [4]:
# Load the two input tables used throughout this notebook. Both loaders are
# defined outside this section (presumably in _utils.ipynb — confirm).
gkirs = load_gk_issues_for_analysis()  # issue rows; 'id', 'state' and 'created_at' are read below
comments = load_comments()  # comment rows; 'comment_user_type' and 'comment_issue_id' are read below

In [20]:
# Collapse to one row per issue id, keeping the first value that groupby().first()
# reports for 'state' and 'created_at' within each id.
issue_columns = ['id', 'state', 'created_at']
gkirs_simp = (
    gkirs[issue_columns]
    .groupby('id')
    .first()
    .reset_index()
)

In [17]:
# Comments whose author type is exactly 'User'.
is_user_comment = comments['comment_user_type'] == 'User'
users_comments = copy_reset(comments[is_user_comment])

# Issues that received at least one such comment.
has_user_comment = gkirs_simp['id'].isin(users_comments['comment_issue_id'])
issues_with_user_comment = copy_reset(gkirs_simp[has_user_comment])

## Not Closed GKIRs


In [41]:
# Issues whose state is still 'open'.
gkirs_open = copy_reset(gkirs_simp[gkirs_simp['state'] == 'open'])

# Time elapsed between each issue's creation and the moment this cell runs.
# Timestamp.now(tz='UTC') is the supported replacement for the deprecated
# Timestamp.utcnow(); both return a timezone-aware UTC timestamp.
# NOTE(review): the result depends on when the cell is executed, so the
# 'open_for' statistics further down change on every re-run.
# Assumes 'created_at' is timezone-aware — TODO confirm against the loader.
gkirs_open['open_for'] = pd.Timestamp.now(tz='UTC') - gkirs_open['created_at']

In [42]:
# Share of issues in each state, relative to all issues with a non-null state.
state_counts = gkirs_simp['state'].value_counts()
n_with_state = gkirs_simp['state'].count()
proportions = (state_counts / n_with_state).to_frame(name='proportion')

print('--All GK in-range breaking issue state proportions (not calculated per project)--')
proportions

--All GK in-range breaking issue state proportions (not calculated per project)--


Unnamed: 0,proportion
closed,0.821859
open,0.178141


## Open GKIRs with no user comment

In [43]:
# Open issues whose id never appears as the target of a user comment.
open_has_user_comment = gkirs_open['id'].isin(users_comments['comment_issue_id'])
open_gkirs_with_no_user_comment = copy_reset(gkirs_open[~open_has_user_comment])

In [46]:
# Distinct open issues without a user comment, against all distinct open issues.
n_open_without_comment = open_gkirs_with_no_user_comment['id'].nunique()
n_open = gkirs_open['id'].nunique()
prop = calculate_percent(n_open_without_comment, n_open)
print(f'{prop}% of GKIRs that remain open do not have a comment from a user')

99.77% of GKIRs that remain open do not have a comment from a user


## Open GKIRs with no User Interactions (events)

In [66]:
events = load_events()
events['event_actor_login'] = events['event_actor_login'].astype('category')
# Drop events that have no actor login. The explicit .copy() makes the
# filtered frame independent of the loaded one, so adding columns to it in
# later cells does not trigger pandas' SettingWithCopyWarning.
events = events[events['event_actor_login'].notna()].copy()

In [80]:
# An actor is classified as a bot when its login contains the literal text
# "[bot]" or the word "automation"; every other actor is classified as a user.
# The pattern is a raw string so that "\[" reaches the regex engine unchanged
# instead of being an invalid Python string escape.
BOT_LOGIN_PATTERN = r'\[bot\]|automation'
is_bot_actor = events['event_actor_login'].str.contains(BOT_LOGIN_PATTERN)

# assign() builds a new frame rather than writing into `events` in place,
# which keeps this cell safe to re-run and free of SettingWithCopyWarning.
events = events.assign(
    event_actor_type=is_bot_actor.map({True: 'bot', False: 'user'})
)

user_events = copy_reset(events[events['event_actor_type'] == 'user'])

In [86]:
# user_events.groupby('event_actor_login', observed=True).size().sort_values(ascending=False).head(50)

In [89]:
# Number of user events per event description, most frequent first (top 50).
events_per_description = user_events.groupby('event_description', observed=True).size()
events_per_description.sort_values(ascending=False).head(50)

event_description
closed                      85787
labeled                      1998
referenced                   1784
assigned                     1262
mentioned                     670
unlabeled                     655
moved_columns_in_project      639
added_to_project              578
subscribed                    456
milestoned                    379
reopened                      195
locked                        119
demilestoned                   69
unassigned                     51
comment_deleted                35
renamed                        18
unsubscribed                   18
connected                      10
removed_from_project            9
unlocked                        3
merged                          3
transferred                     3
head_ref_deleted                2
marked_as_duplicate             2
pinned                          1
unpinned                        1
dtype: int64

In [91]:
# Open issues whose id never appears as the target of a user event.
open_has_user_event = gkirs_open['id'].isin(user_events['event_issue_id'])
open_gkirs_with_no_user_events = copy_reset(gkirs_open[~open_has_user_event])

In [95]:
# Distinct open issues without a user event, against all distinct open issues.
n_open_without_event = open_gkirs_with_no_user_events['id'].nunique()
n_open = gkirs_open['id'].nunique()
prop = calculate_percent(n_open_without_event, n_open)
print(f'{prop}% of GKIRs that remain open do not have an event from a user')

99.31% of GKIRs that remain open do not have an event from a user


## Open GKIRs with no Human interaction

In [94]:
# Open issues that appear in BOTH the "no user comment" and the
# "no user event" subsets.
lacks_user_comment = gkirs_open['id'].isin(open_gkirs_with_no_user_comment['id'])
lacks_user_event = gkirs_open['id'].isin(open_gkirs_with_no_user_events['id'])
open_gkirs_with_no_user_interaction = copy_reset(
    gkirs_open.loc[lacks_user_comment & lacks_user_event]
)

In [96]:
# Distinct open issues with neither a user comment nor a user event,
# against all distinct open issues.
n_open_without_interaction = open_gkirs_with_no_user_interaction['id'].nunique()
n_open = gkirs_open['id'].nunique()
prop = calculate_percent(n_open_without_interaction, n_open)
print(f'{prop}% of GKIRs that remain open do not have a comment or an event from a user')

99.14% of GKIRs that remain open do not have a comment or an event from a user


## How long have not-closed GKIRs been open?

In [37]:
# Summary statistics of how long the still-open issues have been open.
open_durations = gkirs_open['open_for']
open_durations.describe()

count                           16602
mean     1042 days 02:50:38.140791232
std       302 days 17:39:00.506957872
min          547 days 20:38:45.339442
25%          823 days 20:07:55.089442
50%          994 days 17:52:55.339442
75%         1178 days 13:50:29.339442
max         1863 days 14:14:53.339442
Name: open_for, dtype: object