In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import datetime
pd.set_option('display.max_colwidth', None)


### Load Data

In [5]:
issues = pd.read_csv('./csv/greenkeeper_issues.csv')
comments = pd.read_csv('./csv/greenkeeper_comments.csv')
events = pd.read_csv('./csv/greenkeeper_events.csv')
commits = pd.read_csv('./csv/greenkeeper_commits.csv')
package_names = pd.read_csv('./csv/greenkeeper_package_names.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


#### Custome Helper Functions

In [6]:
def get_issue(id):
    return issues[issues['issue_id'] == id]

def get_comment(id):
    return comments[comments['comment_id'] == id]

def get_comments_for_issue(id):
    return comments[comments['comment_issue_id'] == id]

def calculate_percent(numer, denom):
    return round((numer/denom)*100, 2)

#### Augment Data

Catoegorize Columns

In [7]:
events['event_description'] = events['event_description'].astype('category')

### How often are Greenkeeper Build Breakage Issue Reports closed?

In [8]:
n_issues = len(issues)
n_closed_issues = len(issues[issues.issue_state == "closed"])
percent_closed = calculate_percent(n_closed_issues, n_issues)
print(f"{percent_closed}% of Greenkeeper issues are eventually closed")

79.82% of Greenkeeper issues are eventually closed


### How much time do Greenkeeper issues take to be closed?

In [9]:
closed_issues = issues[issues["issue_state"] == "closed"]
def calculate_time_diff(issue):
    d1 = issue.issue_created_at
    d2 = issue.issue_closed_at
    FMT = '%Y-%m-%dT%H:%M:%S%z'
    return (datetime.strptime(d2, FMT) - datetime.strptime(d1, FMT))

time_to_close_issue = pd.DataFrame({
    'issue_id': closed_issues.issue_id, 
    'time_to_close_issue': closed_issues.apply(lambda row: calculate_time_diff(row), axis=1),
})

median_time = time_to_close_issue['time_to_close_issue'].median()
print(f"The median time to close the issue is {median_time} hours")


The median time to close the issue is 4 days 11:04:25 hours


In [10]:
print("Five number summary for the time to close the Greenkeeper issue:")
print(time_to_close_issue['time_to_close_issue'].describe())

Five number summary for the time to close the Greenkeeper issue:
count                      98341
mean     43 days 04:48:28.851343
std      98 days 22:44:48.052636
min              0 days 00:00:04
25%              0 days 09:58:19
50%              4 days 11:04:25
75%             35 days 07:48:16
max           1219 days 02:01:10
Name: time_to_close_issue, dtype: object


In [11]:
time_to_close_issue['time_to_close_issue_hours'] = \
    (time_to_close_issue['time_to_close_issue'].astype('timedelta64[s]')) / 60 / 60

# TODO: Figure out this graph!

# time_to_close_issue['time_to_close_issue_hours']
# ax = sns.kdeplot(
#     time_to_close_issue['time_to_close_issue_hours'],
#     shade=True,
#     bw=.01,
# )
# ax. set(xscale="log")
# ax = sns.violinplot(
#     data=time_to_close_issue,
#     y="time_to_close_issue_hours"
# )
# ax.set(yscale="symlog")
# ax = sns.distplot(time_to_close_issue["time_to_close_issue_hours"])
# ax.set(xscale="log")

### How many comments do Greenkeeper Breaking Issues have? How many of these are from Greenkeeper?

In [12]:
print("Five number summary for the number of comments on Greenkeeper Breakage issues:")
print(issues["issue_num_comments"].describe())

comments_by_greenkeeper = comments[(comments['comment_user_type'] == "Bot") & comments['comment_user_login'].str.contains("greenkeeper")]
percent_comments_by_greenkeeper = calculate_percent(len(comments_by_greenkeeper), len(comments))
print(f"{percent_comments_by_greenkeeper}% of comments on breaking issue reports opened by Greenkeeper are from the Greenkeeper bot.")



Five number summary for the number of comments on Greenkeeper Breakage issues:
count    123197.000000
mean          4.775555
std          29.850343
min           0.000000
25%           1.000000
50%           1.000000
75%           3.000000
max        2500.000000
Name: issue_num_comments, dtype: float64
96.89% of comments on breaking issue reports opened by Greenkeeper are from the Greenkeeper bot.


Create issue_first_comment frame

In [23]:
## This could take a few minutes
def get_first_comment(issue):
    issue_comments = get_comments_for_issue(issue['issue_id'])
    if issue_comments.empty:
        return None
    first_comment = issue_comments[issue_comments['comment_id'] == issue_comments['comment_id'].min()]
    first_comment
    return first_comment['comment_id'].values[0].astype(np.int64)


# issues.head(10).apply(lambda row: get_first_comment(row), axis=1),

issue_first_comment = pd.DataFrame({
    'issue_id': issues.issue_id, 
    'comment_id': issues.apply(lambda row: get_first_comment(row), axis=1),
})
issue_first_comment_filtered = issue_first_comment[pd.notna(issue_first_comment['comment_id'])]

In [29]:
issue_first_comment_filtered = pd.DataFrame(issue_first_comment[pd.notna(issue_first_comment['comment_id'])])
percent_with_min_one_comment = calculate_percent(len(issue_first_comment_filtered), len(issue_first_comment))
print(f"{percent_with_min_one_comment}% of issues have at least 1 comment.")

85.9% of issues have at least 1 comment.


### How successful is pinning the dependency?

In [37]:
## This might take a few minutes

pin_fail_string = "your tests are still failing"
pin_success_string = "your tests are passing again"

def comment_body_contains_string(issue_first_comment, string):
    if pd.isna(issue_first_comment['comment_id']):
        return False
    comment = comments[comments['comment_id'] == issue_first_comment['comment_id']]
    return comment['comment_body'].str.contains(string, na=False).values[0]


issue_first_comment_filtered['successful_pin'] = \
    issue_first_comment_filtered.apply(lambda row: comment_body_contains_string(row, pin_success_string), axis=1)

issue_first_comment_filtered['failed_pin'] = \
    issue_first_comment_filtered.apply(lambda row: comment_body_contains_string(row, pin_fail_string), axis=1)

In [46]:
pin_attempts = issue_first_comment_filtered[
    (issue_first_comment_filtered['successful_pin'] == True) | 
    (issue_first_comment_filtered['failed_pin'] == True)
]
succ_pin_count = len(pin_attempts[pin_attempts['successful_pin'] == True])
failed_pin_count = len(pin_attempts[pin_attempts['failed_pin'] == True])
total_pin_count = len(pin_attempts)
percent_pin_attempted = calculate_percent(total_pin_count, len(issues))
print(f"{percent_pin_attempted}% of issues were attempted to be resolved by automatically pinning the dependency.")
percent_succ_pin_count = calculate_percent(succ_pin_count, total_pin_count)
percent_failed_pin_count = calculate_percent(failed_pin_count, total_pin_count)
print(f"Of those, {percent_succ_pin_count}% were successful, and {percent_failed_pin_count}% were not successful.")

71.86% of issues were attempted to be resolved by automatically pinning the dependency.
Of those, 33.07% were successful, and 66.93% were not successful.


In [94]:
succ_pin_ids = pin_attempts[pin_attempts['successful_pin'] == True]['issue_id']
issues_with_succ_pin = issues[issues['issue_id'].isin(succ_pin_ids)]
# issues_with_succ_pin[issues_with_succ_pin['issue_num_comments'] > 1]['issue_url']

issues_with_succ_pin.head()

Unnamed: 0,issue_id,issue_number,issue_url,issue_title,issue_state,issue_is_locked,issue_created_at,issue_updated_at,issue_closed_at,issue_user_login,issue_labels,issue_num_comments,issue_events_url,issue_dependency_name,issue_dependency_type,issue_dependency_actual_version,issue_dependency_next_version,issue_dependency_bundle_name,issue_body_parser,issue_repo_url
11,629409768,24,https://api.github.com/repos/PeterNgTr/codeceptjs-bshelper/issues/24,An in-range update of tinyurl is breaking the build 🚨,open,False,2020-06-02T18:09:34Z,2020-06-02T18:10:25Z,,greenkeeper[bot],[greenkeeper],1,https://api.github.com/repos/PeterNgTr/codeceptjs-bshelper/issues/24/events,,,,,,,https://api.github.com/repos/PeterNgTr/codeceptjs-bshelper
15,629555588,22,https://api.github.com/repos/DaNautilus/node-native-helpers/issues/22,An in-range update of typedoc-plugin-markdown is breaking the build 🚨,open,False,2020-06-02T22:27:25Z,2020-06-02T22:28:35Z,,greenkeeper[bot],[greenkeeper],1,https://api.github.com/repos/DaNautilus/node-native-helpers/issues/22/events,,,,,,,https://api.github.com/repos/DaNautilus/node-native-helpers
25,628104259,54,https://api.github.com/repos/Apollon77/node-mbus/issues/54,An in-range update of nyc is breaking the build 🚨,closed,False,2020-06-01T02:16:00Z,2020-06-29T22:08:43Z,2020-06-29T22:08:43Z,greenkeeper[bot],[greenkeeper],1,https://api.github.com/repos/Apollon77/node-mbus/issues/54/events,,,,,,,https://api.github.com/repos/Apollon77/node-mbus
43,629135051,146,https://api.github.com/repos/ToonvanStrijp/nestjs-i18n/issues/146,An in-range update of graphql-tools is breaking the build 🚨,closed,False,2020-06-02T11:42:05Z,2020-06-02T13:52:30Z,2020-06-02T13:52:30Z,greenkeeper[bot],[greenkeeper],1,https://api.github.com/repos/ToonvanStrijp/nestjs-i18n/issues/146/events,,,,,,,https://api.github.com/repos/ToonvanStrijp/nestjs-i18n
62,627750132,121,https://api.github.com/repos/koopjs/koop-core/issues/121,An in-range update of @koopjs/logger is breaking the build 🚨,open,False,2020-05-30T14:52:25Z,2020-05-30T15:57:32Z,,greenkeeper[bot],[greenkeeper],2,https://api.github.com/repos/koopjs/koop-core/issues/121/events,,,,,,,https://api.github.com/repos/koopjs/koop-core


Get events for issues with successful pin.

It doesn't look like there is any indication on the issue report that the user took the bots advice and pinned the dependency using the bots link.

See issue_id=614163469 comment_id=625987320

In [113]:
issue_ids_with_succ_pin = issues_with_succ_pin['issue_id']
comments_for_issues_with_successful_pin = comments[comments['comment_issue_id'].isin(issue_ids_with_succ_pin)]
events_for_issues_with_successful_pin = events[events['event_issue_id'].isin(issue_ids_with_succ_pin)]

user_comments_for_issues_with_successful_pin = \
    comments_for_issues_with_successful_pin[comments_for_issues_with_successful_pin['comment_user_type'] == 'User']
user_comments_for_issues_with_successful_pin.head(20)
get_comments_for_issue(614163469)

Unnamed: 0,comment_issue_url,comment_issue_id,comment_id,comment_url,comment_created_at,comment_updated_at,comment_body,comment_author_association,comment_user_id,comment_user_login,comment_user_type
1957,https://api.github.com/repos/adobe/aio-lib-console/issues/2,614163469,625343504,https://api.github.com/repos/adobe/aio-lib-console/issues/comments/625343504,2020-05-07T15:59:26Z,2020-05-07T15:59:26Z,After pinning to **3.10.1** your tests are passing again. [Downgrade this dependency 📌](https://github.com/adobe/aio-lib-console/compare/master...adobe:greenkeeper%2Fswagger-client-pin-3.10.1).,CONTRIBUTOR,23040076,greenkeeper[bot],Bot
1958,https://api.github.com/repos/adobe/aio-lib-console/issues/2,614163469,625987320,https://api.github.com/repos/adobe/aio-lib-console/issues/comments/625987320,2020-05-08T19:46:55Z,2020-05-08T19:46:55Z,pinned,MEMBER,36107,shazron,User


Unnamed: 0,issue_id,issue_number,issue_url,issue_title,issue_state,issue_is_locked,issue_created_at,issue_updated_at,issue_closed_at,issue_user_login,issue_labels,issue_num_comments,issue_events_url,issue_dependency_name,issue_dependency_type,issue_dependency_actual_version,issue_dependency_next_version,issue_dependency_bundle_name,issue_body_parser,issue_repo_url
1313,614163469,2,https://api.github.com/repos/adobe/aio-lib-console/issues/2,An in-range update of swagger-client is breaking the build 🚨,closed,False,2020-05-07T15:51:18Z,2020-05-08T19:46:56Z,2020-05-08T19:46:55Z,greenkeeper[bot],[greenkeeper],2,https://api.github.com/repos/adobe/aio-lib-console/issues/2/events,,,,,,,https://api.github.com/repos/adobe/aio-lib-console


In [108]:
get_comments_for_issue(627750132)

Unnamed: 0,comment_issue_url,comment_issue_id,comment_id,comment_url,comment_created_at,comment_updated_at,comment_body,comment_author_association,comment_user_id,comment_user_login,comment_user_type
70,https://api.github.com/repos/koopjs/koop-core/issues/121,627750132,636341677,https://api.github.com/repos/koopjs/koop-core/issues/comments/636341677,2020-05-30T14:54:57Z,2020-05-30T14:54:57Z,After pinning to **2.0.4** your tests are passing again. [Downgrade this dependency 📌](https://github.com/koopjs/koop-core/compare/master...koopjs:greenkeeper%2F%40koopjs%2Flogger-pin-2.0.4).,CONTRIBUTOR,23040076,greenkeeper[bot],Bot
71,https://api.github.com/repos/koopjs/koop-core/issues/121,627750132,636350331,https://api.github.com/repos/koopjs/koop-core/issues/comments/636350331,2020-05-30T15:57:32Z,2020-05-30T15:57:32Z,"- The `dependency` [@koopjs/logger](https://github.com/koopjs/koop-logger) was updated from `2.0.5` to `2.0.6`.\n\n\nYour tests are passing again with this update. [Explicitly upgrade to this version 🚀](https://github.com/koopjs/koop-core/compare/master...koopjs:greenkeeper%2F%40koopjs%2Flogger-2.0.6)\n\n\n\n<details>\n<summary>Release Notes for v2.0.6</summary>\n\n<h3>Fixed</h3>\n<ul>\n<li>Update winston syntax</li>\n</ul>\n</details>\n\n<details>\n<summary>Commits</summary>\n<p>The new version differs by 3 commits.</p>\n<ul>\n<li><a href=""https://urls.greenkeeper.io/koopjs/koop-logger/commit/16d26bd5a24774946987b2fe4dd862500ddd4e2e""><code>16d26bd</code></a> <code>:package: 2.0.6</code></li>\n<li><a href=""https://urls.greenkeeper.io/koopjs/koop-logger/commit/437fe71f81db973e7441740613ad88dfa8381149""><code>437fe71</code></a> <code>Merge pull request #9 from koopjs/p/winston-syntax</code></li>\n<li><a href=""https://urls.greenkeeper.io/koopjs/koop-logger/commit/c23408f68551662b623ddbe457233f98e5914889""><code>c23408f</code></a> <code>Update winston syntax.</code></li>\n</ul>\n<p>See the <a href=""https://urls.greenkeeper.io/koopjs/koop-logger/compare/c9f6a7adc5f5738f8731c7e4444c70905c2fe52f...16d26bd5a24774946987b2fe4dd862500ddd4e2e"">full diff</a></p>\n</details>",CONTRIBUTOR,23040076,greenkeeper[bot],Bot


In [81]:
events[events['event_issue_id'] == 3513599523]


Unnamed: 0,event_issue_url,event_issue_id,event_id,event_url,event_created_at,event_description,event_actor_id,event_actor_login,event_commit_id,event_commit_url,event_label


In [92]:
# events[(events['event_description'] != 'labeled') & (events['event_description'] != 'closed') & (events['event_description'] != 'reopened')]
events[events['event_description'] == 'referenced'].head()

Unnamed: 0,event_issue_url,event_issue_id,event_id,event_url,event_created_at,event_description,event_actor_id,event_actor_login,event_commit_id,event_commit_url,event_label
32,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/issues/15,628092079,3513599523,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/issues/events/3513599523,2020-07-05T19:33:20Z,referenced,13293035.0,raymond-lam,953cf69ef9a3a5f6908b05c2594d4b96e881e913,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/commits/953cf69ef9a3a5f6908b05c2594d4b96e881e913,
34,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/issues/15,628092079,3513601137,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/issues/events/3513601137,2020-07-05T19:35:29Z,referenced,13293035.0,raymond-lam,d2c406785c3f4daa423502a62835254f24059741,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/commits/d2c406785c3f4daa423502a62835254f24059741,
669,https://api.github.com/repos/sharvit/generator-node-mdl/issues/93,622407198,3635527854,https://api.github.com/repos/sharvit/generator-node-mdl/issues/events/3635527854,2020-08-08T09:55:50Z,referenced,1262502.0,sharvit,cb2a509a8e3c17f81a058bb36dc5ff15d00a7888,https://api.github.com/repos/sharvit/generator-node-mdl/commits/cb2a509a8e3c17f81a058bb36dc5ff15d00a7888,
2009,https://api.github.com/repos/jubianchi/semver-check/issues/76,614969004,3605497508,https://api.github.com/repos/jubianchi/semver-check/issues/events/3605497508,2020-07-30T19:41:55Z,referenced,327237.0,jubianchi,88e6ff1cf2f578b2fcf052f221fc17765215f12f,https://api.github.com/repos/jubianchi/semver-check/commits/88e6ff1cf2f578b2fcf052f221fc17765215f12f,
2011,https://api.github.com/repos/jubianchi/semver-check/issues/76,614969004,3605516452,https://api.github.com/repos/jubianchi/semver-check/issues/events/3605516452,2020-07-30T19:47:08Z,referenced,327237.0,jubianchi,f19a165d1ab02d8cad202b1782415b4adf29f948,https://api.github.com/repos/jubianchi/semver-check/commits/f19a165d1ab02d8cad202b1782415b4adf29f948,


In [93]:
get_issue(628092079)

Unnamed: 0,issue_id,issue_number,issue_url,issue_title,issue_state,issue_is_locked,issue_created_at,issue_updated_at,issue_closed_at,issue_user_login,issue_labels,issue_num_comments,issue_events_url,issue_dependency_name,issue_dependency_type,issue_dependency_actual_version,issue_dependency_next_version,issue_dependency_bundle_name,issue_body_parser,issue_repo_url
24,628092079,15,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/issues/15,An in-range update of nyc is breaking the build 🚨,closed,False,2020-06-01T01:25:17Z,2020-07-05T19:35:27Z,2020-07-05T19:35:27Z,greenkeeper[bot],[greenkeeper],1,https://api.github.com/repos/raymond-lam/writable-stream-and-promise/issues/15/events,,,,,,,https://api.github.com/repos/raymond-lam/writable-stream-and-promise


### Comments by users

In [18]:
user_comments = comments[comments['comment_user_type'] == 'User']
issues_with_user_comments = issues[issues['issue_id'].isin(user_comments['comment_issue_id'])]

How long until first user response?

In [19]:
issues_with_user_comments[['issue_url', "issue_id"]].head(10)

Unnamed: 0,issue_url,issue_id
21,https://api.github.com/repos/cssinjs/caniuse-support/issues/71,629741164
26,https://api.github.com/repos/cssinjs/caniuse-support/issues/70,628799957
34,https://api.github.com/repos/cerner/carbon-graphs/issues/223,628100727
42,https://api.github.com/repos/atlassian/stricter/issues/121,628604170
44,https://api.github.com/repos/cerner/carbon-graphs/issues/226,628657186
72,https://api.github.com/repos/cssinjs/css-vendor/issues/209,627843386
106,https://api.github.com/repos/cerner/carbon-graphs/issues/221,627134159
194,https://api.github.com/repos/Mermade/oas-kit/issues/231,625292166
213,https://api.github.com/repos/cerner/carbon-graphs/issues/218,625315492
240,https://api.github.com/repos/johnpapa/vscode-peacock/issues/396,625109277


In [20]:
issue_id = 629741164

events[events['event_issue_id'] == issue_id]


Unnamed: 0,event_issue_url,event_issue_id,event_id,event_url,event_created_at,event_description,event_actor_id,event_actor_login,event_commit_id,event_commit_url,event_label
25,https://api.github.com/repos/cssinjs/caniuse-support/issues/71,629741164,3400682154,https://api.github.com/repos/cssinjs/caniuse-support/issues/events/3400682154,2020-06-03T06:53:03Z,labeled,23040076.0,greenkeeper[bot],,,greenkeeper
26,https://api.github.com/repos/cssinjs/caniuse-support/issues/71,629741164,3604755981,https://api.github.com/repos/cssinjs/caniuse-support/issues/events/3604755981,2020-07-30T16:33:52Z,closed,1459899.0,AleshaOleg,,,


In [21]:
get_comments_for_issue(issue_id)

Unnamed: 0,comment_issue_url,comment_issue_id,comment_id,comment_url,comment_created_at,comment_updated_at,comment_body,comment_author_association,comment_user_id,comment_user_login,comment_user_type
19,https://api.github.com/repos/cssinjs/caniuse-support/issues/71,629741164,637996706,https://api.github.com/repos/cssinjs/caniuse-support/issues/comments/637996706,2020-06-03T06:54:19Z,2020-06-03T06:54:19Z,After pinning to **1.3.0** your tests are still failing. The reported issue _might_ not affect your project. These imprecisions are caused by inconsistent test results.,CONTRIBUTOR,23040076,greenkeeper[bot],Bot
20,https://api.github.com/repos/cssinjs/caniuse-support/issues/71,629741164,638021467,https://api.github.com/repos/cssinjs/caniuse-support/issues/comments/638021467,2020-06-03T07:42:43Z,2020-06-03T07:42:43Z,"- The `devDependency` [merge2](https://github.com/teambition/merge2) was updated from `1.4.0` to `1.4.1`.\n\n\nYour tests are passing again with this update. [Explicitly upgrade to this version 🚀](https://github.com/cssinjs/caniuse-support/compare/master...cssinjs:greenkeeper%2Fmerge2-1.4.1)\n\n\n\n<details>\n<summary>Commits</summary>\n<p>The new version differs by 1 commits.</p>\n<ul>\n<li><a href=""https://urls.greenkeeper.io/teambition/merge2/commit/a7300d42983d668f61d751ad2bfc42c880a29a36""><code>a7300d4</code></a> <code>Fixed: remove error listener on end.</code></li>\n</ul>\n<p>See the <a href=""https://urls.greenkeeper.io/teambition/merge2/compare/5dc3ea60a18feed7fd26f29ec2f06ddb89bf88cd...a7300d42983d668f61d751ad2bfc42c880a29a36"">full diff</a></p>\n</details>",CONTRIBUTOR,23040076,greenkeeper[bot],Bot
21,https://api.github.com/repos/cssinjs/caniuse-support/issues/71,629741164,666510646,https://api.github.com/repos/cssinjs/caniuse-support/issues/comments/666510646,2020-07-30T16:33:52Z,2020-07-30T16:33:52Z,https://github.com/cssinjs/caniuse-support/pull/73,MEMBER,1459899,AleshaOleg,User
