Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to contributor resolution #1508

Merged
merged 14 commits into from
Oct 30, 2021
Merged
1 change: 1 addition & 0 deletions augur/routes/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def get_repo_info():
repo_info.fork_count,
repo_info.watchers_count,
repo_info.stars_count,
repo_info.commit_count,
repo_info.committers_count,
repo_info.open_issues,
repo_info.issues_count,
Expand Down
3 changes: 3 additions & 0 deletions docs/source/rest-api/spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,9 @@ paths:
stars_count:
description: 'Example: 443'
type: integer
commit_count:
description: 'Example: 4434'
type: integer
committers_count:
description: 'Example: 42'
type: integer
Expand Down
4 changes: 2 additions & 2 deletions metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

__short_description__ = "Python 3 package for free/libre and open-source software community metrics & data collection"

__version__ = "0.21.6"
__release__ = "v0.21.6"
__version__ = "0.21.7"
__release__ = "v0.21.7"

__license__ = "MIT"
__copyright__ = "CHAOSS & Augurlabs 2021"
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"Intended Audience :: Developers",
"Topic :: Software Development :: Version Control",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.8",
],
install_requires=[
"wheel",
Expand Down
16 changes: 9 additions & 7 deletions workers/github_worker/github_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def is_valid_pr_block(issue):
#The problem happens when ['insert'] is empty but ['all'] is not.
if len(inc_source_issues['insert']) > 0:
inc_source_issues['insert'] = self.enrich_cntrb_id(
inc_source_issues['insert'], 'user.login', action_map_additions={
inc_source_issues['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -113,11 +113,11 @@ def is_valid_pr_block(issue):
if is_valid_pr_block(issue) else None
),
'created_at': issue['created_at'],
'issue_title': issue['title'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'issue_title': str(issue['title']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
issue['title']
) else None,
# 'issue_body': issue['body'].replace('0x00', '____') if issue['body'] else None,
'issue_body': issue['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'issue_body': str(issue['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
issue['body']
) else None,
'comment_count': issue['comments'],
Expand Down Expand Up @@ -202,6 +202,8 @@ def issues_model(self, entry_info, repo_id):
if pk_source_issues:
try:
self.issue_comments_model(pk_source_issues)
issue_events_all = self.issue_events_model(pk_source_issues)
self.issue_nested_data_model(pk_source_issues, issue_events_all)
except Exception as e:
self.logger.info(f"issue comments model failed on {e}. exception registered")
stacker = traceback.format_exc()
Expand Down Expand Up @@ -261,7 +263,7 @@ def issue_comments_insert(inc_issue_comments, comment_action_map):
#This is sending empty data to enrich_cntrb_id, fix with check
if len(inc_issue_comments['insert']) > 0:
inc_issue_comments['insert'] = self.enrich_cntrb_id(
inc_issue_comments['insert'], 'user.login', action_map_additions={
inc_issue_comments['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -427,7 +429,7 @@ def issue_events_model(self, pk_source_issues):
#This is sending empty data to enrich_cntrb_id, fix with check
if len(pk_issue_events) > 0:
pk_issue_events = self.enrich_cntrb_id(
pk_issue_events, 'actor.login', action_map_additions={
pk_issue_events, str('actor.login'), action_map_additions={
'insert': {
'source': ['actor.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -494,7 +496,7 @@ def issue_nested_data_model(self, pk_source_issues, issue_events_all):
if len(events_df):
events_df = pd.DataFrame(
self.enrich_cntrb_id(
events_df.to_dict(orient='records'), 'actor.login', action_map_additions={
events_df.to_dict(orient='records'), str('actor.login'), action_map_additions={
'insert': {
'source': ['actor.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -631,7 +633,7 @@ def is_nan(value):
self.logger.info(f"source_assignees_insert after organize_needed_data: {source_assignees_insert}")
if len(source_assignees_insert) > 0:
source_assignees_insert = self.enrich_cntrb_id(
source_assignees_insert, 'login', action_map_additions={
source_assignees_insert, str('login'), action_map_additions={
'insert': {
'source': ['node_id'],
'augur': ['gh_node_id']
Expand Down
26 changes: 13 additions & 13 deletions workers/pull_request_worker/pull_request_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def pk_source_increment_insert(inc_source_prs, action_map):
#This is sending empty data to enrich_cntrb_id, fix with check
if len(inc_source_prs['insert']) > 0:
inc_source_prs['insert'] = self.enrich_cntrb_id(
inc_source_prs['insert'], 'user.login', action_map_additions={
inc_source_prs['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -447,9 +447,9 @@ def pk_source_increment_insert(inc_source_prs, action_map):
'pr_src_number': pr['number'],
'pr_src_state': pr['state'],
'pr_src_locked': pr['locked'],
'pr_src_title': pr['title'],
'pr_src_title': str(pr['title']),
'pr_augur_contributor_id': pr['cntrb_id'],
'pr_body': pr['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'pr_body': str(pr['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
pr['body']
) else None,
'pr_created_at': pr['created_at'],
Expand Down Expand Up @@ -664,7 +664,7 @@ def pr_comments_insert(inc_pr_comments, comment_action_map, comment_ref_action_m
#This is sending empty data to enrich_cntrb_id, fix with check
if len(inc_pr_comments['insert']) > 0:
inc_pr_comments['insert'] = self.enrich_cntrb_id(
inc_pr_comments['insert'], 'user.login', action_map_additions={
inc_pr_comments['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -677,7 +677,7 @@ def pr_comments_insert(inc_pr_comments, comment_action_map, comment_ref_action_m
pr_comments_insert = [
{
'pltfrm_id': self.platform_id,
'msg_text': comment['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'msg_text': str(comment['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
comment['body']
) else None,
'msg_timestamp': comment['created_at'],
Expand Down Expand Up @@ -811,7 +811,7 @@ def pull_request_events_model(self, pk_source_prs=[]):

if len(pk_pr_events) > 0:
pk_pr_events = self.enrich_cntrb_id(
pk_pr_events, 'actor.login', action_map_additions={
pk_pr_events, str('actor.login'), action_map_additions={
'insert': {
'source': ['actor.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -890,7 +890,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):

if len(source_reviews_insert) > 0:
source_reviews_insert = self.enrich_cntrb_id(
source_reviews_insert, 'user.login', action_map_additions={
source_reviews_insert, str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -906,7 +906,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):
'cntrb_id': review['cntrb_id'],
'pr_review_author_association': review['author_association'],
'pr_review_state': review['state'],
'pr_review_body': review['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'pr_review_body': str(review['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
review['body']
) else None,
'pr_review_submitted_at': review['submitted_at'] if (
Expand Down Expand Up @@ -1007,7 +1007,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):

if len(review_msgs['insert']) > 0:
review_msgs['insert'] = self.enrich_cntrb_id(
review_msgs['insert'], 'user.login', action_map_additions={
review_msgs['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -1020,7 +1020,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):
review_msg_insert = [
{
'pltfrm_id': self.platform_id,
'msg_text': comment['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'msg_text': str(comment['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
comment['body']
) else None,
'msg_timestamp': comment['created_at'],
Expand Down Expand Up @@ -1209,7 +1209,7 @@ def pull_request_nested_data_model(self, pk_source_prs=[]):

if len(source_reviewers_insert) > 0:
source_reviewers_insert = self.enrich_cntrb_id(
source_reviewers_insert, 'login', action_map_additions={
source_reviewers_insert, str('login'), action_map_additions={
'insert': {
'source': ['node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -1254,7 +1254,7 @@ def pull_request_nested_data_model(self, pk_source_prs=[]):

if len(source_assignees_insert) > 0:
source_assignees_insert = self.enrich_cntrb_id(
source_assignees_insert, 'login', action_map_additions={
source_assignees_insert, str('login'), action_map_additions={
'insert': {
'source': ['node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -1299,7 +1299,7 @@ def pull_request_nested_data_model(self, pk_source_prs=[]):

if len(source_meta_insert) > 0:
source_meta_insert = self.enrich_cntrb_id(
source_meta_insert, 'user.login', action_map_additions={
source_meta_insert, str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand Down
2 changes: 1 addition & 1 deletion workers/worker_git_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def enrich_cntrb_id(
self.logger.info("login: {}".format(data[f'{prefix}login']))

try:
url = ("https://api.github.com/users/" + data[f'{prefix}login'])
url = ("https://api.github.com/users/" + str(data[f'{prefix}login']))
except Exception as e:
self.logger.info(f"Error when creating url: {e}. Data: {data}")
continue
Expand Down