Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to contributor resolution #1508

Merged
merged 14 commits into from
Oct 30, 2021
Merged
1 change: 1 addition & 0 deletions augur/routes/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def get_repo_info():
repo_info.fork_count,
repo_info.watchers_count,
repo_info.stars_count,
repo_info.commit_count,
repo_info.committers_count,
repo_info.open_issues,
repo_info.issues_count,
Expand Down
3 changes: 3 additions & 0 deletions docs/source/rest-api/spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,9 @@ paths:
stars_count:
description: 'Example: 443'
type: integer
commit_count:
description: 'Example: 4434'
type: integer
committers_count:
description: 'Example: 42'
type: integer
Expand Down
4 changes: 2 additions & 2 deletions metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

__short_description__ = "Python 3 package for free/libre and open-source software community metrics & data collection"

__version__ = "0.21.6"
__release__ = "v0.21.6"
__version__ = "0.21.7"
__release__ = "v0.21.7"

__license__ = "MIT"
__copyright__ = "CHAOSS & Augurlabs 2021"
3 changes: 3 additions & 0 deletions schema/create_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
\i schema/generate/05-seed_data.sql
\i schema/generate/79-schema_update_81.sql
\i schema/generate/80-schema_update_82.sql
\i schema/generate/83-schema_update_84.sql



-- prior update scripts incorporated into
-- augur.sql file for release v0.21.1
Expand Down
39 changes: 39 additions & 0 deletions schema/generate/83-schema_update_84.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
BEGIN;

-- Helper for re-runnable schema updates: adds a constraint to a table only
-- when that table does not already carry a constraint with the given name.
--
-- Parameters:
--   t_name          table to alter; may be schema-qualified
--                   (e.g. 'augur_data.pull_request_files')
--   c_name          name of the constraint to look for / create
--   constraint_sql  constraint definition appended after the name
--                   (e.g. 'UNIQUE ("col_a", "col_b")')
--
-- Returns nothing. Raises an error if t_name does not resolve to an existing
-- relation (the ALTER TABLE below would fail in that case as well).
CREATE OR REPLACE FUNCTION create_constraint_if_not_exists (t_name text, c_name text, constraint_sql text)
RETURNS void
AS
$BODY$
BEGIN
-- Look for our constraint on the target table only.
-- pg_constraint is used instead of information_schema.constraint_column_usage
-- because that view (a) is not filtered by table here, so a same-named
-- constraint on a different table would wrongly suppress creation, and
-- (b) only lists tables owned by a currently enabled role, so a non-owner
-- would never see the existing constraint and the ALTER TABLE would fail.
IF NOT EXISTS (SELECT 1
FROM pg_catalog.pg_constraint
WHERE conname = c_name
AND conrelid = t_name::regclass) THEN
-- t_name, c_name and constraint_sql are spliced in verbatim; callers are
-- trusted migration scripts, not user input.
EXECUTE 'ALTER TABLE ' || t_name || ' ADD CONSTRAINT ' || c_name || ' ' || constraint_sql;
END IF;
END;
$BODY$
LANGUAGE plpgsql VOLATILE;

-- Record the schema version reached by this first transaction.
update "augur_operations"."augur_settings" set value = 83 where setting = 'augur_data_version';


COMMIT;

-- Schema update 84: de-duplicate pull_request_files, then add natural-key
-- UNIQUE constraints to pull_request_files and pull_request_commits.
BEGIN;

-- Remove duplicate pull_request_files rows so the UNIQUE constraint below can
-- be created. For every (pull_request_id, repo_id, pr_file_path) group, any
-- row that has a sibling with a larger pr_file_id is deleted, i.e. only the
-- row with the highest pr_file_id survives. Rows with NULL in any of the
-- three compared columns never match the "=" joins and are left untouched.
-- NOTE(review): the table is referenced without a schema here but as
-- augur_data.pull_request_files below -- presumably search_path includes
-- augur_data; confirm.
DELETE FROM pull_request_files a
USING pull_request_files b
WHERE a.pr_file_id < b.pr_file_id
AND a.pull_request_id = b.pull_request_id
AND a.repo_id = b.repo_id
AND a.pr_file_path = b.pr_file_path;

-- Add the natural-key constraint unless a constraint of that name exists.
-- NOTE(review): this constraint_sql argument ends with ';' while the call
-- below does not -- confirm the trailing semicolon is intended.
SELECT create_constraint_if_not_exists('augur_data.pull_request_files', 'prfiles_unique', 'UNIQUE ("pull_request_id", "repo_id", "pr_file_path");');

-- NOTE(review): unlike pull_request_files, pull_request_commits is not
-- de-duplicated in this script; if duplicate
-- (pull_request_id, repo_id, pr_cmt_sha) rows exist, adding this constraint
-- will fail and the transaction will abort -- verify.
SELECT create_constraint_if_not_exists('augur_data.pull_request_commits', 'pr_commit_nk', 'UNIQUE ("pull_request_id", "repo_id", "pr_cmt_sha")');

-- Record the new schema version.
update "augur_operations"."augur_settings" set value = 84 where setting = 'augur_data_version';


COMMIT;
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"Intended Audience :: Developers",
"Topic :: Software Development :: Version Control",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.8",
],
install_requires=[
"wheel",
Expand Down
16 changes: 9 additions & 7 deletions workers/github_worker/github_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def is_valid_pr_block(issue):
#The problem happens when ['insert'] is empty but ['all'] is not.
if len(inc_source_issues['insert']) > 0:
inc_source_issues['insert'] = self.enrich_cntrb_id(
inc_source_issues['insert'], 'user.login', action_map_additions={
inc_source_issues['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -113,11 +113,11 @@ def is_valid_pr_block(issue):
if is_valid_pr_block(issue) else None
),
'created_at': issue['created_at'],
'issue_title': issue['title'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'issue_title': str(issue['title']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
issue['title']
) else None,
# 'issue_body': issue['body'].replace('0x00', '____') if issue['body'] else None,
'issue_body': issue['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'issue_body': str(issue['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
issue['body']
) else None,
'comment_count': issue['comments'],
Expand Down Expand Up @@ -202,6 +202,8 @@ def issues_model(self, entry_info, repo_id):
if pk_source_issues:
try:
self.issue_comments_model(pk_source_issues)
issue_events_all = self.issue_events_model(pk_source_issues)
self.issue_nested_data_model(pk_source_issues, issue_events_all)
except Exception as e:
self.logger.info(f"issue comments model failed on {e}. exception registered")
stacker = traceback.format_exc()
Expand Down Expand Up @@ -261,7 +263,7 @@ def issue_comments_insert(inc_issue_comments, comment_action_map):
#This is sending empty data to enrich_cntrb_id, fix with check
if len(inc_issue_comments['insert']) > 0:
inc_issue_comments['insert'] = self.enrich_cntrb_id(
inc_issue_comments['insert'], 'user.login', action_map_additions={
inc_issue_comments['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -427,7 +429,7 @@ def issue_events_model(self, pk_source_issues):
#This is sending empty data to enrich_cntrb_id, fix with check
if len(pk_issue_events) > 0:
pk_issue_events = self.enrich_cntrb_id(
pk_issue_events, 'actor.login', action_map_additions={
pk_issue_events, str('actor.login'), action_map_additions={
'insert': {
'source': ['actor.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -494,7 +496,7 @@ def issue_nested_data_model(self, pk_source_issues, issue_events_all):
if len(events_df):
events_df = pd.DataFrame(
self.enrich_cntrb_id(
events_df.to_dict(orient='records'), 'actor.login', action_map_additions={
events_df.to_dict(orient='records'), str('actor.login'), action_map_additions={
'insert': {
'source': ['actor.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -631,7 +633,7 @@ def is_nan(value):
self.logger.info(f"source_assignees_insert after organize_needed_data: {source_assignees_insert}")
if len(source_assignees_insert) > 0:
source_assignees_insert = self.enrich_cntrb_id(
source_assignees_insert, 'login', action_map_additions={
source_assignees_insert, str('login'), action_map_additions={
'insert': {
'source': ['node_id'],
'augur': ['gh_node_id']
Expand Down
27 changes: 14 additions & 13 deletions workers/pull_request_worker/pull_request_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def pk_source_increment_insert(inc_source_prs, action_map):
#This is sending empty data to enrich_cntrb_id, fix with check
if len(inc_source_prs['insert']) > 0:
inc_source_prs['insert'] = self.enrich_cntrb_id(
inc_source_prs['insert'], 'user.login', action_map_additions={
inc_source_prs['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -447,9 +447,9 @@ def pk_source_increment_insert(inc_source_prs, action_map):
'pr_src_number': pr['number'],
'pr_src_state': pr['state'],
'pr_src_locked': pr['locked'],
'pr_src_title': pr['title'],
'pr_src_title': str(pr['title']),
'pr_augur_contributor_id': pr['cntrb_id'],
'pr_body': pr['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'pr_body': str(pr['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
pr['body']
) else None,
'pr_created_at': pr['created_at'],
Expand Down Expand Up @@ -664,7 +664,7 @@ def pr_comments_insert(inc_pr_comments, comment_action_map, comment_ref_action_m
#This is sending empty data to enrich_cntrb_id, fix with check
if len(inc_pr_comments['insert']) > 0:
inc_pr_comments['insert'] = self.enrich_cntrb_id(
inc_pr_comments['insert'], 'user.login', action_map_additions={
inc_pr_comments['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -677,7 +677,7 @@ def pr_comments_insert(inc_pr_comments, comment_action_map, comment_ref_action_m
pr_comments_insert = [
{
'pltfrm_id': self.platform_id,
'msg_text': comment['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'msg_text': str(comment['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
comment['body']
) else None,
'msg_timestamp': comment['created_at'],
Expand Down Expand Up @@ -811,7 +811,7 @@ def pull_request_events_model(self, pk_source_prs=[]):

if len(pk_pr_events) > 0:
pk_pr_events = self.enrich_cntrb_id(
pk_pr_events, 'actor.login', action_map_additions={
pk_pr_events, str('actor.login'), action_map_additions={
'insert': {
'source': ['actor.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -890,7 +890,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):

if len(source_reviews_insert) > 0:
source_reviews_insert = self.enrich_cntrb_id(
source_reviews_insert, 'user.login', action_map_additions={
source_reviews_insert, str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -906,7 +906,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):
'cntrb_id': review['cntrb_id'],
'pr_review_author_association': review['author_association'],
'pr_review_state': review['state'],
'pr_review_body': review['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'pr_review_body': str(review['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
review['body']
) else None,
'pr_review_submitted_at': review['submitted_at'] if (
Expand Down Expand Up @@ -1007,7 +1007,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):

if len(review_msgs['insert']) > 0:
review_msgs['insert'] = self.enrich_cntrb_id(
review_msgs['insert'], 'user.login', action_map_additions={
review_msgs['insert'], str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand All @@ -1020,7 +1020,7 @@ def pull_request_reviews_model(self, pk_source_prs=[]):
review_msg_insert = [
{
'pltfrm_id': self.platform_id,
'msg_text': comment['body'].encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
'msg_text': str(comment['body']).encode(encoding='UTF-8',errors='backslashreplace').decode(encoding='UTF-8',errors='ignore') if (
comment['body']
) else None,
'msg_timestamp': comment['created_at'],
Expand Down Expand Up @@ -1209,7 +1209,7 @@ def pull_request_nested_data_model(self, pk_source_prs=[]):

if len(source_reviewers_insert) > 0:
source_reviewers_insert = self.enrich_cntrb_id(
source_reviewers_insert, 'login', action_map_additions={
source_reviewers_insert, str('login'), action_map_additions={
'insert': {
'source': ['node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -1254,7 +1254,7 @@ def pull_request_nested_data_model(self, pk_source_prs=[]):

if len(source_assignees_insert) > 0:
source_assignees_insert = self.enrich_cntrb_id(
source_assignees_insert, 'login', action_map_additions={
source_assignees_insert, str('login'), action_map_additions={
'insert': {
'source': ['node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -1299,7 +1299,7 @@ def pull_request_nested_data_model(self, pk_source_prs=[]):

if len(source_meta_insert) > 0:
source_meta_insert = self.enrich_cntrb_id(
source_meta_insert, 'user.login', action_map_additions={
source_meta_insert, str('user.login'), action_map_additions={
'insert': {
'source': ['user.node_id'],
'augur': ['gh_node_id']
Expand Down Expand Up @@ -1338,6 +1338,7 @@ def query_pr_repo(self, pr_repo, pr_repo_type, pr_meta_id):

table = 'pull_request_repo'
duplicate_col_map = {'pr_src_repo_id': 'id'}
##TODO Need to add pull request closed here.
update_col_map = {}
table_pkey = 'pr_repo_id'

Expand Down
2 changes: 1 addition & 1 deletion workers/worker_git_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def enrich_cntrb_id(
self.logger.info("login: {}".format(data[f'{prefix}login']))

try:
url = ("https://api.github.com/users/" + data[f'{prefix}login'])
url = ("https://api.github.com/users/" + str(data[f'{prefix}login']))
except Exception as e:
self.logger.info(f"Error when creating url: {e}. Data: {data}")
continue
Expand Down