In [1]:
import os
from dotenv import load_dotenv
from mongoengine import connect, disconnect
import pandas as pd
from pycoshark.mongomodels import Issue, Project, Commit, FileAction, VCSSystem, CodeReview
import re
from datetime import datetime, timedelta
import tqdm
import random

In [None]:
# Load environment variables from .env file
load_dotenv('../.env', override=True)

# Retrieve the connection settings from the environment
db_host = os.getenv('DB_HOST')
db_pw = os.getenv('DB_PW')
db_name = os.getenv('DB_NAME')

# Fail fast with a readable message instead of an opaque connection error
# when the .env file is missing or incomplete.
missing_settings = [
    name
    for name, value in (('DB_HOST', db_host), ('DB_PW', db_pw), ('DB_NAME', db_name))
    if not value
]
if missing_settings:
    raise RuntimeError(f"Missing environment variable(s): {', '.join(missing_settings)}")

# Never echo the password: cell outputs are stored inside the notebook file.
print(f'Connecting to {db_host} with {db_name}...')

# Drop any connection left over from a previous run so the cell can be re-executed.
disconnect()

# Pass the credentials as keyword arguments rather than formatting them into a
# URI, so passwords containing '@', ':' or '/' do not need percent-escaping.
connect(
    db=db_name,
    host=db_host,
    port=27017,
    username='root',
    password=db_pw,
    authentication_source='admin',
)

Connecting to 132.231.141.24 with smartshark; <redacted>...


MongoClient(host=['132.231.141.24:27017'], document_class=dict, tz_aware=False, connect=True, authsource='admin', read_preference=Primary())

In [4]:
# Read the tab-separated table of bugs / BIC / BFC / CGC records and preview it.
all_information = pd.read_csv('../input/all_information.csv', delimiter='\t')

all_information.head()

Unnamed: 0,Bug_ID,Project,Duplicate_Bug_IDs,BIC,BFC,BIC_CodeReview,BIC_IntroducingIssue,BIC_Files,BIC_Files_ManualMatch,BIC_BUG_ML,BIC_II_ML,BIC_OtherIssues,CGC,CG_CodeReview,CG_IntroducingIssue,CG_OtherIssues,CG_II_ML
0,Elastic Search 1134,elasticsearch,[],61ad8b614a94dabf8a263cf1edd35faa50ede36d,cbb1c35f94a36e8871301fce435d516db3cd4256,[],[1104],[ modules/elasticsearch/src/main/java/org/elas...,{},[],[],[],10660d390d760cde78fb115107c04e1cf104a4da,[],[],[],[]
1,Elastic Search 1154,elasticsearch,[],b3337c312765e51cec7bde5883bbc0a08f56fb65,31ea01bbc68f64cd7787b97479cb5deba1b529b9,[],[],[ modules/elasticsearch/src/main/java/org/elas...,"{ ""modules/elasticsearch/src/main/java/org/ela...",[],[],[],7a38e384c9ff783f17e9db92a730bdf654dbda6b,[],[],[],[]
2,Elastic Search 1162,elasticsearch,[],d4547c629f53ad76ea463dc0acb1f26f0a2b784b,b70694ce631d7b55be6edd7b9049237456a6e4b4,[],[],[ modules/elasticsearch/src/main/java/org/elas...,{},[],[],[],e5b041c8efd408fdc71fd2f2c84439e5a4985244,[],[],[],[]
3,Elastic Search 12193,elasticsearch,[],15a62448343fd24f8e63f43b1e4b16f50005e4a5,2ea45fd753b89c12431dab08c4827835c616cc1b,[],[],[ core/pom.xml ],{},[],[],[],e88535a67e9594f3135465b5021ba9b502fef950,[],[],[],[]
4,Elastic Search 1380,elasticsearch,[],adc3dc0e994ababa7917c81a61cc93f4690060d4,bd87f8de3ac84eb408d5ada0976664545c9228a0,[],[873],[ modules/elasticsearch/src/main/java/org/elas...,{},[],[],[],16a046f686c26309ee0041df8987a4d2ffedd956,[],[],[],[]


In [5]:
def flatten(xss):
    """Concatenate the items of every inner iterable of ``xss`` into one flat list.

    Only one level of nesting is removed; the order of the items is kept.
    """
    flat = []
    for inner in xss:
        flat.extend(inner)
    return flat

def str_to_list(s: str) -> list:
    """Parse a bracketed, comma-separated string such as ``'[1104, 873]'``.

    Newlines are removed, surrounding whitespace and square brackets are
    stripped, the remainder is split on commas, and each item is trimmed.
    Empty items are dropped, so ``'[]'`` yields ``[]``.

    Args:
        s: The textual list. ``None`` and NaN (e.g. an empty CSV cell read by
            pandas) are accepted and treated as an empty list.

    Returns:
        The list of non-empty item strings. Items are never converted to
        another type.
    """
    # NaN is the only float that is not equal to itself.
    if s is None or (isinstance(s, float) and s != s):
        return []

    # Strip whitespace *before* the brackets; otherwise a leading or trailing
    # blank shields the bracket and it ends up inside the first/last item.
    inner = s.replace('\n', '').strip().strip('[]')
    items = (item.strip() for item in inner.split(','))
    return [item for item in items if item]

In [12]:
# find bugs for our inducing issues
#
# For every row of `all_information`, each id listed in 'BIC_IntroducingIssue' is
# resolved to an Issue document. From that issue we follow
#   linked commits -> their file actions -> the `induces` entries of those file
#   actions -> the referenced file actions -> their commits -> the linked issues
# and record the external ids of the issues reached under 'bugs_found_<n>'
# (n = 1-based position of the introducing issue within the row).

results = []

# iterrows() has no length, so give tqdm the total to get a real progress bar.
for _, row in tqdm.tqdm(all_information.iterrows(), total=len(all_information)):
    id_project = row['Project']
    bug_id = row['Bug_ID']
    bfc = row['BFC']
    bic = row['BIC']
    introducing_issues = str_to_list(row['BIC_IntroducingIssue'])

    result = {
        'id_project': id_project,
        'bug_id': bug_id,
        'bfc': bfc,
        'bic': bic,
        'introducing_issues': introducing_issues
    }

    # check if introducing issue really introduces the correct bug
    for cnt, introducing_issue_id in enumerate(introducing_issues, start=1):
        # The id is escaped so that regex metacharacters in it are matched literally.
        # NOTE(review): this is a *prefix* match that is not restricted to the row's
        # project, so '1104' also matches e.g. '11040' and .first() returns whichever
        # document comes first - confirm that this is intended.
        introducing_issue = Issue.objects(
            external_id=re.compile(rf'^{re.escape(introducing_issue_id)}')
        ).first()

        if introducing_issue is None:
            # Issue not present in the database: record that explicitly.
            result[f'bugs_found_{cnt}'] = None
            continue

        introducing_commits = Commit.objects(linked_issue_ids=introducing_issue.id)
        introducing_file_actions = FileAction.objects(
            commit_id__in=[commit.id for commit in introducing_commits]
        )
        induced_file_action_ids = [
            induces['change_file_action_id']
            for induces in flatten([file_action.induces for file_action in introducing_file_actions])
        ]
        bug_file_actions = FileAction.objects(id__in=induced_file_action_ids)
        bug_commits = Commit.objects(id__in=[file_action.commit_id for file_action in bug_file_actions])
        bug_issues = Issue.objects(id__in=flatten([commit.linked_issue_ids for commit in bug_commits]))

        result[f'bugs_found_{cnt}'] = [bug.external_id for bug in bug_issues]

    results.append(result)

pd.DataFrame(results).to_csv('introducing_issues_check.csv', index=False)


72it [00:23,  3.11it/s]


In [19]:
# find inducing issues for our bugs
#
# Reverse direction of the check: starting from the bug issue of each row, follow
#   linked commits -> their file actions -> file actions whose `induces` entries
#   reference them -> their commits -> the linked issues
# and record the external ids of the issues reached under 'inducing_issues__found'.

results = []

# iterrows() has no length, so give tqdm the total to get a real progress bar.
for _, row in tqdm.tqdm(all_information.iterrows(), total=len(all_information)):
    id_project = row['Project']
    bug_id = row['Bug_ID']
    bfc = row['BFC']
    bic = row['BIC']
    introducing_issues = str_to_list(row['BIC_IntroducingIssue'])

    # 'Bug_ID' looks like '<project name> <number>'; the last token is used as external id.
    bug_issue = Issue.objects(external_id=bug_id.split(' ')[-1]).first()

    if bug_issue is None:
        # Bug not present in the database: record None instead of crashing on
        # `bug_issue.id` and losing the results of all previously processed rows.
        inducing_issues_found = None
    else:
        bug_commits = Commit.objects(linked_issue_ids=bug_issue.id)
        bug_file_actions = FileAction.objects(commit_id__in=[commit.id for commit in bug_commits])
        introducing_file_actions = FileAction.objects(
            induces__change_file_action_id__in=[file_action.id for file_action in bug_file_actions]
        )
        introducing_commits = Commit.objects(
            id__in=[file_action.commit_id for file_action in introducing_file_actions]
        )
        inducing_issues = Issue.objects(
            id__in=flatten([commit.linked_issue_ids for commit in introducing_commits])
        )
        inducing_issues_found = [issue.external_id for issue in inducing_issues]

    result = {
        'id_project': id_project,
        'bug_id': bug_id,
        'bfc': bfc,
        'bic': bic,
        'introducing_issues': introducing_issues,
        'inducing_issues__found': inducing_issues_found
    }
    results.append(result)

pd.DataFrame(results).to_csv('introducing_issues_check_reverse.csv', index=False)


72it [05:03,  4.22s/it]


In [6]:
# Report every commit linked to a blueprint issue that has at least one file
# action with a non-empty `induces` list.
blueprints = Issue.objects(issue_type="blueprint")
print(len(blueprints))

blueprint_ids = [blueprint.id for blueprint in blueprints]
blueprint_commits = Commit.objects(linked_issue_ids__in=blueprint_ids)

for commit in tqdm.tqdm(blueprint_commits):
    inducing_file_actions = FileAction.objects(commit_id=commit.id, induces__ne=[])

    # Nothing to report for commits without inducing file actions.
    if len(inducing_file_actions) == 0:
        continue

    print(f'{commit.revision_hash} has inducing file actions')

3062


 23%|██▎       | 188/834 [00:00<00:00, 928.88it/s]

2e1e12cd626f657d31b34fb21759a47213750646 has inducing file actions
0807b7ae9a0e5ef02dba4e50cf773032c10ace44 has inducing file actions
ddf96bcd31bad89ffa391251179ba13bb789991d has inducing file actions
57ab45323cf5617ebd2decd757e708673d949a8f has inducing file actions
a755e5d9f25c7bb06533a3799d9c39b74f334873 has inducing file actions
5505b6f438853a6c738f7192479edd8a0318284f has inducing file actions
140b3b81f924c98299c361a76042873b77745fbe has inducing file actions
d984a6d88670b60e12b8dc374607ec59f169c5f1 has inducing file actions
19b7cf21706c7975088dd52e02178e7c5f85666b has inducing file actions
7ecdfb61a92301fef8c036572367ee6d1ffc3c0d has inducing file actions
1f71696ecc2cd1abfc30f2f03f3c4857e51b9fbf has inducing file actions
ab49f97b2c08294234c7bfd3dedb75780ca519e6 has inducing file actions
191bdf2069086493be2a2e0351afa7f3efad7099 has inducing file actions
a543cb23aa30194dfb5d1fbfad4906257deeaf59 has inducing file actions
e906a8c0ec87b870b0ae75c20cf1d2da36433636 has inducing file act

 45%|████▌     | 378/834 [00:00<00:00, 942.70it/s]

9a673a8faafe8f7bcbca881964e0d3630c87c682 has inducing file actions
52b505c9cb6d40f488cc1b974ad277007731fd12 has inducing file actions
5cd1acc3bfbd70e18258d832d0910b250b1adb3e has inducing file actions
e7d704899f4e2e32a290b4fc66b22656a2a158b7 has inducing file actions
9fff6893ce2ee38d2404a96ecfd240e256fb12c3 has inducing file actions
8cddd243bfaac4454b088d49ba65d4fb3c42211a has inducing file actions
a1673d31180f3f7c90af1447573f362424c95e48 has inducing file actions
c4b28a5496e9e679c72cd6fa403fd6b82b0eceb1 has inducing file actions
7fbd787b1b5106f3c4143b9de51980ede0167d3e has inducing file actions
383e2a8bdcc9210cbe9719d3470fe15b787d46b0 has inducing file actions
583e85622bf9d7e2fdf2a8cf5157bbd1ec5f8b2b has inducing file actions
1361ea5ad128e7048430612e01d97281fd094f05 has inducing file actions
6f1f71620ad7abb3593ea484c60eee2effbc63f5 has inducing file actions
64b4271279035289c5baa94b50d37c0a7bfd3984 has inducing file actions
1356ef5b571f80d9fc9a0284034e853cb9c97233 has inducing file act

 68%|██████▊   | 571/834 [00:00<00:00, 940.05it/s]

5b5cbc64f9b4b024ec6686ed05e8b9b887d64ca1 has inducing file actions
58784943f73b00b2c7f32dfe1f7206c526ab94f5 has inducing file actions
4bd5af66b55b1ac5e5fa654eb31bf90616d62256 has inducing file actions
733d4133df8d0e13c48f45416658ec71ffff5f04 has inducing file actions
89dbd08976da6faabb5c8d0e7a29f4b7f9e8b812 has inducing file actions
c433b1df4261edf8646bece371ab15bfc8b4f234 has inducing file actions
cc630b4eb62c4a45ff08a1862a8339a0f129e0a3 has inducing file actions
94c7e7ad4333286ac7730048cacf2a6837d1c6ec has inducing file actions
741c0f60bde282c5cb7b94f4574c3f6be697a207 has inducing file actions
c6365f1b55cdbc33452fe3b40d3a7039f53f631c has inducing file actions


 92%|█████████▏| 766/834 [00:00<00:00, 956.39it/s]

6ebee92445d799a2e610116cf72b4bf3d3d6a2f3 has inducing file actions
f6f4657e9c979217969544848cf5ee3f9acb125b has inducing file actions
750aef54b198290e517bf726e137134db361ba58 has inducing file actions
11b7bcd947ba90021da5c297b023d71e55321383 has inducing file actions
8c7224172641c6194582ca4cf7ce11e907df50aa has inducing file actions
3f9411071d4c1a04ab0b68fd635597bf6959c0ca has inducing file actions
0d653f35c4c7c130065fd35bd7602207fff23b1e has inducing file actions
6f5358ac1992b17b7f3f99d9a32290e0d4740dae has inducing file actions
c741258f8fc069f223d233f46598f790834eb37b has inducing file actions
a2e53cacd3f6df268d861e2db69bfb0fe508419c has inducing file actions
7ecd502f6df01efe8ed829cd0513df1d7bf9f9da has inducing file actions
242557333a4a657ba0bc8f5aa0c934820ef26342 has inducing file actions
d02c326f23452b721ce8f516e57c1963b264ad0f has inducing file actions
8da70ef95ee5b52c1cd77137b4cf5381dd380425 has inducing file actions
c39ad2383c1921bda486d58c8670846518caed72 has inducing file act

100%|██████████| 834/834 [00:00<00:00, 943.47it/s]

1c5144330412aa57d39df60ab62ba14ac636c416 has inducing file actions
b7ad974723d2d4587e0fa90604c87f74087f0f45 has inducing file actions
e750c8e86289561483cecac93ef082658e8ae745 has inducing file actions
35a591d33d8b1a6c30bf40ddc48a07715fd87339 has inducing file actions
e37b937e063a3318b9242b41de485e0e86b96434 has inducing file actions
5e749150649bd5eb9916f43c9121f9a1904f36e7 has inducing file actions





In [6]:
# Look for blueprint references ("bp <id>", "bp: <id>", "blueprint/<id>", ...) in the
# message of one specific commit and report those whose id exists as an Issue.
blueprint_commits = Commit.objects(revision_hash="73515209cf09821a2fc922e1ae2c97650802fab4")

# Compiled once: the pattern does not depend on the commit being processed.
blueprint_pattern = re.compile(r"(?:bp|blueprint)(?::? |/)(?P<ID>(?:\w+-?)+)", re.I | re.M)

for commit in tqdm.tqdm(blueprint_commits):
    # Guard against a missing message so finditer() always receives a string.
    for m in blueprint_pattern.finditer(commit.message or ''):
        blueprint_id = m.group("ID")
        try:
            # .get() raises unless exactly one issue has this external id.
            Issue.objects.get(external_id=blueprint_id.lower())
        except (Issue.DoesNotExist, Issue.MultipleObjectsReturned):
            # Unknown or ambiguous id: skip it. Any other error (lost connection,
            # KeyboardInterrupt, ...) is no longer swallowed silently.
            continue

        print(f'Found blueprint {blueprint_id} in commit {commit.revision_hash}')

  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00, 349.64it/s]

Found blueprint v3-api-policy in commit 73515209cf09821a2fc922e1ae2c97650802fab4





In [24]:
# For every bug, check whether its bug-introducing commit (BIC) has at least one
# file action with a non-empty `induces` list.
for (bug_id, bic) in all_information[['Bug_ID', 'BIC']].itertuples(index=False):
    print(bug_id, bic)

    # A missing BIC is read by pandas as NaN, and an empty prefix would match every
    # commit, so skip both explicitly instead of querying with them.
    if not isinstance(bic, str) or not bic.strip():
        continue

    # Prefix match on the hash; escaped so the value is always matched literally.
    commit = Commit.objects(revision_hash=re.compile(rf'^{re.escape(bic)}')).first()

    if commit is None:
        continue

    inducing_file_actions = FileAction.objects(commit_id=commit.id, induces__ne=[])

    if len(inducing_file_actions) > 0:
        print(f'{bug_id} has inducing file actions')

Elastic Search 1134 61ad8b614a94dabf8a263cf1edd35faa50ede36d
Elastic Search 1134 has inducing file actions
Elastic Search 1154 b3337c312765e51cec7bde5883bbc0a08f56fb65
Elastic Search 1154 has inducing file actions
Elastic Search 1162 d4547c629f53ad76ea463dc0acb1f26f0a2b784b
Elastic Search 1162 has inducing file actions
Elastic Search 12193 15a62448343fd24f8e63f43b1e4b16f50005e4a5
Elastic Search 12193 has inducing file actions
Elastic Search 1380 adc3dc0e994ababa7917c81a61cc93f4690060d4
Elastic Search 1380 has inducing file actions
Elastic Search 14782 904cbf53409ae62bb67cd07c31ef88121cff9361
Elastic Search 14782 has inducing file actions
Elastic Search 15858 0cde90fcb10c93c56d5f7d6c9228002ffdff1fec
Elastic Search 16246 57d697125225d694376287eda7e644108ccbebda
Elastic Search 16246 has inducing file actions
Elastic Search 1626 4180a7f73ab4d32f001a1d0bdc17aebd3073be76
Elastic Search 16790 1e209e3802a693414a18bfddbb917044c50e5a6a
Elastic Search 1725 bd6b89f7cab39acf5cd2b3d5b33adbcddf69c0d1

In [25]:
# For every bug, check whether the commit in the 'CGC' column has at least one
# file action with a non-empty `induces` list.
bug_cgc_pairs = all_information[['Bug_ID', 'CGC']].itertuples(index=False)

# itertuples() has no length, so give tqdm the total to get a real progress bar.
for (bug_id, cgc) in tqdm.tqdm(bug_cgc_pairs, total=len(all_information)):
    # A missing CGC is read by pandas as NaN, and an empty prefix would match every
    # commit, so skip both explicitly instead of querying with them.
    if not isinstance(cgc, str) or not cgc.strip():
        continue

    # Prefix match on the hash; escaped so the value is always matched literally.
    commit = Commit.objects(revision_hash=re.compile(rf'^{re.escape(cgc)}')).first()

    if commit is None:
        continue

    inducing_file_actions = FileAction.objects(commit_id=commit.id, induces__ne=[])

    if len(inducing_file_actions) > 0:
        print(f'{bug_id} has inducing file actions')

71it [00:03, 21.36it/s]
