In [1]:
import json
import numpy as np

In [2]:
metrics_to_analyze = ['number_of_merged_prs', 'experience_in_days', 'avg_size_of_commits', 'mean_time_between_merged_prs', 
           'number_of_pull_requests_opened', 'pulls_opened', 'number_of_commits', 'pulls_closed', 'mean_discussion_duration',
          'mean_time_between_comments', 'mean_words', 'total_words', 'number_of_comments', 'lines_revised', 'number_files_revised',
          'number_modules_revised', 'number_reviews']

In [3]:
def load_files(system):
    pull_file_path = f'data/metrics/{system}_info_devs.json'
    refactorings_file_path = f'data/metrics/refactorings_{system}.json'
    metrics = f'data/metrics/metrics_output.json'

    with open(pull_file_path) as json_input:
        dev_info = json.load(json_input)

    with open(refactorings_file_path) as json_input:
        refactorings_info = json.load(json_input)

    with open(metrics) as json_input:
        metrics = json.load(json_input)
        
    return dev_info, refactorings_info, metrics

In [4]:
def define_dict_quartiles(system, refactorings_info, users_metrics):
    dict_metrics = {}

    for user in users_metrics:
        for metric in metrics_to_analyze:
            if metric in user:
                if metric in dict_metrics:
                    dict_metrics[metric].append(user[metric])
                else:
                    dict_metrics[metric] = []
                    dict_metrics[metric].append(user[metric])
    
    quartiles_metrics = {}
    for metric in dict_metrics:
        quartiles_metrics[metric] = {}

        quartiles_metrics[metric]["1st"] = np.percentile(list(set(dict_metrics[metric])), 25)
        quartiles_metrics[metric]["median"] = np.percentile(list(set(dict_metrics[metric])), 50)
        quartiles_metrics[metric]["3rd"] = np.percentile(list(set(dict_metrics[metric])), 75)
        
    all_refs = []
    for dev_name in refactorings_info:
        dev = refactorings_info[dev_name]
        all_refs.append(dev['num_refactorings'])

    quartiles_metrics['refactorings'] = {}
    quartiles_metrics['refactorings']["1st"] = np.percentile(list(set(all_refs)), 25)
    quartiles_metrics['refactorings']["median"] = np.percentile(list(set(all_refs)), 50)
    quartiles_metrics['refactorings']["3rd"] = np.percentile(list(set(all_refs)), 75)
    
    return quartiles_metrics

In [5]:
def get_group_quartile(dict_quartiles, metric_name, value_metric):
    if value_metric >= quartiles_metrics[metric_name]['3rd']:
        return "HIGH"
    elif value_metric <= quartiles_metrics[metric_name]['1st']:
        return "LOW"
    else:
        return "MEDIUM"

In [6]:
def get_user_quartiles_metrics(user_metrics, metrics):
    user_quartiles_metrics = {}
    for user in users_metrics:
        username = user['username']
        user_quartiles_metrics[username] = {}
        for metric in metrics:
            if metric in user:
                user_quartiles_metrics[username][metric] = get_group_quartile(dict_quartiles=quartiles_metrics, 
                                                                          metric_name=metric, 
                                                                          value_metric=user[metric])
            else:
                user_quartiles_metrics[username][metric] = "NONE"

    for user in refactorings_info:
        if user in user_quartiles_metrics:
            user_quartiles_metrics[user]['refactorings'] = get_group_quartile(dict_quartiles=quartiles_metrics, 
                                                                              metric_name='refactorings', 
                                                                              value_metric=refactorings_info[user]['num_refactorings'])
        else:
            user_quartiles_metrics[user] = {}
            user_quartiles_metrics[user]['refactorings'] = get_group_quartile(dict_quartiles=quartiles_metrics, 
                                                                              metric_name='refactorings', 
                                                                              value_metric=refactorings_info[user]['num_refactorings'])
    
    return user_quartiles_metrics

In [7]:
def get_groups_by_nfr(nfr):
    list_participates = []
    list_commited = []
    list_opened = []
    list_reviewed = []
    list_commented = []

    for current_dev in dev_info:
        dev = dev_info[current_dev]
        
        if current_dev:
            if dev[f'participates_{nfr}_high']:
                list_participates.append(current_dev)
            if dev[f'commited_{nfr}_high']:
                list_commited.append(current_dev)
            if dev[f'opened_discussion_{nfr}_high']:
                list_opened.append(current_dev)
            if dev[f'commented_{nfr}_high']:
                list_commented.append(current_dev)
            if dev[f'reviewed_{nfr}_high']:
               list_reviewed.append(current_dev)

    #print ("Participates:", list_participates)
    print ("Commits:", list_commited)
    print ("Opens:", list_opened)
    print ("Reviews:", list_reviewed)
    print ("Comments:", list_commented)

In [8]:
def analyze_whole_system(system):
    print (f"Analyzing {system}")
    dev_info, refactorings_info, metrics = load_files(system)
    print (f"Dev Info: {list(dev_info.keys())[0]}")
    print (f"Ref Info: {list(refactorings_info.keys())[0]}")

    metrics_current_system = metrics[system]
    users_metrics = metrics_current_system['user_metrics']
    print (f"User Info: {list(users_metrics[0]['username'])}")
    
    quartiles_metrics = define_dict_quartiles(system, refactorings_info, users_metrics)
    user_quartiles_metrics = get_user_quartiles_metrics(users_metrics, metrics_to_analyze)
    
    get_groups_by_nfr("security")

    

In [9]:
dev_info, refactorings_info, metrics = load_files(system)
metrics_current_system = metrics[system]
users_metrics = metrics_current_system['user_metrics']
quartiles_metrics = define_dict_quartiles(system, refactorings_info, users_metrics)

json_formatted_str = json.dumps(quartiles_metrics, indent=2)
print(json_formatted_str)

NameError: name 'system' is not defined

In [None]:
user_quartiles_metrics = get_user_quartiles_metrics(users_metrics, metrics_to_analyze)
json_formatted_str = json.dumps(user_quartiles_metrics, indent=2)
print (system)
print(json_formatted_str)

with open(f"data/metrics/groups_users_metrics_{system}.json", "w") as write_file:
    json.dump(user_quartiles_metrics, write_file, indent=4)



In [None]:
get_groups_by_nfr("security")

In [None]:
get_groups_by_nfr("robustness")

In [None]:
get_groups_by_nfr("robustness")

In [None]:
get_groups_by_nfr("maintainability")

In [None]:
get_groups_by_nfr("performance")

In [None]:
get_groups_by_nfr("performance")

<h3> All NFRs </h3>

- jgrandja: Commits, Opens, Reviews, Comments

- jzheaux: Commits, Opens, Reviews, Comments

- rwinch: Commits, Reviews, Comments

- eleftherias: Commits, Reviews, eleftherias

- marcusdacoregio: Opens

- evgeniycheban: Reviews

- ch4mpy: Reviews

- Buzzardo: Reviews

<h3> Security </h3>

- jgrandja: Commits, Opens, Reviews, Comments

- jzheaux: Commits, Opens, Reviews, Comments

- rwinch: Commits, Reviews, Comments

- evgeniycheban: Reviews

- rh-id: Reviews [Não aparecia no geral]


<h3> Robustness </h3>

- jgrandja: Commits, Opens, Reviews, Comments

- jzheaux: Reviews, Comments

- rwinch: Reviews, Comments

- DevDengChao: Commits

- fhanik: fhanik

- kostya05983: Reviews

<h3> Maintainability </h3>

- jgrandja: Opens, Comments

- jzheaux: Commits, Reviews, Comments

- rwinch: Commits, Reviews, Comments
          
- eleftherias: Commits, Reviews     

- marcusdacoregio: Commits

- evgeniycheban: Reviews

- ch4mpy: Reviews


In [14]:
<h3> Performance </h3>
Opens: ['ThomasVitale', 'larsgrefer', 'bagyoni', 'aj-jaswanth', 'bstuder', 'robotmrv', '20fps', 'stsypanov', 'avpoloz', 'miremond', 'william-tran', 'dratler', 'sedran']
Reviews: ['rwinch', 'eleftherias', 'dratler']
Comments: ['rwinch', 'jzheaux', 'miremond']

- jgrandja: Commits, Opens, Reviews, Comments

- jzheaux: Commits, Opens, Reviews, Comments

- rwinch: Commits, Reviews, Comments

- evgeniycheban: Reviews

- rh-id: Reviews [Não aparecia no geral]

SyntaxError: invalid syntax (4147502435.py, line 1)

In [None]:
#jgrandja, jzheaux, rwinch
data_jgrandja = json.dumps(user_quartiles_metrics['jgrandja'], indent=2)
print (data_jgrandja)

In [None]:
data_jzheaux = json.dumps(user_quartiles_metrics['jzheaux'], indent=2)
print (data_jzheaux)

In [None]:
data_rwinch = json.dumps(user_quartiles_metrics['rwinch'], indent=2)
print (data_rwinch)

<h1> Análise Spring Framework </h1>

<h3> All NFRs </h3>

- philwebb: Commits

- diguage: Commits

- chenqimiao: Commits, Opens

- bclozel: Opens, Reviews

- dreis2211: Opens

- diguage: Opens

- sbrannen: Reviews

- poutsma: Reviews

- rstoyanchev: Comments

- sbrannen: Comments

- jhoeller: Comments

<h3> Security </h3>

- rstoyanchev: Comments


<h3> Maintainability </h3>

- diguage: Opens, Commits

- chenqimiao: Opens, Commits

- sbrannen: Reviews, Comments

- poutsma: Reviews

- rstoyanchev: Comments

<h3> Robustness </h3>

- loiclefevre: Commits

- ShaoqiangLu: Commits

- xCubeSource: Commits

- bclozel: Opens

- sbrannen: Reviews, Comments

- rstoyanchev: Comments

<h3> Performance </h3>

- philwebb: Commits

- dreis2211: Opens

- stsypanov: Opens

- bclozel: Reviews

- poutsma: Reviews

- mentallurg: Reviews

- rstoyanchev: Comments

- sbrannen: Comments

- jhoeller: Comments




- rstoyanchev: 
    - Comments (All, Security, Rob, Per)

- sbrannen: 
    - Comments (All, Maint, Rob, Perf)
    - Reviews (All, Maint, Rob)


- philwebb: Commits (All, Per)

- diguage: 
    Commits (All, Maint)
    Opens (Maint)

- chenqimiao: Commits, Opens (All, Maint)

- bclozel: 
    Opens (All, Rob)
    Reviews (All, Per)

- dreis2211: Opens (All)

- diguage: Opens (All)

- sbrannen: Reviews (All)

- poutsma: Reviews (All)

- jhoeller: Comments (All, Per)
        
- poutsma: Reviews (Maint, Per)
    
- loiclefevre: Commits (Rob)

- ShaoqiangLu: Commits (Rob)

- xCubeSource: Commits (Rob)

- dreis2211: Opens (Per)

- stsypanov: Opens (Per)

- mentallurg: Reviews (Per)