# Analytics

#### Date: 2021/02


#### SUMMARY:

- This notebook represents the project quality analysis of the date exposed right above. 

### TEAM:

##### Semester: 2021/02

##### Professor: Hilmer Neri

##### Members:

- Lucas Lopes Xavier
- João Victor Lustosa Braz
- Caio César de Almeida Beleza
- Arthur José Nascimento de Lima
- Cícero Barrozo Fernandes Filho
- Gabriel de Souza Fonseca Ribeiro
- André Macedo Rodrigues Alves
- Pedro Victor Lima Torreão
- Daniel Rocha Oliveira
- Matheus Raphael Soares de Oliveira

### LIBRARIES

In [107]:
# Deal with data
import pandas as pd
import numpy as np
import json

# Deal with API request
import urllib3
from urllib3 import request

# Deal with visualization
import seaborn as sns
import matplotlib.pyplot as plt

### GRAPH SETTINGS

In [108]:
%config InlineBackend.figure_format ='retina'
sns.set(font_scale=1.5)
sns.set_style('darkgrid',
              {'xtick.bottom' : True,
               'ytick.left': True,
               'grid.linestyle':'--',
               'font.monospace': ['Computer Modern Typewriter'],
               'axes.edgecolor' : 'white'})

### DATAFRAME SETTINGS

In [109]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

### SonarCloud

#### KEYS

In [110]:
front_key = 'fga-eps-mds_2021.2-Oraculo-FrontEnd'

In [111]:
profile_key = 'fga-eps-mds_2021.2-Oraculo-Profile'

In [112]:
registros_key = 'fga-eps-mds_2021.2-Oraculo-Registros'

#### METRICS

In [113]:
metric_list = ['files',
               'functions',
               'complexity',
               'comment_lines_density',
               'duplicated_lines_density',
               'security_rating',
               'tests',
               'test_success_density',
               'test_execution_time',
               'reliability_rating']

len(metric_list)

10

In [114]:
def generate_metric_string(metric_list):
    metric_str = ''
    
    for metric in metric_list:
        metric_str += metric + ','
        
    return metric_str

In [115]:
metric_str = generate_metric_string(metric_list)

#### URLS

In [116]:
front_url = f"https://sonarcloud.io/api/measures/component_tree?component={front_key}&metricKeys={metric_str}&ps=500"
profile_url = f"https://sonarcloud.io/api/measures/component_tree?component={profile_key}&metricKeys={metric_str}&ps=500"
registros_url = f"https://sonarcloud.io/api/measures/component_tree?component={registros_key}&metricKeys={metric_str}&ps=500"

#### API REQUEST

In [117]:
http = urllib3.PoolManager()

In [118]:
front_request = http.request('GET', front_url)
front_request.status

200

In [119]:
profile_request = http.request('GET', profile_url)
profile_request.status

200

In [120]:
registros_request = http.request('GET', registros_url)
registros_request.status

200

In [121]:
tags_request = http.request('GET', tags_url)
tags_request.status

200

#### JSON DECODING

In [122]:
front_json = json.loads(front_request.data.decode('utf-8'))
profile_json = json.loads(profile_request.data.decode('utf-8'))
registros_json = json.loads(registros_request.data.decode('utf-8'))

## DATA

### PROJECT

In [123]:
project_front_json = front_json['baseComponent']['measures']
project_profile_json = profile_json['baseComponent']['measures']
project_registros_json = registros_json['baseComponent']['measures']

In [124]:
project_front_data = pd.DataFrame(project_front_json)
project_profile_data = pd.DataFrame(project_profile_json)
project_registros_data = pd.DataFrame(project_registros_json)

##### FRONT

In [125]:
project_front_data

Unnamed: 0,metric,value,bestValue
0,duplicated_lines_density,2.9,False
1,functions,384.0,
2,security_rating,1.0,True
3,files,111.0,
4,complexity,538.0,
5,reliability_rating,1.0,True
6,comment_lines_density,1.6,False


##### BACK

In [126]:
project_profile_data

Unnamed: 0,metric,value,bestValue
0,duplicated_lines_density,0.0,True
1,functions,30.0,
2,security_rating,1.0,True
3,files,11.0,
4,complexity,61.0,
5,reliability_rating,1.0,True
6,comment_lines_density,0.6,False


In [127]:
project_registros_data

Unnamed: 0,metric,value,bestValue
0,duplicated_lines_density,0.0,True
1,functions,54.0,
2,security_rating,1.0,True
3,files,18.0,
4,complexity,122.0,
5,reliability_rating,1.0,True
6,comment_lines_density,2.7,False


### FILES

In [128]:
def metric_per_file(json):
    file_json = []
    
    for component in json['components']:
        if component['qualifier'] == 'FIL':
            file_json.append(component)
            
    return file_json

In [129]:
front_file_json = metric_per_file(front_json)
profile_file_json = metric_per_file(profile_json)
registros_file_json = metric_per_file(registros_json)

In [130]:
def generate_file_dataframe(metric_list, json, language_extension):
    df_columns = metric_list
    df = pd.DataFrame(columns = df_columns)
    
    for file in json:
        try:
            if file['language'] == language_extension:
                for measure in file['measures']:
                    df.at[file['path'], measure['metric']] = measure['value']
        except:
            pass
        
    df.reset_index(inplace = True)
    df = df.rename({'index': 'path'}, axis=1).drop(['files'], axis=1)

    return df

In [131]:
front_files_data = generate_file_dataframe(metric_list, front_file_json, language_extension = 'js')
profile_files_data = generate_file_dataframe(metric_list, profile_file_json, language_extension = 'js')
registros_files_data = generate_file_dataframe(metric_list, registros_file_json, language_extension = 'js')

##### FRONT

In [132]:
front_files_data

Unnamed: 0,path,functions,complexity,comment_lines_density,duplicated_lines_density,security_rating,tests,test_success_density,test_execution_time,reliability_rating
0,src/App.js,1,1,0.0,0.0,1.0,,100.0,,1.0
1,src/App.test.js,3,3,0.0,0.0,1.0,,100.0,,1.0
2,src/Auth/Auth.js,5,7,19.2,0.0,1.0,,100.0,,1.0
3,src/Services/Axios/BaseService/baseService.js,6,7,0.0,0.0,1.0,,100.0,,1.0
4,src/Constants/baseUrls.js,0,1,28.6,0.0,1.0,,100.0,,1.0
5,src/Components/DropDownButton/DivSelectSetor.js,0,0,0.0,0.0,1.0,,100.0,,1.0
6,src/Constants/federativeUnits.js,0,0,0.0,0.0,1.0,,100.0,,1.0
7,src/history.js,0,0,0.0,0.0,1.0,,100.0,,1.0
8,src/index.js,0,0,20.0,0.0,1.0,,100.0,,1.0
9,src/Pages/EditRecord/index.js,27,43,0.9,5.8,1.0,,100.0,,1.0


##### BACK

In [133]:
profile_files_data

Unnamed: 0,path,functions,complexity,comment_lines_density,duplicated_lines_density,security_rating,tests,test_success_density,test_execution_time,reliability_rating
0,src/Model/Department.js,3,3,0.0,0.0,1.0,,100.0,,1.0
1,src/Controller/DepartmentController.js,3,6,0.0,0.0,1.0,,100.0,,1.0
2,src/Utils/hash.js,1,1,0.0,0.0,1.0,,100.0,,1.0
3,src/index.js,0,1,0.0,0.0,1.0,,100.0,,1.0
4,src/Database/index.js,7,12,1.4,0.0,1.0,,100.0,,1.0
5,src/Utils/JWT.js,2,4,0.0,0.0,1.0,,100.0,,1.0
6,src/Model/Level.js,2,2,0.0,0.0,1.0,,100.0,,1.0
7,src/routes.js,0,0,0.0,0.0,1.0,,100.0,,1.0
8,src/Model/User.js,2,2,0.0,0.0,1.0,,100.0,,1.0
9,src/Controller/UserController.js,10,30,0.9,0.0,1.0,,100.0,,1.0


In [134]:
registros_files_data

Unnamed: 0,path,functions,complexity,comment_lines_density,duplicated_lines_density,security_rating,tests,test_success_density,test_execution_time,reliability_rating
0,src/Model/Department.js,2,2,0.0,0.0,1.0,,100.0,,1.0
1,src/Controller/DepartmentController.js,2,3,0.0,0.0,1.0,,100.0,,1.0
2,src/Constants/errors.js,0,0,0.0,0.0,1.0,,100.0,,1.0
3,src/Model/Field.js,1,1,40.0,0.0,1.0,,100.0,,1.0
4,src/Model/History.js,2,2,9.4,0.0,1.0,,100.0,,1.0
5,src/index.js,1,3,0.0,0.0,1.0,,100.0,,1.0
6,src/Database/index.js,7,13,6.8,0.0,1.0,,100.0,,1.0
7,src/Model/Record.js,2,2,0.0,0.0,1.0,,100.0,,1.0
8,src/Controller/RecordController.js,24,74,1.1,0.0,1.0,,100.0,,1.0
9,src/Model/RecordNumber.js,1,1,0.0,0.0,1.0,,100.0,,1.0


# ANALYSIS

## MAINTAINABILITY

### CODE QUALITY

##### COMPLEXITY

In [135]:
def m1(df):
    
    density_non_complex_files = round((len(df[(df['complexity'].astype(float)/df['functions'].astype(float)) < 10])/len(df))*100, 2)
    
    return density_non_complex_files

##### COMMENTS

In [136]:
def m2(df):
    
    density_comment_files = round((len(df[(df['comment_lines_density'].astype(float) > 10) & (df['comment_lines_density'].astype(float) < 30)])/len(df))*100, 2)
    
    return density_comment_files

##### DUPLICATIONS

In [137]:
def m3(df):
    
    duplication = round((len(df[(df['duplicated_lines_density'].astype(float) < 5)])/len(df))*100, 2)
    
    return duplication

### BLOCKING CODE

#### NON-BLOCKING FILES

In [138]:
def m4(df):
    
    non_blocking_files = round((len(df[(df['security_rating'].astype(float) >= 4)])/len(df))*100,2)
    
    return non_blocking_files

## RELIABILITY

#### TEST SUCCESS

In [139]:
def m5(df):
    
    test_success_file = df[['path', 'test_success_density']]
    test_success_repository = df['test_success_density'].astype(float).mean()
    
    print("Project test unit density: ", test_success_repository)
    
    return test_success_file

#### FAST TESTS

In [140]:
def m6(df):
    
    fast_test_df = df[(df['test_execution_time'].astype(float) < 300)]
    fast_test_df['fast_test'] = fast_test_df['test_execution_time']/fast_test_df['tests']
    
    fast_test_file = fast_test_df[['path', 'fast_test']]
    
    fast_test_repository = fast_test_df['fast_test'].astype(float).mean()
    
    print("Project test unit density: ", fast_test_repository)
    
    return fast_test_file

## PRODUCTIVITY

#### RESOLVED ISSUES' THROUGHPUT

In [141]:
def m7(number_of_issues_resolved, number_of_issues):
    
    resolved_issues_throughput = round((number_of_issues_resolved / number_of_issues) * 100, 2)
    
    return resolved_issues_throughput

#### ISSUE TYPE IN A TIMEFRAME

In [142]:
def density(issue, number_of_issues):
    issue_density = round((issue / number_of_issues) * 100, 2)
    return issue_density

In [143]:
def m8(tag_dict, number_of_issues):
    
    issue_densities = {
        "hotfix": [density(tag_dict["HOTFIX"], number_of_issues)],
        "docs": [density(tag_dict["DOCS"], number_of_issues)],
        "feature": [density(tag_dict["FEATURE"], number_of_issues)],
        "arq": [density(tag_dict["ARQ"], number_of_issues)],
        "devops": [density(tag_dict["DEVOPS"], number_of_issues)],
        "analytics": [density(tag_dict["ANALYTICS"], number_of_issues)],
        "us": [density(tag_dict["US"], number_of_issues)],
        "easy": [density(tag_dict["EASY"], number_of_issues)],
        "medium": [density(tag_dict["MEDIUM"], number_of_issues)],
        "hard": [density(tag_dict["HARD"], number_of_issues)],
        "eps": [density(tag_dict["EPS"], number_of_issues)],
        "mds": [density(tag_dict["MDS"], number_of_issues)]
    }

    issue_densities = pd.DataFrame.from_dict(issue_densities).T.reset_index()
    issue_densities.columns = ['density' ,'percentage']
    
    return issue_densities

#### BUGS RATIO

In [144]:
def m9(tag_dict, number_of_issues): 
    bugs_ratio = round(((tag_dict["DOCS"] + tag_dict["FEATURE"] + tag_dict["ARQ"] + tag_dict["DEVOPS"] + tag_dict["ANALYTICS"]) / number_of_issues) * 100, 2)

    return bugs_ratio

### TIMEFRAME: SPRINT 1

In [145]:
SPRINT = 1
NUMBER_OF_ISSUES_RESOLVED = 0
NUMBER_OF_ISSUES = 22

TAGS = {
    "HOTFIX": 0,
    "DOCS": 13,
    "FEATURE": 0,
    "ARQ": 0,
    "DEVOPS": 2,
    "ANALYTICS": 1,
    "US":  0,
    "EASY":  0,
    "MEDIUM": 0,
    "HARD": 0,
    "EPS": 18,
    "MDS": 4,

}

In [146]:
sprint1_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
sprint1_m8 = m8(TAGS, NUMBER_OF_ISSUES)
sprint1_m9 = m9(TAGS, NUMBER_OF_ISSUES)

### TIMEFRAME: SPRINT 2

In [147]:
SPRINT = 2
NUMBER_OF_ISSUES_RESOLVED = 24
NUMBER_OF_ISSUES = 24

TAGS = {
    "HOTFIX": 1,
    "DOCS": 13,
    "FEATURE": 0,
    "ARQ": 0, 
    "DEVOPS": 2, 
    "ANALYTICS": 1, 
    "US": 0, 
    "EASY": 0, 
    "MEDIUM": 0, 
    "HARD": 0, 
    "EPS": 19, 
    "MDS": 4, 

}

In [148]:
sprint2_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
sprint2_m8 = m8(TAGS, NUMBER_OF_ISSUES)
sprint2_m9 = m9(TAGS, NUMBER_OF_ISSUES)

### TIMEFRAME: SPRINT 3

In [149]:
SPRINT = 3
NUMBER_OF_ISSUES_RESOLVED = 3
NUMBER_OF_ISSUES = 3

TAGS = {
     
    "HOTFIX": 0,
    "DOCS": 0,
    "FEATURE": 0, 
    "ARQ": 0,
    "DEVOPS": 0, 
    "ANALYTICS": 0, 
    "US": 0,
    "EASY": 0, 
    "MEDIUM": 0, 
    "HARD": 0,
    "EPS": 0, 
    "MDS": 3, 
}

In [150]:
sprint3_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
sprint3_m8 = m8(TAGS, NUMBER_OF_ISSUES)
sprint3_m9 = m9(TAGS, NUMBER_OF_ISSUES)

### TIMEFRAME: SPRINT 4

In [151]:
SPRINT = 4
NUMBER_OF_ISSUES_RESOLVED = 4
NUMBER_OF_ISSUES = 4

TAGS = {
   "HOTFIX": 0,
    "DOCS": 1,
    "FEATURE": 1, 
    "ARQ": 0,
    "DEVOPS": 0, 
    "ANALYTICS": 0, 
    "US": 2,
    "EASY": 0, 
    "MEDIUM": 0,  
    "HARD": 0,
    "EPS": 1, 
    "MDS": 3,
}

In [152]:
sprint4_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
sprint4_m8 = m8(TAGS, NUMBER_OF_ISSUES)
sprint4_m9 = m9(TAGS, NUMBER_OF_ISSUES)

### TIMEFRAME: SPRINT 5

In [153]:
SPRINT = 5
NUMBER_OF_ISSUES_RESOLVED = 0
NUMBER_OF_ISSUES = 7

TAGS = {
    "HOTFIX": 0,
    "DOCS": 1,
    "FEATURE": 1,  
    "ARQ": 0,
    "DEVOPS": 0,  
    "ANALYTICS": 1,  
    "US": 2,
    "EASY": 0,  
    "MEDIUM": 0,  
    "HARD": 0,
    "EPS": 2, 
    "MDS": 4,
}

In [154]:
sprint5_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
sprint5_m8 = m8(TAGS, NUMBER_OF_ISSUES)
sprint5_m9 = m9(TAGS, NUMBER_OF_ISSUES)

### METRIC RESULTS

#### M1 Results

In [155]:
front_m1 = m1(front_files_data)
profile_m1 = m1(profile_files_data)
registros_m1 = m1(registros_files_data)

m1_results = {"Repository": ["Front End", "Profile", "Registros"], "Results": [front_m1, profile_m1, registros_m1]}

m1_results = pd.DataFrame(data=m1_results)

m1_results.style.hide_index

m1_results

Unnamed: 0,Repository,Results
0,Front End,53.21
1,Profile,80.0
2,Registros,82.35


#### M2 Results

In [156]:
front_m2 = m2(front_files_data)
profile_m2 = m2(profile_files_data)
registros_m2 = m2(registros_files_data)


m2_results = {"Repository": ["Front End", "Profile", "Registros"], "Results": [front_m2, profile_m2, registros_m2]}

m2_results = pd.DataFrame(data=m2_results)

m2_results.style.hide_index

m2_results

Unnamed: 0,Repository,Results
0,Front End,3.67
1,Profile,0.0
2,Registros,0.0


#### M3 Results

In [157]:
front_m3 = m3(front_files_data)
profile_m3 = m3(profile_files_data)
registros_m3 = m3(registros_files_data)


m3_results = {"Repository": ["Front End", "Profile", "Registros"], "Results": [front_m3, profile_m3, registros_m3]}

m3_results = pd.DataFrame(data=m3_results)

m3_results.style.hide_index

m3_results

Unnamed: 0,Repository,Results
0,Front End,96.33
1,Profile,100.0
2,Registros,100.0


#### M4 Results

In [158]:
front_m4 = m4(front_files_data)
profile_m4 = m4(profile_files_data)
registros_m4 = m4(registros_files_data)


m4_results = {"Repository": ["Front End", "Profile", "Registros"], "Results": [front_m4, profile_m4, registros_m4]}

m4_results = pd.DataFrame(data=m4_results)

m4_results.style.hide_index

m4_results

Unnamed: 0,Repository,Results
0,Front End,0.0
1,Profile,0.0
2,Registros,0.0


#### M5 Results

In [159]:
front_m5 = m5(front_files_data)
profile_m5 = m5(profile_files_data)
registros_m5 = m5(registros_files_data)


m5_results = {"Repository": ["Front End", "Profile", "Registros"], "Results": [front_m5, profile_m5, registros_m5]}

m5_results = pd.DataFrame(data=m5_results)

m5_results.style.hide_index

m5_results

Project test unit density:  100.0
Project test unit density:  100.0
Project test unit density:  100.0


Unnamed: 0,Repository,Results
0,Front End,...
1,Profile,path test...
2,Registros,path t...


#### M6 Results

In [160]:
front_m6 = m6(front_files_data)
profile_m6 = m6(profile_files_data)
registros_m6 = m6(registros_files_data)


m6_results = {"Repository": ["Front End", "Profile", "Registros"], "Results": [front_m6, profile_m6, registros_m6]}

m6_results = pd.DataFrame(data=m6_results)

m6_results.style.hide_index

m6_results

Project test unit density:  nan
Project test unit density:  nan
Project test unit density:  nan


Unnamed: 0,Repository,Results
0,Front End,"Empty DataFrame Columns: [path, fast_test] Ind..."
1,Profile,"Empty DataFrame Columns: [path, fast_test] Ind..."
2,Registros,"Empty DataFrame Columns: [path, fast_test] Ind..."


#### M7 Results

In [161]:
sprint1_m7
sprint2_m7
sprint3_m7
sprint4_m7
sprint5_m7

m7_results = {"Sprint": [1,2,3,4,5], "Results": [sprint1_m7,sprint2_m7,sprint3_m7,sprint4_m7,sprint5_m7]}
m7_results = pd.DataFrame(data=m7_results)
m7_results

Unnamed: 0,Sprint,Results
0,1,0.0
1,2,100.0
2,3,100.0
3,4,100.0
4,5,0.0


#### M8 Results

In [162]:
sprint1_m8
sprint2_m8
sprint3_m8
sprint4_m8
sprint5_m8

m8_list = [sprint1_m8,sprint2_m8,sprint3_m8,sprint4_m8,sprint5_m8]

for i in range(len(m8_list)):
    print("Sprint {sprint}".format(sprint=i))
    display(m8_list[i])


Sprint 0


Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,59.09
2,feature,0.0
3,arq,0.0
4,devops,9.09
5,analytics,4.55
6,us,0.0
7,easy,0.0
8,medium,0.0
9,hard,0.0


Sprint 1


Unnamed: 0,density,percentage
0,hotfix,4.17
1,docs,54.17
2,feature,0.0
3,arq,0.0
4,devops,8.33
5,analytics,4.17
6,us,0.0
7,easy,0.0
8,medium,0.0
9,hard,0.0


Sprint 2


Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,0.0
2,feature,0.0
3,arq,0.0
4,devops,0.0
5,analytics,0.0
6,us,0.0
7,easy,0.0
8,medium,0.0
9,hard,0.0


Sprint 3


Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,25.0
2,feature,25.0
3,arq,0.0
4,devops,0.0
5,analytics,0.0
6,us,50.0
7,easy,0.0
8,medium,0.0
9,hard,0.0


Sprint 4


Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,14.29
2,feature,14.29
3,arq,0.0
4,devops,0.0
5,analytics,14.29
6,us,28.57
7,easy,0.0
8,medium,0.0
9,hard,0.0


#### M9 Results

In [163]:
sprint1_m9
sprint2_m9
sprint3_m9
sprint4_m9
sprint5_m9

m9_results = {"Sprint": [1,2,3,4,5], "Results": [sprint1_m9,sprint2_m9,sprint3_m9,sprint4_m9,sprint5_m9]}
m9_results = pd.DataFrame(data=m9_results)
m9_results

Unnamed: 0,Sprint,Results
0,1,72.73
1,2,66.67
2,3,0.0
3,4,50.0
4,5,42.86
