# Analytics

#### Date: 2020/02

#### SUMMARY:

- This notebook represents the project quality analysis of the date exposed right above. 

### TEAM: SysArq

##### Semester: 2021/01
##### Professor: Hilmer Neri

##### Members:

- Member x
- Member y

### LIBRARIES

In [1]:
# Deal with data
import pandas as pd
import numpy as np
import json

# Deal with API request
import urllib3
from urllib3 import request

# Deal with visualization
import seaborn as sns
import matplotlib.pyplot as plt

### GRAPH SETTINGS

In [2]:
%config InlineBackend.figure_format ='retina'
sns.set(font_scale=1.5)
sns.set_style('darkgrid',
              {'xtick.bottom' : True,
               'ytick.left': True,
               'grid.linestyle':'--',
               'font.monospace': ['Computer Modern Typewriter'],
               'axes.edgecolor' : 'white'})

### DATAFRAME SETTINGS

In [3]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

### SonarCloud

#### KEYS

In [21]:
front_key = 'fga-eps-mds_2021.1-PC-GO1-Frontend'

In [22]:
archives_key = 'fga-eps-mds_2021.1-PC-GO1-Archives'

In [23]:
profile_key = 'fga-eps-mds_2021.1-PC-GO1-Profile'

#### METRICS

In [24]:
metric_list = ['files',
               'functions',
               'complexity',
               'comment_lines_density',
               'duplicated_lines_density',
               'security_rating',
               'tests',
               'test_success_density',
               'test_execution_time',
               'reliability_rating']

len(metric_list)

10

In [25]:
def generate_metric_string(metric_list):
    metric_str = ''
    
    for metric in metric_list:
        metric_str += metric + ','
        
    return metric_str

In [26]:
metric_str = generate_metric_string(metric_list)

#### URLS

In [27]:
front_url = f"https://sonarcloud.io/api/measures/component_tree?component={front_key}&metricKeys={metric_str}&ps=500"
archives_url = f"https://sonarcloud.io/api/measures/component_tree?component={archives_key}&metricKeys={metric_str}&ps=500"
profile_url = f"https://sonarcloud.io/api/measures/component_tree?component={profile_key}&metricKeys={metric_str}&ps=500"

#### API REQUEST

In [28]:
http = urllib3.PoolManager()

In [29]:
front_request = http.request('GET', front_url)
front_request.status

200

In [30]:
archives_request = http.request('GET', archives_url)
archives_request.status

200

In [31]:
profile_request = http.request('GET', profile_url)
profile_request.status

200

#### JSON DECODING

In [32]:
front_json = json.loads(front_request.data.decode('utf-8'))
archives_json = json.loads(archives_request.data.decode('utf-8'))
profile_json = json.loads(profile_request.data.decode('utf-8'))

## DATA

### PROJECT

In [33]:
project_front_json = front_json['baseComponent']['measures']
project_archives_json = archives_json['baseComponent']['measures']
project_profile_json = profile_json['baseComponent']['measures']

In [36]:
project_front_data = pd.DataFrame(project_front_json)
project_archives_data = pd.DataFrame(project_archives_json)
project_profile_data = pd.DataFrame(project_profile_json)

##### FRONT

In [37]:
project_front_data

Unnamed: 0,bestValue,metric,value
0,False,duplicated_lines_density,1.8
1,,functions,556.0
2,True,security_rating,1.0
3,,files,102.0
4,,complexity,759.0
5,True,reliability_rating,1.0
6,False,comment_lines_density,0.8


##### ARCHIVES

In [38]:
project_archives_data

Unnamed: 0,bestValue,metric,value
0,True,duplicated_lines_density,0.0
1,,functions,44.0
2,True,security_rating,1.0
3,,files,21.0
4,,complexity,66.0
5,True,reliability_rating,1.0
6,False,comment_lines_density,6.7


#### PROFILE

In [39]:
project_profile_data

Unnamed: 0,bestValue,metric,value
0,True,duplicated_lines_density,0.0
1,,functions,5.0
2,True,security_rating,1.0
3,,files,17.0
4,,complexity,8.0
5,True,reliability_rating,1.0
6,False,comment_lines_density,16.7


### FILES

In [40]:
def metric_per_file(json):
    file_json = []
    
    for component in json['components']:
        if component['qualifier'] == 'FIL':
            file_json.append(component)
            
    return file_json

In [41]:
front_file_json = metric_per_file(front_json)
archives_file_json = metric_per_file(archives_json)
profile_file_json = metric_per_file(profile_json)

In [44]:
def generate_file_dataframe(metric_list, json, language_extension):
    df_columns = metric_list
    df = pd.DataFrame(columns = df_columns, dtype=object)
    
    for file in json:
        try:
            if file['language'] == language_extension:
                for measure in file['measures']:
                    df.at[file['path'], measure['metric']] = measure['value']
        except:
            pass
        
    df.reset_index(inplace = True)
    df = df.rename({'index': 'path'}, axis=1).drop(['files'], axis=1)

    return df

In [45]:
front_files_data = generate_file_dataframe(metric_list, front_file_json, language_extension = 'js')
archives_files_data = generate_file_dataframe(metric_list, archives_file_json, language_extension = 'py')
profile_files_data = generate_file_dataframe(metric_list, profile_file_json, language_extension = 'py')

##### FRONT

In [46]:
front_files_data

Unnamed: 0,path,functions,complexity,comment_lines_density,duplicated_lines_density,security_rating,tests,test_success_density,test_execution_time,reliability_rating
0,sysarq/src/tests/Documents/Create/ConnectionEr...,5,5,0.0,0.0,1.0,,100.0,,1.0
1,sysarq/src/pages/components/Inputs/Abbreviatio...,9,9,0.0,0.0,1.0,,100.0,,1.0
2,sysarq/src/Api.js,0,0,0.0,0.0,1.0,,100.0,,1.0
3,sysarq/src/App.js,1,1,0.0,0.0,1.0,,100.0,,1.0
4,sysarq/src/tests/App.test.js,2,2,0.0,0.0,1.0,,100.0,,1.0
5,sysarq/src/pages/FieldsRegister/BoxAbbreviatio...,1,1,0.0,0.0,1.0,,100.0,,1.0
6,sysarq/src/tests/BoxAbbreviation.test.js,2,2,0.0,0.0,1.0,,100.0,,1.0
7,sysarq/src/pages/components/Container/ChipsCon...,2,2,0.0,0.0,1.0,,100.0,,1.0
8,sysarq/src/pages/components/CommonSet/CommonSe...,1,1,0.0,0.0,1.0,,100.0,,1.0
9,sysarq/src/pages/Documents/Create/CreateAdmini...,37,61,0.0,0.0,1.0,,100.0,,1.0


##### ARCHIVES

In [47]:
archives_files_data

Unnamed: 0,path,functions,complexity,comment_lines_density,duplicated_lines_density,security_rating,tests,test_success_density,test_execution_time,reliability_rating
0,archives_app/migrations/0001_initial.py,0,0,0.6,0.0,1.0,,100.0,,1.0
1,archives_app/__init__.py,0,0,,0.0,1.0,,100.0,,1.0
2,archives_app/migrations/__init__.py,0,0,,0.0,1.0,,100.0,,1.0
3,project/__init__.py,0,0,,0.0,1.0,,100.0,,1.0
4,archives_app/admin.py,0,0,,0.0,1.0,,100.0,,1.0
5,archives_app/apps.py,0,0,0.0,0.0,1.0,,100.0,,1.0
6,project/asgi.py,0,0,66.7,0.0,1.0,,100.0,,1.0
7,archives_app/documents_models.py,0,0,0.0,0.0,1.0,,100.0,,1.0
8,archives_app/documents_serializers.py,3,6,0.0,0.0,1.0,,100.0,,1.0
9,archives_app/fields_models.py,0,0,0.0,0.0,1.0,,100.0,,1.0


##### PROFILE

In [48]:
profile_files_data

Unnamed: 0,path,functions,complexity,comment_lines_density,duplicated_lines_density,security_rating,tests,test_success_density,test_execution_time,reliability_rating
0,profile_app/migrations/0001_initial.py,0,0,3.7,0.0,1.0,,100.0,,1.0
1,profile_app/__init__.py,0,0,,0.0,1.0,,100.0,,1.0
2,profile_app/migrations/__init__.py,0,0,,0.0,1.0,,100.0,,1.0
3,project/__init__.py,0,0,,0.0,1.0,,100.0,,1.0
4,profile_app/admin.py,0,0,,0.0,1.0,,100.0,,1.0
5,profile_app/apps.py,0,0,0.0,0.0,1.0,,100.0,,1.0
6,project/asgi.py,0,0,66.7,0.0,1.0,,100.0,,1.0
7,manage.py,1,2,16.7,0.0,1.0,,100.0,,1.0
8,profile_app/models.py,2,3,0.0,0.0,1.0,,100.0,,1.0
9,profile_app/serializers.py,1,2,0.0,0.0,1.0,,100.0,,1.0


# ANALYSIS

## MAINTAINABILITY

### CODE QUALITY

##### COMPLEXITY

In [49]:
def m1(df):
    
    density_non_complex_files = round((len(df[(df['complexity'].astype(float)/df['functions'].astype(float)) < 10])/len(df))*100, 2)
    
    return density_non_complex_files

##### COMMENTS

In [50]:
def m2(df):
    
    density_comment_files = round((len(df[(df['comment_lines_density'].astype(float) > 10) & (df['comment_lines_density'].astype(float) < 30)])/len(df))*100, 2)
    
    return density_comment_files

##### DUPLICATIONS

In [51]:
def m3(df):
    
    duplication = round((len(df[(df['duplicated_lines_density'].astype(float) < 5)])/len(df))*100, 2)
    
    return duplication

### BLOCKING CODE

#### NON-BLOCKING FILES

In [52]:
def m4(df):
    
    non_blocking_files = round((len(df[(df['security_rating'].astype(float) >= 4)])/len(df))*100,2)
    
    return non_blocking_files

## RELIABILITY

#### TEST SUCCESS

In [53]:
def m5(df):
    
    test_success_file = df[['path', 'test_success_density']]
    test_success_repository = df['test_success_density'].astype(float).mean()
    
    print("Project test unit density: ", test_success_repository)
    
    return test_success_file

#### FAST TESTS

In [54]:
def m6(df):
    
    fast_test_df = df[(df['test_execution_time'].astype(float) < 300)]
    fast_test_df['fast_test'] = fast_test_df['test_execution_time']/fast_test_df['tests']
    
    fast_test_file = fast_test_df[['path', 'fast_test']]
    
    fast_test_repository = fast_test_df['fast_test'].astype(float).mean()
    
    print("Project test unit density: ", fast_test_repository)
    
    return fast_test_file

## PRODUCTIVITY

#### RESOLVED ISSUES' THROUGHPUT

In [57]:
def m7(number_of_issues_resolved, number_of_issues):
    
    resolved_issues_throughput = round((number_of_issues_resolved / number_of_issues) * 100, 2)
    
    return resolved_issues_throughput

#### ISSUE TYPE IN A TIMEFRAME

In [58]:
def density(issue, number_of_issues):
    issue_density = round((issue / number_of_issues) * 100, 2)
    return issue_density

In [59]:
def m8(tag_dict, number_of_issues):
    
    issue_densities = {
        "hotfix": [density(tag_dict["HOTFIX"], number_of_issues)],
        "docs": [density(tag_dict["DOCS"], number_of_issues)],
        "feature": [density(tag_dict["FEATURE"], number_of_issues)],
        "arq": [density(tag_dict["ARQ"], number_of_issues)],
        "devops": [density(tag_dict["DEVOPS"], number_of_issues)],
        "analytics": [density(tag_dict["ANALYTICS"], number_of_issues)],
        "us": [density(tag_dict["US"], number_of_issues)],
        "easy": [density(tag_dict["EASY"], number_of_issues)],
        "medium": [density(tag_dict["MEDIUM"], number_of_issues)],
        "hard": [density(tag_dict["HARD"], number_of_issues)],
        "eps": [density(tag_dict["EPS"], number_of_issues)],
        "mds": [density(tag_dict["MDS"], number_of_issues)]
    }

    issue_densities = pd.DataFrame.from_dict(issue_densities).T.reset_index()
    
    issue_densities.columns = ['density' ,'percentage']
    
    return issue_densities

#### BUGS RATIO

In [60]:
def m9(tag_dict, number_of_issues):
    
    bugs_ratio = round(((tag_dict["DOCS"] + tag_dict["FEATURE"] + tag_dict["ARQ"] + tag_dict["DEVOPS"] + tag_dict["ANALYTICS"]) / number_of_issues) * 100, 2)
    
    return bugs_ratio

### PRODUCTIVITY METRIC RESULTS PER SPRINT

#### PRE-SPRINT AND SPRINT 1

In [125]:
NUMBER_OF_ISSUES_RESOLVED = 10
NUMBER_OF_ISSUES = 10

TAGS = {
    "HOTFIX": 0,
    "DOCS": 7,
    "FEATURE": 0,
    "ARQ": 0,
    "DEVOPS": 4,
    "ANALYTICS": 0,
    "US": 0,
    "EASY": 4,
    "MEDIUM": 2,
    "HARD": 4,
    "EPS": 8,
    "MDS": 3
}

In [126]:
pre_sprint_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
pre_sprint_m7

100.0

In [128]:
pre_sprint_m8 = m8(TAGS, NUMBER_OF_ISSUES)
pre_sprint_m8

Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,70.0
2,feature,0.0
3,arq,0.0
4,devops,40.0
5,analytics,0.0
6,us,0.0
7,easy,40.0
8,medium,20.0
9,hard,40.0


In [129]:
pre_sprint_m9 = m9(TAGS, NUMBER_OF_ISSUES)
pre_sprint_m9

110.0

#### SPRINT 2

In [105]:
NUMBER_OF_ISSUES_RESOLVED = 7
NUMBER_OF_ISSUES = 11

TAGS = {
    "HOTFIX": 0,
    "DOCS": 5,
    "FEATURE": 2,
    "ARQ": 1,
    "DEVOPS": 1,
    "ANALYTICS": 0,
    "US": 2,
    "EASY": 3,
    "MEDIUM": 6,
    "HARD": 2,
    "EPS": 7,
    "MDS": 3
}

In [106]:
s2_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
s2_m7

63.64

In [107]:
s2_m8 = m8(TAGS, NUMBER_OF_ISSUES)
s2_m8

Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,45.45
2,feature,18.18
3,arq,9.09
4,devops,9.09
5,analytics,0.0
6,us,18.18
7,easy,27.27
8,medium,54.55
9,hard,18.18


In [108]:
s2_m9 = m9(TAGS, NUMBER_OF_ISSUES)
s2_m9

81.82

#### SPRINT 3

In [109]:
NUMBER_OF_ISSUES_RESOLVED = 12
NUMBER_OF_ISSUES = 15

TAGS = {
    "HOTFIX": 0,
    "DOCS": 10,
    "FEATURE": 2,
    "ARQ": 2,
    "DEVOPS": 1,
    "ANALYTICS": 0,
    "US": 2,
    "EASY": 3,
    "MEDIUM": 8,
    "HARD": 4,
    "EPS": 11,
    "MDS": 4
}

In [110]:
s3_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
s3_m7

80.0

In [114]:
s3_m8 = m8(TAGS, NUMBER_OF_ISSUES)
s3_m8

Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,66.67
2,feature,13.33
3,arq,13.33
4,devops,6.67
5,analytics,0.0
6,us,13.33
7,easy,20.0
8,medium,53.33
9,hard,26.67


In [115]:
s3_m9 = m9(TAGS, NUMBER_OF_ISSUES)
s3_m9

100.0

#### SPRINT 4

In [142]:
NUMBER_OF_ISSUES_RESOLVED = 6
NUMBER_OF_ISSUES = 12

TAGS = {
    "HOTFIX": 0,
    "DOCS": 4,
    "FEATURE": 5,
    "ARQ": 1,
    "DEVOPS": 3,
    "ANALYTICS": 0,
    "US": 5,
    "EASY": 3,
    "MEDIUM": 5,
    "HARD": 4,
    "EPS": 9,
    "MDS": 4
}

In [143]:
s4_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
s4_m7

50.0

In [144]:
s4_m8 = m8(TAGS, NUMBER_OF_ISSUES)
s4_m8

Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,33.33
2,feature,41.67
3,arq,8.33
4,devops,25.0
5,analytics,0.0
6,us,41.67
7,easy,25.0
8,medium,41.67
9,hard,33.33


In [145]:
s4_m9 = m9(TAGS, NUMBER_OF_ISSUES)
s4_m9

108.33

#### SPRINT 5

In [138]:
NUMBER_OF_ISSUES_RESOLVED = 2
NUMBER_OF_ISSUES = 11

TAGS = {
    "HOTFIX": 1,
    "DOCS": 2,
    "FEATURE": 6,
    "ARQ": 1,
    "DEVOPS": 2,
    "ANALYTICS": 0,
    "US": 7,
    "EASY": 1,
    "MEDIUM": 7,
    "HARD": 3,
    "EPS": 7,
    "MDS": 5
}

In [139]:
s5_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
s5_m7

18.18

In [140]:
s5_m8 = m8(TAGS, NUMBER_OF_ISSUES)
s5_m8

Unnamed: 0,density,percentage
0,hotfix,9.09
1,docs,18.18
2,feature,54.55
3,arq,9.09
4,devops,18.18
5,analytics,0.0
6,us,63.64
7,easy,9.09
8,medium,63.64
9,hard,27.27


In [141]:
s5_m9 = m9(TAGS, NUMBER_OF_ISSUES)
s5_m9

100.0

#### SPRINT 6

In [148]:
NUMBER_OF_ISSUES_RESOLVED = 13
NUMBER_OF_ISSUES = 15

TAGS = {
    "HOTFIX": 4,
    "DOCS": 1,
    "FEATURE": 6,
    "ARQ": 0,
    "DEVOPS": 3,
    "ANALYTICS": 0,
    "US": 7,
    "EASY": 3,
    "MEDIUM": 7,
    "HARD": 3,
    "EPS": 11,
    "MDS": 4
}

In [149]:
s6_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
s6_m7

86.67

In [150]:
s6_m8 = m8(TAGS, NUMBER_OF_ISSUES)
s6_m8

Unnamed: 0,density,percentage
0,hotfix,26.67
1,docs,6.67
2,feature,40.0
3,arq,0.0
4,devops,20.0
5,analytics,0.0
6,us,46.67
7,easy,20.0
8,medium,46.67
9,hard,20.0


In [151]:
s6_m9 = m9(TAGS, NUMBER_OF_ISSUES)
s6_m9

66.67

#### SPRINT 7

In [152]:
NUMBER_OF_ISSUES_RESOLVED = 6
NUMBER_OF_ISSUES = 9

TAGS = {
    "HOTFIX": 0,
    "DOCS": 5,
    "FEATURE": 2,
    "ARQ": 1,
    "DEVOPS": 1,
    "ANALYTICS": 1,
    "US": 2,
    "EASY": 2,
    "MEDIUM": 6,
    "HARD": 1,
    "EPS": 8,
    "MDS": 1
}

In [153]:
s7_m7 = m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)
s7_m7

66.67

In [154]:
s7_m8 = m8(TAGS, NUMBER_OF_ISSUES)
s7_m8

Unnamed: 0,density,percentage
0,hotfix,0.0
1,docs,55.56
2,feature,22.22
3,arq,11.11
4,devops,11.11
5,analytics,11.11
6,us,22.22
7,easy,22.22
8,medium,66.67
9,hard,11.11


In [155]:
s7_m9 = m9(TAGS, NUMBER_OF_ISSUES)
s7_m9

111.11

### CODE METRIC RESULTS

#### FRONTEND

In [99]:
m1(front_files_data)

96.51

In [100]:
m2(front_files_data)

1.16

In [101]:
m3(front_files_data)

95.35

In [102]:
m4(front_files_data)

0.0

In [66]:
m5(front_files_data)

Project test unit density:  100.0


Unnamed: 0,path,test_success_density
0,sysarq/src/tests/Documents/Create/ConnectionEr...,100.0
1,sysarq/src/pages/components/Inputs/Abbreviatio...,100.0
2,sysarq/src/Api.js,100.0
3,sysarq/src/App.js,100.0
4,sysarq/src/tests/App.test.js,100.0
5,sysarq/src/pages/FieldsRegister/BoxAbbreviatio...,100.0
6,sysarq/src/tests/BoxAbbreviation.test.js,100.0
7,sysarq/src/pages/components/Container/ChipsCon...,100.0
8,sysarq/src/pages/components/CommonSet/CommonSe...,100.0
9,sysarq/src/pages/Documents/Create/CreateAdmini...,100.0


In [68]:
m6(front_files_data)

Project test unit density:  nan


Unnamed: 0,path,fast_test


#### ARCHIVES

In [72]:
m1(archives_files_data)

31.58

In [73]:
m2(archives_files_data)

26.32

In [74]:
m3(archives_files_data)

100.0

In [75]:
m4(archives_files_data)

0.0

In [76]:
m5(archives_files_data)

Project test unit density:  100.0


Unnamed: 0,path,test_success_density
0,archives_app/migrations/0001_initial.py,100.0
1,archives_app/__init__.py,100.0
2,archives_app/migrations/__init__.py,100.0
3,project/__init__.py,100.0
4,archives_app/admin.py,100.0
5,archives_app/apps.py,100.0
6,project/asgi.py,100.0
7,archives_app/documents_models.py,100.0
8,archives_app/documents_serializers.py,100.0
9,archives_app/fields_models.py,100.0


In [77]:
m6(archives_files_data)

Project test unit density:  nan


Unnamed: 0,path,fast_test


#### PROFILE

In [78]:
m1(profile_files_data)

26.67

In [79]:
m2(profile_files_data)

20.0

In [80]:
m3(profile_files_data)

100.0

In [81]:
m4(profile_files_data)

0.0

In [82]:
m5(profile_files_data)

Project test unit density:  100.0


Unnamed: 0,path,test_success_density
0,profile_app/migrations/0001_initial.py,100.0
1,profile_app/__init__.py,100.0
2,profile_app/migrations/__init__.py,100.0
3,project/__init__.py,100.0
4,profile_app/admin.py,100.0
5,profile_app/apps.py,100.0
6,project/asgi.py,100.0
7,manage.py,100.0
8,profile_app/models.py,100.0
9,profile_app/serializers.py,100.0


In [83]:
m6(profile_files_data)

Project test unit density:  nan


Unnamed: 0,path,fast_test
