# Analytics

**Date: 2021/1**

**SUMMARY:**
    
   * This notebook represents the project quality analysis of the date exposed right above
    
### TEAM:

**Semester: 2021/1**

**Professor: Hilmer Neri**

**Members:**

* Artur
* Eduardo
* Gabriel
* Levi
* Luis
* Joao
* Nícolas

### LIBRARIES

In [28]:
import requests
import json
import utils.constants as CONST
import utils.widgets as WDG
import ipywidgets as widgets
import time
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
import os
import utils.test_coverage as tc 

#### Dataframe Config

In [29]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

### **SonarCloud**

**Path to metrics files: ../analytics-raw-data/**

In [30]:
jsons = glob('../analytics-raw-data/*.json')

In [31]:
def read_json(json_):
    
    with open(json_) as json_file:
        json_obj = json.load(json_file)
            
    return json_.split('/')[-1], json_obj

def create_base_df(json_list):
    
    df = pd.DataFrame()
    
    for i in json_list:
        
        file_name, data = read_json(i)
        
        df_ = pd.DataFrame(data['baseComponent']['measures'])
        
                          
        df_['file_name'] = file_name
                  
        df = df.append(df_, ignore_index=True)
        
    aux_df = df['file_name'].str.split(r'fga-eps-mds-2021_1-PUMA-([a-zA-Z]+)-([\d-]+).json', expand=True)
    
    df['repository'] = aux_df[1]
    
    df['date'] = aux_df[2]
    
    df = df.sort_values(by=['repository', 'date']).drop(['file_name'], axis=1)

    return df

In [32]:
df = create_base_df(jsons)

In [33]:
df.head(10)

Unnamed: 0,metric,value,bestValue,repository,date
8,duplicated_lines_density,0.0,True,AlocateService,10-19-2021-01-08
9,functions,4.0,,AlocateService,10-19-2021-01-08
10,security_rating,1.0,True,AlocateService,10-19-2021-01-08
11,files,4.0,,AlocateService,10-19-2021-01-08
12,complexity,9.0,,AlocateService,10-19-2021-01-08
13,ncloc,204.0,,AlocateService,10-19-2021-01-08
14,reliability_rating,1.0,True,AlocateService,10-19-2021-01-08
15,comment_lines_density,0.0,False,AlocateService,10-19-2021-01-08
112,duplicated_lines_density,0.0,True,ApiGateway,10-06-2021-15-49
113,functions,21.0,,ApiGateway,10-06-2021-15-49


In [34]:
metric_list = ['files',
               'functions',
               'complexity',
               'comment_lines_density',
               'duplicated_lines_density',
               'coverage',
               'ncloc',
               'security_rating',
               'tests',
               'test_success_density',
               'test_execution_time',
               'reliability_rating']

len(metric_list)

12

In [35]:
def metric_per_file(json):
    
    file_json = []
        
    for component in json[1]['components']:
        if component['qualifier'] == 'FIL':
            file_json.append(component)
            
    return file_json

def generate_file_dataframe_per_release(metric_list, json, language_extension):
    
    df_columns = metric_list
    df = pd.DataFrame(columns = df_columns)
    
    for file in json:
        try:
            if file['language'] == language_extension:
                for measure in file['measures']:
                    df.at[file['path'], measure['metric']] = measure['value']
        except:
            pass
        
    df.reset_index(inplace = True)
    df = df.rename({'index': 'path'}, axis=1).drop(['files'], axis=1)

    return df

def create_file_df(json_list):
    
    df = pd.DataFrame()

    for i in json_list:

        file_component = read_json(i)
        
        file_component_data = metric_per_file(file_component)

        file_component_df = generate_file_dataframe_per_release(metric_list, file_component_data, language_extension = 'js')

        file_component_df['filename'] = os.path.basename(i)

        df = df.append(file_component_df, ignore_index=True)
        
    aux_df = df['filename'].str.split(r"fga-eps-mds-2021_1-PUMA-([a-zA-Z]+)-([\d-]+).json", expand=True)
    
    df['repository'] = aux_df[1]
    
    df['version'] = aux_df[2]
    
    df = df.sort_values(by=['repository', 'version']).drop(['filename'], axis=1)
        
    return df

In [36]:
file_component = create_file_df(jsons)

In [10]:
REPOS = ['ApiGateway', 'UserService', 'ProjectService', 'Frontend']

In [39]:
test_json = {}
for repo in REPOS:
    path_time = f'../../2021-1-PUMA-{repo}/test-results.xml'
    path_coverage = f'../../2021-1-PUMA-{repo}/cobertura-coverage.xml'
    try:
        test_json[repo] = tc.get_test_obj(path_time, path_coverage)
    except:
        pass

src/controller/userController.js
src/models/user.js
src/repository/userRepository.js


In [41]:
jsons

['../analytics-raw-data/fga-eps-mds-2021_1-PUMA-ProjectService-10-29-2021-21-54.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-AlocateService-10-19-2021-01-08.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-UserService-10-06-2021-15-47.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-Frontend-10-06-2021-15-50.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-Frontend-10-29-2021-21-53.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-UserService-10-29-2021-21-16.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-13-09-2021-21_1.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-ApiGateway-10-19-2021-01-06.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-ApiGateway-10-29-2021-22-19.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-UserService-11-07-2021-01-05.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-UserService-10-19-2021-01-05.json',
 '../analytics-raw-data/fga-eps-mds-2021_1-PUMA-Frontend-10-29-2021-22-20.json',
 '../analy

In [11]:
dfs = {}
for repo in REPOS:
    dfs[repo] = file_component[file_component['repository'] == repo]

In [12]:
file_component.head(10)

Unnamed: 0,path,functions,complexity,comment_lines_density,duplicated_lines_density,coverage,ncloc,security_rating,tests,test_success_density,test_execution_time,reliability_rating,repository,version
11,dbconfig/dbConfig.js,3,8,0.0,0.0,,44,1.0,,100.0,,1.0,AlocateService,10-19-2021-01-08
12,dbconfig/dbSchema.js,0,0,0.0,0.0,,146,1.0,,100.0,,1.0,AlocateService,10-19-2021-01-08
13,index.js,1,1,0.0,0.0,,14,1.0,,100.0,,1.0,AlocateService,10-19-2021-01-08
161,src/config/environment.js,1,2,0.0,0.0,,9,1.0,,100.0,,1.0,ApiGateway,10-06-2021-15-49
162,index.js,0,0,0.0,0.0,,11,1.0,,100.0,,1.0,ApiGateway,10-06-2021-15-49
163,src/routes/router.js,10,10,0.0,0.0,,31,1.0,,100.0,,1.0,ApiGateway,10-06-2021-15-49
164,src/routes/userRouter.js,10,10,4.7,0.0,,41,1.0,,100.0,,1.0,ApiGateway,10-06-2021-15-49
85,tests/integrationTests/constants.js,0,0,0.0,0.0,,128,1.0,,100.0,,1.0,ApiGateway,10-19-2021-01-06
86,src/config/environment.js,1,4,0.0,0.0,,17,1.0,,100.0,,1.0,ApiGateway,10-19-2021-01-06
87,index.js,0,0,0.0,0.0,,11,1.0,,100.0,,1.0,ApiGateway,10-19-2021-01-06


In [13]:
def _nloc(df):
    ncloc = 0
    for each in df['ncloc']:
        ncloc += int(each)

    return ncloc

In [14]:
def m1(df):
    return len(df[(df['complexity'].astype(float)/df['functions'].astype(float)) < 10])/len(df)

In [15]:
def m2(df):
    return len(df[(df['comment_lines_density'].astype(float) > 10) &\
                  (df['comment_lines_density'].astype(float) < 30)])/len(df)

In [16]:
def m3(df):
    return len(df[(df['duplicated_lines_density'].astype(float) < 5)])/len(df)

In [17]:
def m4(df):
    return df['test_success_density'].astype(float).median() / 100

In [18]:
def m5(df):
    return len(df[(df['test_execution_time'].astype(float)) < 300]) / sum(df['tests'].astype(float))

In [19]:
def m6(df):
    return len(df[(df['coverage'].astype(float) > 60)]) / len(df)

In [20]:
def create_metrics(df):
    version_vec = df['version'].unique()
    
    m1_list = []
    m2_list = []
    m3_list = []
    m4_list = []
    m5_list = []
    m6_list = []
    
    ncloc_list = []
    repository_list = []
    version_list = []
    
    metrics_df = pd.DataFrame()
    
    for version in version_vec:
        
        version_df = df[df['version'] == version]
        
        m1_list.append(m1(version_df))
        m2_list.append(m2(version_df))
        m3_list.append(m3(version_df))
        m4_list.append(m4(version_df))
        m5_list.append(m5(version_df))
        m6_list.append(m6(version_df))
        
        ncloc_list.append(_nloc(version_df))
        repository_list.append(version_df['repository'].iloc[0])
        version_list.append(version)
    
    metrics_df = pd.DataFrame({
            'm1': m1_list,
            'm2': m2_list,
            'm3': m3_list,
            'm4': m4_list,
            'm5': m5_list,
            'm6': m6_list,
            'repository': repository_list, 
            'version': version_list,
            'ncloc': ncloc_list})
    
    return metrics_df

In [21]:
def plot_metrics(df):
    fig = plt.figure(figsize=(20, 10))
#     ax = subplot(1,1,1)

    plt.plot(df['m1'], linewidth=3, marker='o', markersize=10, label='complexity')
    plt.plot(df['m2'], linewidth=3, marker='o', markersize=10, label='comment lines')
    plt.plot(df['m3'], linewidth=3, marker='o', markersize=10, label='duplicate')
    plt.plot(df['m4'], linewidth=3, marker='o', markersize=10, label='test success')
    plt.plot(df['m5'], linewidth=3, marker='o', markersize=10, label='test execution time')
    plt.plot(df['m6'], linewidth=3, marker='o', markersize=10, label='test coverage')
#     handles, labels = ax.get_legend_handles_labels()
    fig.legend(loc='lower center')
    plt.show()

In [25]:
from IPython.display import display, Markdown, Latex
from utils.analysis import ANALYSIS

output = []
for i in range(len(REPOS)):
    output.append(widgets.Output())

tab = widgets.Tab(children = output)

for i in range(len(REPOS)):
    tab.set_title(i, REPOS[i])

display(tab)

metrics = {}

for i in range(len(REPOS)):
    with output[i]:
        metrics[REPOS[i]] = create_metrics(dfs[REPOS[i]]) 
        plot_metrics(metrics[REPOS[i]])
        display(Markdown(ANALYSIS[REPOS[i]]))

Tab(children=(Output(), Output(), Output(), Output()), _titles={'0': 'ApiGateway', '1': 'UserService', '2': 'P…

In [26]:
psc1 = 1
psc2 = 1
pc1 = 0.5
pc2 = 0.5
pm1 = 0.33
pm2 = 0.33
pm3 = 0.33
pm4 = 0.15
pm5 = 0.15
pm6 = 0.7

for repo in REPOS:

    metrics[repo]['code_quality'] = ((metrics[repo]['m1']*pm1) + (metrics[repo]['m2']*pm2) + (metrics[repo]['m3']*pm3)) * psc1
    metrics[repo]['testing_status'] = ((metrics[repo]['m4']*pm4) + (metrics[repo]['m5']*pm5) + (metrics[repo]['m6']*pm6)) * psc2


In [27]:
metrics['UserService'].head()

Unnamed: 0,m1,m2,m3,m4,m5,m6,repository,version,ncloc,code_quality,testing_status
0,1.0,0.0,1.0,1.0,,0.0,UserService,09-27-2021-22-25,145,0.66,
1,0.545455,0.0,1.0,1.0,,0.0,UserService,10-06-2021-15-47,384,0.51,
2,0.545455,0.0,1.0,1.0,,0.0,UserService,10-19-2021-01-05,384,0.51,
3,0.636364,0.0,1.0,1.0,,0.0,UserService,10-29-2021-21-16,465,0.54,
4,0.666667,0.0,1.0,1.0,,0.0,UserService,11-07-2021-01-05,828,0.55,
