# Analytics

#### Date: 2020/02

#### SUMMARY:

- This notebook represents the project quality analysis of the date exposed right above. 

### TEAM:

##### Semester: 2021/1
##### Professor: Hilmer Neri

##### Members:


### LIBRARIES

In [151]:
# Deal with data
import pandas as pd
import numpy as np
import json
from glob import glob
import os

# Deal with API request
import urllib3
from urllib3 import request

# Deal with visualization
import seaborn as sns
import matplotlib.pyplot as plt

from random import randint
from IPython.core.display import display, HTML

### GRAPH SETTINGS

In [152]:
%config InlineBackend.figure_format ='retina'
sns.set(font_scale=1.5)
sns.set_style('darkgrid',
              {'xtick.bottom' : True,
               'ytick.left': True,
               'grid.linestyle':'--',
               'font.monospace': ['Computer Modern Typewriter'],
               'axes.edgecolor' : 'white'})

### DATAFRAME SETTINGS

In [153]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

### SonarCloud

In [154]:
repos = [
    '2021.1-Cartografia-social-api-comunidades',
    '2021.1-Cartografia-social-api-gateway',
    #'2021.1-Cartografia-social-api-mapas',
    #'2021.1-Cartografia-social-api-midia',
    #'2021.1-Cartografia-social-api-users',
    #'2021.1-Cartografia-social-front'
]

language = [['2021.1-Cartografia-social-api-comunidades', 'ts']] 
            ['2021.1-Cartografia-social-api-gateway', 'ts'], 
            #['2021.1-Cartografia-social-api-mapas', 'ts'], 
            #['2021.1-Cartografia-social-api-midia', 'ts']] 
            #['2021.1-Cartografia-social-api-users', 'ts'], 
            #['2021.1-Cartografia-social-front', 'js']]

repos_language = {}

for item in language:
    repos_language[f"{item[0]}"] = item[1]

In [155]:
repos_language

{'2021.1-Cartografia-social-api-comunidades': 'ts'}

##### Path to the folder with all your jsons

In [156]:
jsons = glob('2021.1-Cartografia-social-api-comunidades/*.json') # add the path here

In [157]:
issues = glob('issues.json')
sprints = glob('sprints.json')

In [158]:
def read_json(json_path):
    
    with open(json_path) as json_file:
        json_obj = json.load(json_file)
        
    return json_obj


def create_base_component_df(json_list):
    
    df = pd.DataFrame()

    for i in json_list:

        base_component = read_json(i)

        base_component_data = base_component['baseComponent']['measures']

        base_component_df = pd.DataFrame(base_component_data)

        base_component_df['filename'] = os.path.basename(i)

        df = df.append(base_component_df, ignore_index=True)
        
        aux_df = df['filename'].str.split(r"fga-eps-mds-2021_1-(.*?)_(.*?)_(.*?).json", expand=True)
    
    df['repository'] = aux_df[1]
    
    df['version'] = aux_df[3]
    
    df = df.sort_values(by=['repository', 'version'])
        
    return df

def create_base_issue_df(json_list):
    df = pd.DataFrame()
    
    for i in issue_list:
        base_issue = read_json(i)
        base_issue_data = base_issue['baseIssue']['TAGS']
        base_issue_df = pd.DataFrame(base_issue_data)
        base_issue_df['filename'] = os.path.basename(i)
        df = df.append(base_issue_df, ignore_index=True)
    return df, repos

In [159]:
all_issues = pd.DataFrame()
all_issues = read_json(issues[0])

In [160]:
all_sprints = read_json(sprints[0])

#### Create base component dataframe

In [161]:
base_component_df = create_base_component_df(jsons)
base_component_df.head(10)

Unnamed: 0,metric,value,bestValue,filename,repository,version
54,duplicated_lines_density,0.0,True,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
55,functions,35.0,,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
56,security_rating,1.0,True,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
57,files,30.0,,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
58,complexity,35.0,,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
59,ncloc,336.0,,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
60,reliability_rating,1.0,True,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
61,comment_lines_density,0.0,False,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.1
9,duplicated_lines_density,0.0,True,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.2
10,functions,16.0,,fga-eps-mds-2021_1-Cartografia-social-api-comu...,Cartografia-social-api-comunidades,v0.0.2


#### Create dataframe per file

In [162]:
metric_list = ['files',
               'functions',
               'complexity',
               'comment_lines_density',
               'duplicated_lines_density',
               'coverage',
               'ncloc',
               'security_rating',
               'tests',
               'test_success_density',
               'test_execution_time',
               'reliability_rating']

len(metric_list)

12

In [163]:
def metric_per_file(json):
    
    file_json = []
    
    for component in json['components']:
        if component['qualifier'] == 'FIL':
            file_json.append(component)
            
    return file_json

def generate_file_dataframe_per_release(metric_list, json, language_extension):
    
    df_columns = metric_list
    df = pd.DataFrame(columns = df_columns)
    
    for file in json:
        try:
            if file['language'] == language_extension:
                for measure in file['measures']:
                    df.at[file['path'], measure['metric']] = measure['value']
        except:
            pass
        
    df.reset_index(inplace = True)
    df = df.rename({'index': 'path'}, axis=1).drop(['files'], axis=1)

    return df

def create_file_df(json_list):
    
    df = pd.DataFrame()

    for i in json_list:

        file_component = read_json(i)
        
        file_component_data = metric_per_file(file_component)

        file_component_df = generate_file_dataframe_per_release(metric_list, file_component_data, language_extension = 'ts')

        file_component_df['filename'] = os.path.basename(i)

        df = df.append(file_component_df, ignore_index=True)
        
    # replace TeamName by yours.    
    aux_df = df['filename'].str.split(r"fga-eps-mds-2021_1-(.*?)_(.*?)_(.*?).json", expand=True)
    
    df['repository'] = aux_df[1]
    
    df['version'] = aux_df[3]
    
    df = df.sort_values(by=['repository', 'version'])

    df.to_csv('result2.csv')
    
    return df

In [164]:
file_component_df = create_file_df(jsons)

In [165]:
#file_component_df['version'] = pd.to_datetime(file_component_df['version'], format="%d-%m-%Y")

In [166]:
file_component_df = file_component_df.sort_values(by='version')

In [167]:
file_component_df.to_excel('data/data.xlsx', index = False)

In [168]:
file_component_df.repository.unique()

array(['Cartografia-social-api-comunidades'], dtype=object)

#### Create dataframe per repository

In [169]:
repository_dataframes = {} 

for repository in repos:
    df = file_component_df[file_component_df['repository'] == f"{repository}"]
    df.name = f"{repository}"
    repository_dataframes[f"{repository}"] = df
    
repository_dataframes.keys()

dict_keys(['2021.1-Cartografia-social-api-comunidades'])

In [170]:
api_comunidades_df = file_component_df[file_component_df['repository'] == 'Cartografia-social-api-comunidades']
...
#api_gateway_df = file_component_df[file_component_df['repository'] == '2021.1-Cartografia-social-api-gateway']
#api_mapas_df = file_component_df[file_component_df['repository'] == '2021.1-Cartografia-social-api-mapas']
#api_midia_df = file_component_df[file_component_df['repository'] == '2021.1-Cartografia-social-api-midia']
#api_users_df = file_component_df[file_component_df['repository'] == '2021.1-Cartografia-social-api-users']
#front_df = file_component_df[file_component_df['repository'] == '2021.1-Cartografia-social-api-front']

print(api_comunidades_df)

                                                  path functions complexity  \
58                                test/app.e2e-spec.ts         3          3   
78     src/questionario/dto/update-questionario.dto.ts         0          0   
77   src/questionario/entities/survey_response.sche...         0          0   
76       src/questionario/dto/sendSurveyAsnwers.dto.ts         0          0   
75            src/questionario/questionario.service.ts         3          3   
74      test/questionario/questionario.service.spec.ts         8          8   
73    src/questionario/entities/questionario.schema.ts         0          0   
72             src/questionario/questionario.module.ts         0          0   
71         src/questionario/questionario.controller.ts         3          3   
69                                         src/main.ts         2          2   
70   test/questionario/questionario.controller.spec.ts         5          5   
67                         src/config/configuration.

### Metric calculations

##### COMPLEXITY

In [171]:
def m1(df):
    
    density_non_complex_files = len(df[(df['complexity'].astype(float)/df['functions'].astype(float)) < 10])/len(df)
    
    return density_non_complex_files

##### COMMENTS

In [172]:
def m2(df):
    
    density_comment_files = len(df[(df['comment_lines_density'].astype(float) > 10) & (df['comment_lines_density'].astype(float) < 30)])/len(df)
    
    return density_comment_files

##### DUPLICATIONS

In [173]:
def m3(df):
    
    duplication = len(df[(df['duplicated_lines_density'].astype(float) < 5)])/len(df)
    
    return duplication

### NCLOC

In [174]:
def _ncloc(df):
    ncloc = 0
    for each in df['ncloc']:
        ncloc += int(each)
    
    return ncloc

##### PRODUCTIVITY

#### RESOLVED ISSUES' THROUGHPUT

In [175]:
def m7(number_of_issues_resolved, number_of_issues):
    
    resolved_issues_throughput = round((number_of_issues_resolved / number_of_issues) * 100, 2)
    
    return resolved_issues_throughput

#### ISSUE TYPE IN A TIMEFRAME

In [176]:
def density(issue, number_of_issues):
    issue_density = round((issue / number_of_issues) * 100, 2)
    return issue_density

In [177]:
def m8(tag_dict, number_of_issues):
    
    issue_densities = {
        "hotfix": [density(tag_dict["HOTFIX"], number_of_issues)],
        "docs": [density(tag_dict["DOCS"], number_of_issues)],
        "feature": [density(tag_dict["FEATURE"], number_of_issues)],
        "arq": [density(tag_dict["ARQ"], number_of_issues)],
        "devops": [density(tag_dict["DEVOPS"], number_of_issues)],
        "analytics": [density(tag_dict["ANALYTICS"], number_of_issues)],
        "us": [density(tag_dict["US"], number_of_issues)],
        "easy": [density(tag_dict["EASY"], number_of_issues)],
        "medium": [density(tag_dict["MEDIUM"], number_of_issues)],
        "hard": [density(tag_dict["HARD"], number_of_issues)],
        "eps": [density(tag_dict["EPS"], number_of_issues)],
        "mds": [density(tag_dict["MDS"], number_of_issues)]
    }

    issue_densities = pd.DataFrame.from_dict(issue_densities).T.reset_index()
    
    issue_densities.columns = ['density' ,'percentage']
    
    return issue_densities

In [178]:
issues = glob('issues.json')
sprints = glob('sprints.json')
all_issues = read_json(issues[0])

### Generate M8 and Create CSV

In [179]:
def calculate_m8(all_issues):
    for issue in all_issues:
        df = m8(all_issues[issue]['TAGS'], NUMBER_OF_ISSUES)
        df.to_csv(f'fga-eps-mds-2021-1-Cartografia-social-qualidade-total-processo-m8-{int(issue.split(" ")[1])}.csv')

In [180]:
TAGS = {
    'HOTFIX': 15,
    'DOCS': 121,
    'FEATURE': 32,
    'ARQ': 15,
    'DEVOPS': 12,
    'ANALYTICS': 23,
    'US': 19,
    'EASY': 28,
    'MEDIUM': 22,
    'HARD': 7,
    'EPS': 61,
    'MDS': 41
}
NUMBER_OF_ISSUES_RESOLVED=201
NUMBER_OF_ISSUES=236

In [181]:
calculate_m8(all_issues)

#### BUGS RATIO

In [182]:
def m9(tag_dict, number_of_issues):
    
    bugs_ratio = round(((tag_dict["DOCS"] + tag_dict["FEATURE"] + tag_dict["ARQ"] + tag_dict["DEVOPS"] + tag_dict["ANALYTICS"]) / number_of_issues) * 100, 2)
    
    return bugs_ratio

In [183]:
#df = pd.read_csv('result.csv')

In [184]:
m7(NUMBER_OF_ISSUES_RESOLVED, NUMBER_OF_ISSUES)

85.17

In [185]:
m8(TAGS, NUMBER_OF_ISSUES)

Unnamed: 0,density,percentage
0,hotfix,6.36
1,docs,51.27
2,feature,13.56
3,arq,6.36
4,devops,5.08
5,analytics,9.75
6,us,8.05
7,easy,11.86
8,medium,9.32
9,hard,2.97


In [186]:
m9(TAGS, NUMBER_OF_ISSUES)

86.02

### Calculate m1, m2, m3, m7, m8, m9 for each repository

In [187]:
def create_metrics_df(df):
    
    version_vec = df['version'].unique()
    
    m1_list = []
    m2_list = []
    m3_list = []
    repository_list = []
    version_list = []
    ncloc_list = []
    
    metrics_df = pd.DataFrame()
    
    for version in version_vec:

        version_df = df[df['version'] == version]

        m1_list.append(m1(version_df))
        m2_list.append(m2(version_df))
        m3_list.append(m3(version_df))
        repository_list.append(version_df['repository'].iloc[0])
        version_list.append(version)
        ncloc_list.append(_ncloc(version_df))
        
    metrics_df = pd.DataFrame({'m1': m1_list,
                               'm2': m2_list,
                               'm3': m3_list,
                               'repository': repository_list, 
                               'version': version_list,
                               'ncloc': ncloc_list
                              })
    
    metrics_df.to_csv('result.csv')
        
    return metrics_df

In [188]:
repository_metrics = {}

for repository, repo_df in repository_dataframes.items():   
    metrics_df = create_metrics_df(repo_df)
    metrics_df.name = f"{repository}"
    repository_metrics[f"{repository}"] = metrics_df

In [189]:
def create_df(base_issue_data):
    base_issue_m7 = []
    base_issue_m9 = []
    sprint_list = []
    start_list = []
    end_list = []

    for issue in base_issue_data:
        base_issue_m7.append(m7(base_issue_data[issue]['CLOSED_ISSUES'], NUMBER_OF_ISSUES))
        base_issue_m9.append(m9(base_issue_data[issue]['TAGS'], NUMBER_OF_ISSUES))
        sprint_list.append(int(issue.split(" ")[1]))
        start_list.append(all_sprints[issue]['start'])
        end_list.append(all_sprints[issue]['end'])
    df = pd.DataFrame({
                       'm7': base_issue_m7, 
                       'm9': base_issue_m9, 
                       'sprints': sprint_list,
                       'start_sprint': start_list,
                       'end_sprint': end_list
                    })
    metrics_df.to_csv('fga-eps-mds-2021-1-Cartografia-social-qualidade-total-processo.csv')
    
    return df

In [190]:
issue_df = create_df(all_issues)
issue_df['start_sprint'] = pd.to_datetime(issue_df['start_sprint'], format='%d/%m/%Y')
issue_df['end_sprint'] = pd.to_datetime(issue_df['end_sprint'], format='%d/%m/%Y')

In [191]:
api_comunidades_metrics = create_metrics_df(api_comunidades_df)
...
#api_gateway_metrics = create_metrics_df(api_gateway_df)
#api_mapas_metrics = create_metrics_df(api_mapas_df)
#api_midia_metrics = create_metrics_df(api_midia_df)
#api_users_metrics = create_metrics_df(api_users_df)
#front_metrics = create_metrics_df(front_df)

In [192]:
all_metrics = pd.concat([api_comunidades_metrics], ignore_index=True)

In [193]:
def add_m7_and_m9(metrics_df):

    m7_list = []
    m9_list = []

    for _, release in metrics_df.iterrows():

        curr_version = release['version']

        for _, row in issue_df.iterrows():
            if pd.Timestamp.to_numpy(row['start_sprint']) <= curr_version <= pd.Timestamp.to_numpy(row['end_sprint']):
                m7_list.append(row['m7'])
                m9_list.append(row['m9'])

    m7_list = pd.DataFrame(m7_list, columns=['m7'])
    m9_list = pd.DataFrame(m9_list, columns=['m9'])

    metrics_df = pd.concat([metrics_df, m7_list], axis=1)
    metrics_df = pd.concat([metrics_df, m9_list], axis=1)

    return metrics_df

In [194]:
api_comunidades_metrics = add_m7_and_m9(api_comunidades_metrics)
all_metrics = add_m7_and_m9(all_metrics)

TypeError: '<=' not supported between instances of 'numpy.ndarray' and 'str'

In [None]:
def calculate_ac(df):
    df['asc1'] = (df['m1'] + df['m2'] + df['m3']) / 3
    df['ac1'] = df['asc1']
    df['totalAC1'] = df['asc1']
    return df

In [None]:
def calculate_ac2(issue_df):
    aux_df = pd.DataFrame({
        "asc2": [],
        "totalAC2": []
    })
    aux_df['asc2'] = (issue_df['m7'] + issue_df['m9']) / 2
    aux_df['totalAC2'] = aux_df['asc2']
    return pd.concat([issue_df, aux_df], 1)

issue_df_v2 = calculate_ac2(issue_df)

In [None]:
api_comunidades_metrics = calculate_ac(api_comunidades_metrics)
all_metrics = calculate_ac(all_metrics)

In [None]:
api_comunidades_metrics = calculate_ac2(api_comunidades_metrics)
all_metrics = calculate_ac2(all_metrics)

### Linear Regression

In [None]:
def plot_linear_regression(df):
    sns.regplot(x=df["totalAC1"], y=df["totalAC2"], line_kws={"color":"r","alpha":0.7,"lw":5})
    plt.show()

# Descriptive Statistic Analysis

- Realizes the calculation of statistic data such as mean, median, mode, min, max, standard deviation and variance.

In [None]:
def descriptive_statistics(df):
    
    metrics = df.describe()
    variance = df.var()
    variance_df = pd.DataFrame(variance, columns=["var"])
    variance_df = variance_df.T
    return metrics.append(variance_df).T

### Data visualization

- You must do this for each of your repositories

In [None]:
def plot_history(df, repository):
    fig = plt.figure(figsize=(20, 10))
    plt.title(f"{repository}:COMPLEXITY")
    plt.plot(df['m1'], linewidth=3, marker='o', markersize=10)
    fig = plt.figure(figsize=(20, 10))
    plt.title(f"{repository}:COMMENTS")
    plt.plot(df['m2'], linewidth=3, marker='o', markersize=10)
    fig = plt.figure(figsize=(20, 10))
    plt.title(f"{repository}:DUPLICATIONS")
    plt.plot(df['m3'], linewidth=3, marker='o', markersize=10)

# Histogram Repositories Analysis

In [None]:
def generate_histogram(repository, df):
    fig = plt.figure(figsize=(30, 10))
    plt.title(f"{repository}:COMPLEXITY")
    plt.bar(df.index.values.tolist(), df['m1'], color='g')
    plt.show()
    plt.title(f"{repository}:COMMENTS")
    plt.bar(df.index.values.tolist(), df['m2'], color='b')
    plt.show()
    plt.title(f"{repository}:DUPLICATIONS")
    plt.bar(df.index.values.tolist(), df['m3'], color='red')
    plt.show()
    plt.title(f"{repository}:MAINTAINABILITY")
    plt.bar(df.index.values.tolist(), df['asc1'], color='purple')
    plt.show()
    plt.title(f"{repository}:PRODUCTIVITY")
    plt.bar(df.index.values.tolist(), df['asc2'], color='orange')
    plt.show()

# Box-plot

In [None]:
def generate_boxplot(df, repository):
    fig = plt.figure(figsize=(10, 10))
    plt.title(f"{repository}:COMPLEXITY")
    plt.boxplot(df['m1'])
    plt.show()
    plt.title(f"{repository}:COMMENTS")
    plt.boxplot(df['m2'])
    plt.show()
    plt.title(f"{repository}:DUPLICATION")
    plt.boxplot(df['m3'])
    plt.show()
    plt.title(f"{repository}:MAINTAINABILITY")
    plt.boxplot(df['totalAC1'])
    plt.show()

# Correlation Matrix

In [None]:
def correlationMatrix(df):
    corrMatrix = df.corr()
    return corrMatrix

# Matriz de correlação 

Ao fazermos uma análise de dado é importante criar uma matriz de correlação para que possamos associar os valores das váriaveis entre si. Dessa forma, uma matriz de correlação é o retorno de uma tabela com coeficientes que mostram a correlação de cada variavel.

## Como interpretar o resultado

O coeficiente da correlação vária de 1 e -1, sendo assim, respectivamente, positiva, negativa ou nula, tal que:

* Correlação positiva: Ambas as variáveis mudam na mesma direção. Assim, ambas váriaveis correlacionadas se movem na mesma direção, logo se uma tem seu valor aumentado, a outra também.

* Correlação Nula: Nenhuma relação na mudança das variáveis. Isso ocorre apenas se o coeficiente da correlação for 0.

* Correlação negativa: As variáveis mudam em direções opostas. Dessa forma, se uma váriavel tem seu valor aumentado, a outra diminui seu valor. 


# DataFrame M1, M2, M3, M7, M9, NCLOC, AC1, ASC1, ASC2, TOTALAC1, TOTALAC2


In [None]:
def descriptive_percentage(df):
    df_copy = df.copy()
    df_copy['m1'] = df_copy['m1'] * 100
    df_copy['m2'] = df_copy['m2'] * 100
    df_copy['m3'] = df_copy['m3'] * 100
    df_copy['asc1'] = df_copy['asc1'] * 100
    df_copy['ac1'] = df_copy['ac1'] * 100
    df_copy['totalAC1'] = df_copy['totalAC1'] * 100
    descriptive_percentage_product = df_copy.to_html(formatters={
    'm1': '{:.4}%'.format,
    'm2': '{:.4}%'.format,
    'm3': '{:.4}%'.format,
    'asc1': '{:.4}%'.format,
    'ac1': '{:.4}%'.format,
    'totalAC1': '{:.4}%'.format,
    'asc2': '{:.4}%'.format,
    'totalAC2': '{:.4}%'.format,
    'm7': '{:.4}%'.format,
    'm9': '{:.4}%'.format,
    })
    display(HTML(descriptive_percentage_product))

In [None]:
descriptive_percentage(eccoar_complaint_metrics)

In [None]:
metrics_df.to_excel('data/metrics_df.xlsx', index = False)

# Análises dos Repositórios

## Front-end

In [None]:
generic_df = eccoar_frontend_metrics.copy()
repository = "FRONTEND"

### Gráfico histórico

In [None]:
plot_history(generic_df, repository)

### Percentis

In [None]:
descriptive_percentage(generic_df)

### Regressão Linear

In [None]:
plot_linear_regression(generic_df)

### Histogramas

In [None]:
generate_histogram(repository, generic_df)

### Análise Descritiva

In [None]:
descriptive_statistics(generic_df)

### Matriz de Correlação

In [None]:
correlationMatrix(generic_df)

### Box-plot

In [None]:
generate_boxplot(generic_df, repository)

## Complaint

In [None]:
generic_df = eccoar_complaint_metrics.copy()
repository = "COMPLAINT"

### Gráfico histórico

In [None]:
plot_history(generic_df, repository)

### Percentis

In [None]:
descriptive_percentage(generic_df)

### Regressão Linear

In [None]:
plot_linear_regression(generic_df)

### Histogramas

In [None]:
generate_histogram(repository, generic_df)

### Análise Descritiva

In [None]:
descriptive_statistics(generic_df)

### Matriz de Correlação

In [None]:
correlationMatrix(generic_df)

### Box-plot

In [None]:
generate_boxplot(generic_df, repository)

## Gateway

In [None]:
generic_df = eccoar_gateway_metrics.copy()
repository = "GATEWAY"

### Gráfico histórico

In [None]:
plot_history(generic_df, repository)

### Percentis

In [None]:
descriptive_percentage(generic_df)

### Regressão Linear

In [None]:
plot_linear_regression(generic_df)

### Histogramas

In [None]:
generate_histogram(repository, generic_df)

### Análise Descritiva

In [None]:
descriptive_statistics(generic_df)

### Matriz de Correlação

In [None]:
correlationMatrix(generic_df)

### Box-plot

In [None]:
generate_boxplot(generic_df, repository)

## Users

In [None]:
generic_df = eccoar_users_metrics.copy()
repository = "USERS"

### Gráfico histórico

In [None]:
plot_history(generic_df, repository)

### Percentis

In [None]:
descriptive_percentage(generic_df)

### Regressão Linear

In [None]:
plot_linear_regression(generic_df)

### Histogramas

In [None]:
generate_histogram(repository, generic_df)

### Análise Descritiva

In [None]:
descriptive_statistics(generic_df)

### Matriz de Correlação

In [None]:
correlationMatrix(generic_df)

### Box-plot

In [None]:
generate_boxplot(generic_df, repository)

## Mailer

In [None]:
generic_df = eccoar_mailer_metrics.copy()
repository = "MAILER"

### Gráfico histórico

In [None]:
plot_history(generic_df, repository)

### Percentis

In [None]:
descriptive_percentage(generic_df)

### Regressão Linear

In [None]:
plot_linear_regression(generic_df)

### Histogramas

In [None]:
generate_histogram(repository, generic_df)

### Análise Descritiva

In [None]:
descriptive_statistics(generic_df)

### Matriz de Correlação

In [None]:
correlationMatrix(generic_df)

### Box-plot

In [None]:
generate_boxplot(generic_df, repository)

## Reports

In [None]:
generic_df = eccoar_reports_metrics.copy()
repository = "REPORTS"

### Gráfico histórico

In [None]:
plot_history(generic_df, repository)

### Percentis

In [None]:
descriptive_percentage(generic_df)

### Regressão Linear

In [None]:
plot_linear_regression(generic_df)

### Histogramas

In [None]:
generate_histogram(repository, generic_df)

### Análise Descritiva

In [None]:
descriptive_statistics(generic_df)

### Matriz de Correlação

In [None]:
correlationMatrix(generic_df)

### Box-plot

In [None]:
generate_boxplot(generic_df, repository)

## All metrics

In [None]:
generic_df = all_metrics.copy()
repository = "ALL METRICS"

### Gráfico histórico

In [None]:
plot_history(generic_df, repository)

### Percentis

In [None]:
descriptive_percentage(generic_df)

### Regressão Linear

In [None]:
plot_linear_regression(generic_df)

### Histogramas

In [None]:
generate_histogram(repository, generic_df)

### Análise Descritiva

In [None]:
descriptive_statistics(generic_df)

### Matriz de Correlação

In [None]:
correlationMatrix(generic_df)

### Box-plot

In [None]:
generate_boxplot(generic_df, repository)