In [None]:
import pandas as pd
import json
import requests
import math
import matplotlib.pylab as plt
import statsmodels.api as sm
from scipy.stats import norm, mstats

from itertools import groupby
from datetime import datetime as dt

In [None]:
%pylab inline

In [None]:
METRICS_ALL = 'accessors,new_technical_debt,blocker_violations,conditions_to_cover,new_it_conditions_to_cover,new_conditions_to_cover,bugs,burned_budget,business_value,class_complexity_distribution,classes,code_smells,comment_lines,commented_out_code_lines,comment_lines_density,comment_lines_data,complexity,class_complexity,file_complexity,function_complexity,complexity_in_classes,complexity_in_functions,branch_coverage,new_it_branch_coverage,new_branch_coverage,conditions_by_line,confirmed_issues,coverage,new_it_coverage,coverage_line_hits_data,new_coverage,covered_conditions_by_line,critical_violations,last_commit_date,directories,duplicated_blocks,new_duplicated_blocks,duplicated_files,duplicated_lines,duplicated_lines_density,new_duplicated_lines,new_duplicated_lines_density,duplications_data,effort_to_reach_maintainability_rating_a,executable_lines_data,false_positive_issues,file_complexity_distribution,files,function_complexity_distribution,functions,generated_lines,generated_ncloc,info_violations,violations,it_conditions_to_cover,it_branch_coverage,it_conditions_by_line,it_coverage,it_coverage_line_hits_data,it_covered_conditions_by_line,it_line_coverage,it_lines_to_cover,it_uncovered_conditions,it_uncovered_lines,line_coverage,new_it_line_coverage,new_line_coverage,lines,ncloc,ncloc_language_distribution,new_lines,lines_to_cover,new_it_lines_to_cover,new_lines_to_cover,sqale_rating,new_maintainability_rating,major_violations,minor_violations,ncloc_data,new_blocker_violations,new_bugs,new_code_smells,new_critical_violations,new_info_violations,new_violations,new_major_violations,new_minor_violations,new_vulnerabilities,open_issues,overall_conditions_to_cover,new_overall_conditions_to_cover,overall_branch_coverage,new_overall_branch_coverage,overall_conditions_by_line,overall_coverage,overall_coverage_line_hits_data,new_overall_coverage,overall_covered_conditions_by_line,overall_line_coverage,new_overall_line_coverage,overall_lines_to_cover,new_overall_lines_t
o_cover,overall_uncovered_conditions,new_overall_uncovered_conditions,overall_uncovered_lines,new_overall_uncovered_lines,quality_profiles,projects,public_api,public_documented_api_density,public_undocumented_api,quality_gate_details,alert_status,reliability_rating,new_reliability_rating,reliability_remediation_effort,new_reliability_remediation_effort,reopened_issues,security_rating,new_security_rating,security_remediation_effort,new_security_remediation_effort,skipped_tests,development_cost,statements,team_size,sqale_index,sqale_debt_ratio,new_sqale_debt_ratio,uncovered_conditions,new_it_uncovered_conditions,new_uncovered_conditions,uncovered_lines,new_it_uncovered_lines,new_uncovered_lines,test_data,test_execution_time,test_errors,test_failures,test_success_density,tests,vulnerabilities,wont_fix_issues'
METRICS_NOTNULL = 'blocker_violations,bugs,classes,code_smells,comment_lines,comment_lines_density,complexity,class_complexity,file_complexity,function_complexity,complexity_in_classes,complexity_in_functions,confirmed_issues,critical_violations,last_commit_date,directories,duplicated_blocks,duplicated_files,duplicated_lines,duplicated_lines_density,effort_to_reach_maintainability_rating_a,false_positive_issues,file_complexity_distribution,files,function_complexity_distribution,functions,info_violations,violations,lines,ncloc,ncloc_language_distribution,sqale_rating,major_violations,minor_violations,open_issues,quality_profiles,quality_gate_details,alert_status,reliability_rating,reliability_remediation_effort,reopened_issues,security_rating,security_remediation_effort,development_cost,statements,sqale_index,sqale_debt_ratio,vulnerabilities,wont_fix_issues'
METRICS_NOTNULL_NOTSTRING = 'blocker_violations,bugs,classes,code_smells,comment_lines,comment_lines_density,complexity,class_complexity,file_complexity,function_complexity,complexity_in_classes,complexity_in_functions,confirmed_issues,critical_violations,last_commit_date,directories,duplicated_blocks,duplicated_files,duplicated_lines,duplicated_lines_density,effort_to_reach_maintainability_rating_a,false_positive_issues,files,functions,info_violations,violations,lines,ncloc,major_violations,minor_violations,open_issues,reliability_remediation_effort,reopened_issues,security_remediation_effort,development_cost,statements,sqale_index,sqale_debt_ratio,vulnerabilities,wont_fix_issues'
METRICS_NOTNULL_NOTSTRING_NOTLASTITEMVALUE = 'blocker_violations,bugs,classes,code_smells,comment_lines,comment_lines_density,complexity,class_complexity,file_complexity,function_complexity,confirmed_issues,critical_violations,last_commit_date,directories,duplicated_blocks,duplicated_files,duplicated_lines,duplicated_lines_density,effort_to_reach_maintainability_rating_a,false_positive_issues,files,functions,info_violations,violations,lines,ncloc,major_violations,minor_violations,open_issues,reliability_remediation_effort,reopened_issues,security_remediation_effort,development_cost,statements,sqale_index,sqale_debt_ratio,vulnerabilities,wont_fix_issues'
METRICS_MTD = 'code_smells,bugs,classes,lines,ncloc,sqale_index,violations,blocker_violations,critical_violations,major_violations,minor_violations'
METRICS_ECSA = 'complexity,class_complexity,file_complexity,function_complexity,comment_lines,comment_lines_density,duplicated_blocks,duplicated_files,duplicated_lines,duplicated_lines_density,violations,blocker_violations,critical_violations,major_violations,minor_violations,info_violations,open_issues,code_smells,sqale_index,sqale_debt_ratio,bugs,reliability_remediation_effort,vulnerabilities,security_remediation_effort,classes,directories,files,lines,ncloc,functions,statements'
# Active metric selection: the comma-separated key string that
# get_sonarqube_timemachine_metrics appends to the timemachine URL.
# Point this at another METRICS_* constant to request a different set.
METRICS = METRICS_ECSA
# The same keys as a list, in the order they appear in METRICS.
METRICS_LIST = METRICS.split(',')
# Number of keys in METRICS_LIST; used as the loop bound when the
# timemachine cells are turned into DataFrame columns.
metrics_list_length = len(METRICS_LIST)

# Value lists used when building issue queries.
# NOTE(review): the original note here read "esolved=false" - presumably the
# `resolved=false` issue filter; confirm before relying on it.
resolutions = ['FALSE-POSITIVE', 'WONTFIX', 'FIXED', 'REMOVED']
severities = ['INFO', 'MINOR', 'MAJOR', 'CRITICAL', 'BLOCKER']
types = ['CODE_SMELL', 'BUG', 'VULNERABILITY']

# General functions

In [None]:
def get_rest_response(rest_url, timeout=60):
    """Issue an HTTP GET against ``rest_url`` and return the parsed JSON body.

    Parameters
    ----------
    rest_url : str
        Complete URL of the REST endpoint, query string included.
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without a timeout a stalled server would
        block the notebook indefinitely. Defaults to 60 seconds.

    Returns
    -------
    object
        Whatever ``json.loads`` produces for the response text.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If the server does not answer within ``timeout``.
    ValueError
        If the response text is not valid JSON.
    """
    response = requests.get(rest_url, timeout=timeout)
    # Fail here, with the status code in the message, instead of handing an
    # error payload to callers that would only trip over a missing key later.
    response.raise_for_status()
    return json.loads(response.text)

def jsonDate_to_date(date):
    """Convert an epoch timestamp in milliseconds to a ``datetime``.

    Parameters
    ----------
    date : int or float
        Milliseconds since the Unix epoch.

    Returns
    -------
    datetime.datetime
        Naive datetime in the local time zone (``datetime.fromtimestamp``
        semantics).
    """
    # Use the `dt` alias this notebook imports explicitly. The bare
    # `datetime` module name is only present when a star-import or %pylab
    # happens to inject it.
    return dt.fromtimestamp(date / 1e3)

def get_date(date):
    """Return the calendar day of ``date`` as a ``datetime.date``.

    ``date`` may be anything ``pd.Timestamp`` accepts; the time-of-day part
    is discarded.
    """
    timestamp = pd.Timestamp(date)
    return timestamp.date()

def plot(plt):
    """Invoke ``plt.plot`` with a fixed 15x10 figure size.

    The argument only needs a ``plot`` method that accepts ``figsize``.
    Nothing is returned; the result of ``plt.plot`` is discarded.
    """
    figure_size = (15, 10)
    plt.plot(figsize=figure_size)

In [None]:
def mk_test(x, alpha = 0.05):
    """
    Mann-Kendall test for a monotonic trend in a series.

    Adapted from
    http://michaelpaulschramm.com/simple-time-series-trend-analysis/

    Input:
        x:   a vector of data (list, numpy array or pandas Series); values
             are used in positional order, any index labels are ignored
        alpha: significance level (0.05 default)

    Output:
        trend: tells the trend (increasing, decreasing or no trend)
        h: True (if trend is present) or False (if trend is absent)
        p: p value of the two-sided significance test
        z: normalized test statistic

    Examples
    --------
      >>> x = np.random.rand(100)
      >>> trend,h,p,z = mk_test(x,0.05)
    """
    # Work on a plain array so that x[k] is always positional. A pandas
    # Series with a non-default index would otherwise be looked up by label.
    x = np.asarray(x)
    n = len(x)

    # calculate S: sum of sign(x[j] - x[k]) over all pairs k < j.
    # The inner loop over j is done as one vectorised operation per k.
    s = 0
    for k in range(n-1):
        s += np.sign(x[k+1:] - x[k]).sum()

    # calculate the unique data
    unique_x = np.unique(x)
    g = len(unique_x)

    # calculate the var(s)
    if n == g: # there is no tie
        var_s = (n*(n-1)*(2*n+5))/18
    else: # there are some ties in data
        # tp[i] = size of the group of values tied at unique_x[i]
        tp = np.array([np.sum(x == value) for value in unique_x])
        # Ties reduce the variance of S, so the tie term is subtracted.
        # Groups of size 1 contribute 0 because of the (tp-1) factor.
        var_s = (n*(n-1)*(2*n+5) - np.sum(tp*(tp-1)*(2*tp+5)))/18

    # continuity-corrected normal approximation
    if s>0:
        z = (s - 1)/np.sqrt(var_s)
    elif s == 0:
        z = 0
    elif s<0:
        z = (s + 1)/np.sqrt(var_s)

    # calculate the p_value
    p = 2*(1-norm.cdf(abs(z))) # two tail test
    h = abs(z) > norm.ppf(1-alpha/2)

    if (z<0) and h:
        trend = 'decreasing'
    elif (z>0) and h:
        trend = 'increasing'
    else:
        trend = 'no trend'

    return trend, h, p, z

# Functions for getting SonarQube timemachine metrics

In [28]:
from prep import *

In [None]:
def get_sonarqube_timemachine_metrics_DataFrame(ghp):
    """Fetch the timemachine metrics for ``ghp`` and return them as a DataFrame.

    Chains the three steps: download the JSON, pull out its ``cells`` and
    hand those to ``transform_timemachine_metrics_to_series``.
    """
    return transform_timemachine_metrics_to_series(
        get_cells(get_sonarqube_timemachine_metrics(ghp))
    )

def get_sonarqube_timemachine_metrics(ghp):
    """Download the timemachine JSON for ``ghp``.

    The request URL is ``ghp.timemachine_metrics_url`` with the module-level
    ``METRICS`` string appended.
    """
    metrics_url = ghp.timemachine_metrics_url + METRICS
    return get_rest_response(metrics_url)

def get_cells(json_data):
    """Return the ``'cells'`` entry of the first element of ``json_data``.

    The cells carry the metric values.
    """
    first_entry = json_data[0]
    return first_entry['cells']

def get_colls(json_data):
    """Return the ``'cols'`` entry of the first element of ``json_data``.

    The cols describe the metrics.
    """
    first_entry = json_data[0]
    return first_entry['cols']

def get_no_of_versions(cells):
    """Return how many entries ``cells`` contains."""
    version_count = len(cells)
    return version_count

def transform_timemachine_metrics_to_series(cells):
    """Turn timemachine cells into a daily DataFrame with one column per metric.

    Parameters
    ----------
    cells : sequence of mappings
        Each item is read through ``item.get('d')`` (snapshot date) and
        ``item.get('v')`` (list of metric values).

    Returns
    -------
    pandas.DataFrame
        Indexed by every calendar day between the first and last snapshot.
        Columns are named after ``METRICS_LIST``. Days without a snapshot
        hold NaN. When several snapshots share a day, the last one wins.
    """
    date_range_index = get_date_range_index_for_timemachine_metrics(cells)
    # Parse each snapshot date once and key it by a naive midnight Timestamp.
    # A plain datetime.date is not matched by a DatetimeIndex lookup, so
    # assigning with it would append new rows instead of filling existing days.
    snapshot_days = [pd.Timestamp(pd.Timestamp(item.get('d')).date()) for item in cells]

    df = pd.DataFrame(index=date_range_index)
    # assumes item['v'] is ordered like METRICS_LIST - TODO confirm against
    # the 'cols' entry of the response
    for metric_index, metric_name in enumerate(METRICS_LIST):
        values_by_day = {}
        for day, item in zip(snapshot_days, cells):
            values_by_day[day] = item.get('v')[metric_index]
        # Let pandas infer the dtype from the values, then spread the
        # snapshots over the full daily index (gaps become NaN).
        df[metric_name] = pd.Series(values_by_day).reindex(date_range_index)
    return df


def get_date_range_index_for_timemachine_metrics(cells):
    """Build a daily ``DatetimeIndex`` spanning all snapshot dates in ``cells``.

    The range runs from the calendar day of the earliest snapshot to the
    calendar day of the latest one, both inclusive.
    """
    project_dates = get_project_dates(cells)
    first_day = min(project_dates).date()
    last_day = max(project_dates).date()
    return pd.date_range(start=first_day, end=last_day, freq='D')

def get_project_dates(cells):
    """Return the ``'d'`` value of every cell as a ``pd.Timestamp``, in order."""
    return [pd.Timestamp(item.get('d')) for item in cells]

# Perform the analysis

In [None]:
# repos = get_repos()
# for ghp in repos:
#     print(ghp.repo)
#     timemachine_metrics = get_sonarqube_timemachine_metrics_DataFrame(ghp)
#     timemachine_metrics = timemachine_metrics.fillna(method='ffill')
#     timemachine_metrics['normalized_td'] = timemachine_metrics['sqale_index']/timemachine_metrics['ncloc']
#     issues = timemachine_metrics[['violations','blocker_violations','critical_violations','major_violations','minor_violations','info_violations','open_issues']]
#     smells_bugs_classes = timemachine_metrics[['code_smells','bugs','classes']]
#     lines_sqale = timemachine_metrics[['lines','ncloc','sqale_index']]
    
#     complexity = timemachine_metrics[['complexity','class_complexity','file_complexity','function_complexity']]
#     documentation = timemachine_metrics[['comment_lines','comment_lines_density']]
#     duplications = timemachine_metrics[['duplicated_blocks','duplicated_files','duplicated_lines','duplicated_lines_density']]
#     maintainability = timemachine_metrics[['code_smells','sqale_index','sqale_debt_ratio']]
#     reliability = timemachine_metrics[['bugs','reliability_remediation_effort']]
#     security = timemachine_metrics[['vulnerabilities','security_remediation_effort']]
#     size = timemachine_metrics[['classes','directories','files','lines','ncloc','functions','statements']]
    
#     plot(complexity)
#     plot(documentation)
#     plot(duplications)
#     plot(issues)
#     plot(maintainability)
#     plot(reliability)
#     plot(security)
#     plot(size)
    
#     plot(smells_bugs_classes)
#     plot(lines_sqale)
#     plt.figure()
#     plot(timemachine_metrics['normalized_td'])
    
#     timemachine_metrics = timemachine_metrics[timemachine_metrics.index.weekday==6]
#     res = sm.tsa.seasonal_decompose(timemachine_metrics['normalized_td'])
#     resplot = res.plot()
#     resplot.show()

In [None]:
def get_no_loops(response):
    """Read the paging figures from ``response``.

    Returns ``(total, ps, no_of_loops)``, where ``no_of_loops`` is
    ``ceil(total / ps) + 1``. Callers use it as the exclusive end of
    ``range(2, no_of_loops)`` when fetching the remaining pages.
    """
    total, ps = response['total'], response['ps']
    return total, ps, math.ceil(total / ps) + 1

def ddd(ghp):
    """Print the issues of every result page after the first, plus its URL.

    The first request to ``ghp.issues_url`` is used only to learn how many
    pages exist. Pages 2 onwards are then fetched one by one.
    """
    first_page = get_rest_response(ghp.issues_url)
    _, _, no_of_loops = get_no_loops(first_page)

    for page_number in range(2, no_of_loops):
        page_url = ghp.issues_url + '&p=' + str(page_number)
        print(get_rest_response(page_url)['issues'])
        print(page_url)

In [30]:
severities = 'INFO,MINOR,MAJOR,CRITICAL,BLOCKER'.split(',')
types = 'CODE_SMELL,BUG,VULNERABILITY'.split(',')


def _count_rules(issues, rule_counts):
    """Add one to ``rule_counts[issue['rule']]`` for every issue in ``issues``."""
    for issue in issues:
        rule_counts[issue['rule']] = rule_counts.get(issue['rule'], 0) + 1


# For every repository, print its name and then a dict mapping each rule key
# to the number of issues reported for it. Issues are queried once per
# (severity, type) combination and every result page is walked.
# NOTE: build_issues_url is defined in a later cell of this notebook; that
# cell has to be run before this one.
repos = get_repos1()
for ghp in repos:
    print(ghp.repo)
    project_rules = {}
    for severity in severities:
        # Named `issue_type` rather than `type`: a module-level loop variable
        # called `type` would replace the builtin for the rest of the session.
        for issue_type in types:
            issues_url = build_issues_url(ghp.issues_url, issue_type, severity)
            response = get_rest_response(issues_url)
            total, ps, no_of_loops = get_no_loops(response)
            # Page 1 arrived with the first request.
            _count_rules(response['issues'], project_rules)
            # Remaining pages, if any.
            for p in range(2, no_of_loops):
                response = get_rest_response(issues_url + '&p=' + str(p))
                _count_rules(response['issues'], project_rules)

    print(project_rules)

sling
{'squid:S1135': 563, 'squid:S1133': 161, 'squid:RedundantThrowsDeclarationCheck': 565, 'squid:S2293': 2947, 'squid:S00119': 153, 'squid:S1481': 71, 'squid:S1596': 40, 'squid:S1199': 191, 'squid:ModifiersOrderCheck': 445, 'squid:S00100': 351, 'squid:UselessParenthesesCheck': 299, 'squid:S1170': 53, 'squid:UselessImportCheck': 168, 'squid:S1213': 626, 'squid:S1197': 63, 'squid:S2786': 13, 'squid:S1301': 7, 'squid:S2147': 152, 'squid:S00117': 61, 'squid:S00116': 130, 'squid:S1450': 113, 'squid:S3008': 130, 'squid:S3398': 39, 'squid:S1905': 16, 'squid:S1488': 124, 'squid:S1165': 7, 'squid:S1940': 4, 'squid:S2094': 12, 'squid:S1185': 37, 'squid:S1659': 45, 'squid:S1612': 7, 'squid:S1220': 2, 'squid:S1153': 46, 'squid:S00101': 3, 'squid:S1126': 6, 'squid:S1125': 12, 'squid:S1610': 16, 'squid:S1611': 1, 'squid:S3400': 14, 'squid:S00120': 15, 'squid:S1319': 12, 'squid:S1264': 1, 'squid:S2065': 3, 'squid:CallToDeprecatedMethod': 2, 'squid:S2912': 1, 'squid:S2184': 19, 'squid:EmptyStatemen

In [None]:
# def build_issues_url(issues_url, parameter_key, parameter_value):
#     return issues_url + '&' + parameter_key + '=' + parameter_value

# def get_issues_url_by_type(issues_url, type):
#     return build_issues_url(issues_url, 'types', type)
    
# def get_issues_url_by_severity(issues_url, severity):
#     return build_issues_url(issues_url, 'severities', severity)
    
# def get_issues_url_by_resolution(issues_url, resolution):
#     return build_issues_url(issues_url, 'resolutions', resolution)

In [None]:
def build_issues_url(issues_url, type, severity):
    """Append ``&types=<type>&severities=<severity>`` to ``issues_url``.

    All three arguments must be strings. ``issues_url`` is expected to
    already carry a query string, because the filters are added with ``&``.
    """
    url_parts = [issues_url, '&types=', type, '&severities=', severity]
    return ''.join(url_parts)