# Loading metrics

In [165]:
import pandas as pd

In [166]:
# Load the merged metrics/questions table; the first two CSV columns are used
# as the (two-level) index. Forward slashes are used so the relative path
# resolves on Windows as well as on POSIX systems.
allMetricsAndQuestions = pd.read_csv("../../3-DataMerge/metricsAndQuestions.csv", index_col=[0, 1])

## Mann Kendall Test

Null Hypothesis: There is no monotonic trend.

Alternative Hypothesis: There is a monotonic trend (increasing or decreasing).

If the p-value is below 0.05 (the significance level), the null hypothesis is rejected.

In [167]:
pip install pymannkendall

Note: you may need to restart the kernel to use updated packages.


In [168]:
import pymannkendall as mk

### MannKendallTest for Java files

In [169]:
# Mann-Kendall test on the "numberJavaFiles" column.
result = mk.original_test(allMetricsAndQuestions["numberJavaFiles"])
print(
    f"Trend exists? {result.h}. "
    f"Trend is {result.trend}. "
    f"P-value: {result.p:.20f}"
)

Trend exists? True. Trend is increasing. P-value: 0.00000000000110400578


### MannKendallTest for LOC per file

In [170]:
# Mann-Kendall test on lines of code divided by the number of Java files.
result = mk.original_test(
    allMetricsAndQuestions["CountLineCode"].div(allMetricsAndQuestions["numberJavaFiles"])
)
print(
    f"Trend exists? {result.h}. "
    f"Trend is {result.trend}. "
    f"P-value: {result.p:.20f}"
)

Trend exists? True. Trend is increasing. P-value: 0.00000034793628089780


### MannKendallTest for cyclomatic complexity per method

In [171]:
# Mann-Kendall test on summed strict cyclomatic complexity divided by the
# number of declared methods.
result = mk.original_test(
    allMetricsAndQuestions["SumCyclomaticStrict"].div(allMetricsAndQuestions["CountDeclMethod"])
)
print(
    f"Trend exists? {result.h}. "
    f"Trend is {result.trend}. "
    f"P-value: {result.p:.20f}"
)

Trend exists? True. Trend is increasing. P-value: 0.00000290807819514072


### MannKendallTest for readability per file

In [172]:
# Mann-Kendall test on the "readability" column.
result = mk.original_test(allMetricsAndQuestions["readability"])
print(
    f"Trend exists? {result.h}. "
    f"Trend is {result.trend}. "
    f"P-value: {result.p:.20f}"
)

Trend exists? True. Trend is decreasing. P-value: 0.00000048361142401809


In [173]:
def doMannKendallTest(data):
    """Run the original Mann-Kendall trend test on ``data``, best-effort.

    Parameters
    ----------
    data
        The observations, passed unchanged to ``mk.original_test``.

    Returns
    -------
    The result object produced by ``mk.original_test``, or ``None`` when the
    test raises an exception.
    """
    try:
        return mk.original_test(data)
    except Exception:
        # Deliberately best-effort: a series the test cannot handle yields
        # None instead of aborting the caller. Only Exception is caught so
        # that KeyboardInterrupt / SystemExit still stop the notebook.
        return None

In [179]:
def collectForAllMetric(data):
    """Run the Mann-Kendall test on every column of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per metric; each column is passed to ``doMannKendallTest``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the column names of ``data`` with two columns:
        ``"trend"`` (the ``trend`` attribute of the test result) and ``"p"``
        (the p-value rendered as a string with 20 decimal places). Rows for
        metrics whose test could not be computed are left as NaN.
    """
    results = pd.DataFrame(index=data.columns, columns=["trend", "p"])
    for metric in data.columns:
        result = doMannKendallTest(data[metric])
        if result is None:
            # doMannKendallTest returns None when the test fails; keep the
            # pre-filled NaN row rather than crashing on attribute access.
            continue
        results.loc[metric, "trend"] = result.trend
        # Stored as fixed-point text so tiny p-values are shown without
        # scientific notation.
        results.loc[metric, "p"] = format(result.p, '.20f')

    return results

In [180]:
# Run the Mann-Kendall test on every column of allMetricsAndQuestions and
# collect the trend label and p-value per metric.
results = collectForAllMetric(allMetricsAndQuestions)

In [182]:
# Display the per-metric trend / p-value table.
results

Unnamed: 0,trend,p
AvgCyclomatic,increasing,0.00000000001656230708
AvgCyclomaticModified,increasing,0.00000000001127031801
AvgCyclomaticStrict,increasing,0.00000000001822675344
AvgEssential,increasing,0.00000000000178013160
AvgLine,increasing,0.00000000001434097285
...,...,...
SumCyclomaticStrict,increasing,0.00000000001434097285
SumEssential,increasing,0.00000000000024980018
numberJavaFiles,increasing,0.00000000000110400578
readability,decreasing,0.00000048361142401809


In [183]:
# Write the per-metric trend table to a CSV file in the working directory.
# NOTE(review): collectForAllMetric stores "p" as pre-formatted strings, so
# float_format presumably has no effect on that column - confirm.
results.to_csv(
    path_or_buf="trendTestResult.csv",
    float_format="%.15f",
)