In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import math

#### Load the result data

#### Settings ##############

# Path to the file containing the analysis results.
resultFile = "./result.json"

############################

# Read the json file into a table and swap its rows and columns in one step,
# so that the later cells can iterate over the columns of the transposed table.
jsonResult = pd.read_json(resultFile).transpose()

# function to compute the pearson correlation between two lists of samples
def pearson(x, y):
    """Compute the Pearson correlation coefficient of two sample series.

    Parameters:
        x: pandas Series holding the first set of samples.
        y: pandas Series holding the second set of samples; it is multiplied
           element-wise with ``x`` via ``Series.multiply``.

    Returns:
        The correlation coefficient. 0 is returned for the degenerate cases
        in which the coefficient is undefined: at most one sample, or a
        denominator that is not positive (e.g. one of the series is constant).
    """
    n = len(x)
    if n <= 1:
        return 0

    # raw sums needed by the computational form of the coefficient
    x_sum = sum(x)
    y_sum = sum(y)
    x_sqSum = sum(x * x)
    y_sqSum = sum(y * y)
    xy_sum = sum(x.multiply(y))

    t = n * xy_sum - x_sum * y_sum
    radicand = (n * x_sqSum - x_sum * x_sum) * (n * y_sqSum - y_sum * y_sum)

    # A constant series makes the radicand zero (or marginally negative through
    # floating point rounding). Dividing by its root would give NaN or raise,
    # so treat it like the single-sample case and report no correlation.
    # A NaN radicand fails this comparison and still propagates as NaN below.
    if radicand <= 0:
        return 0

    return t / math.sqrt(radicand)

In [None]:
#### Print the available metrics and corresponding measures

# loop over the metrics (the columns of the result table)
for m in jsonResult:
    # print the metric name
    print(m + ":")

    # loop over the measures of the first row of this metric;
    # .iloc selects by position, whereas a plain [0] is a label lookup that
    # only works through a deprecated positional fallback when the index
    # holds names instead of integers
    for c in jsonResult[m].iloc[0]:
        # print the measure name
        print("  " + c)

In [None]:
#### Plot the metrics

#### Settings ##############

# List of metrics to be plotted.
# The metrics are plotted over one another from left to right.
# For each metric a tuple with the metric name and
# a list of measures needs to be specified.
# The available metrics can be viewed using the previous cell.
# An empty list indicates all measures are to be used for a metric.
# If no tuples are provided all metrics are used.
# Example: [("Git", ["CommitCount"]), ("Sct", [])]
metrics = [("Git", ["CommitCount"]), ("Sct", [])]

# Show the names of the corresponding gits on plotted points.
showNames = False

# Remove the listed repositories from the dataset.
filteredSamples = []

############################

# configure data
data = jsonResult.drop(filteredSamples)

# get labels
labels = data.index

# extract metrics: one DataFrame per metric, one column per measure,
# all of them indexed like `data` so rows line up with `labels`
ms = []

# test if metrics is empty
if len(metrics) == 0:
    # extract all metrics
    for m in data:
        ms.append(pd.DataFrame(list(data[m]), index = data.index))
else:
    # extract specified metrics
    for m in metrics:
        if len(m[1]) == 0:
            # extract all measures
            ms.append(pd.DataFrame(list(data[m[0]]), index = data.index))
        else:
            # only extract specified measures
            ms.append(pd.DataFrame(list(data[m[0]]), index = data.index)[m[1]])

# plot every measure of a metric against every measure of each later metric
for m in range(len(ms)):
    for m2 in range(m+1, len(ms)):
        for c in ms[m]:
            for c2 in ms[m2]:
                # create scatter plot (c2 on the x axis, c on the y axis)
                plt.scatter(ms[m2][c2], ms[m][c])

                # calculate pearson correlation
                pearCo = pearson(ms[m2][c2], ms[m][c])

                # show names: m and m2 are positions in ms, so the plotted
                # columns have to be looked up through ms; the labels share
                # the index of the columns and can be zipped alongside
                if showNames:
                    for label, x, y in zip(labels, ms[m2][c2], ms[m][c]):
                        plt.annotate(label, (x,y), textcoords="offset points", xytext=(0,10), ha="center")

                # display plot
                plt.title(c + " over " + c2 + " " + str(pearCo))
                plt.show()

In [None]:
#### Display a metric

#### Settings ##############

# Name of the metric to be displayed.
metric = "Git"

# Remove the listed repositories from the dataset.
filteredSamples = []

############################

# drop the filtered repositories from the result table
data = jsonResult.drop(filteredSamples)

# build a table for the chosen metric with one column per measure,
# keeping the row labels of the filtered data
m = pd.DataFrame(data[metric].tolist(), index=data.index)

# draw one horizontal bar chart per measure, bars ordered by value
for measure in m.columns:
    ordered = m[measure].sort_values()
    plt.barh(ordered.index, ordered)
    plt.title(measure)
    plt.show()