In [None]:
def printStatus(index, size):
    """Print how much of the sample list has been processed, as a percentage.

    index -- position reached so far (the caller in this file passes a
             1-based position).
    size  -- total number of samples.
    """
    percentage = (index / size) * 100
    print("{0}% Completed samples".format(percentage))

In [None]:
from git import Repo

In [None]:
def getCommitsFrom(project):
    """Return the commits of a cloned sample as a list.

    project -- path of the sample below the ``repositories`` directory.

    The working tree is first force-checked-out to ``master``; the commits
    are then collected in the order ``Repo.iter_commits()`` yields them.
    """
    repo = Repo("repositories\\"+project)
    repo.git.checkout("master", "-f")
    return list(repo.iter_commits())

In [None]:
def checkoutTo(project, sha):
    """Force-check-out the working tree of a cloned sample at commit ``sha``.

    project -- path of the sample below the ``repositories`` directory.
    sha     -- revision handed to ``git checkout -f``.
    """
    Repo("repositories\\" + project).git.checkout(sha, "-f")

In [None]:
import os

In [None]:
def extractMetricsWithUnderstand(owner, project):
    """Drive the ``und`` command line tool to export metrics for one sample.

    The commands run in sequence through ``cmd /c``: create a Java database
    named after ``project``, add the sample's sources to it, enable all
    metrics, point the metrics output at ``<project>.csv``, analyze, and
    finally export the metrics. Exit codes are not inspected.
    """
    und = "understand\\SciTools\\bin\\pc-win64\\und"
    sources = "repositories\\"+owner+"\\"+project
    commands = (
        'cmd /c "{0} create -languages java {1}"'.format(und, project),
        'cmd /c "{0} add {1} {2}"'.format(und, sources, project),
        'cmd /c "{0} settings -metrics all {1}"'.format(und, project),
        'cmd /c "{0} settings -metricsOutputFile {1}.csv {1}"'.format(und, project),
        'cmd /c "{0} -quiet analyze {1}"'.format(und, project),
        'cmd /c "{0} metrics {1}"'.format(und, project),
    )
    for command in commands:
        os.system(command)

In [None]:
import pandas as pd

In [None]:
def sumMetricsPerSample(owner, project):
    """Collapse the exported ``<project>.csv`` into one row of summed metrics.

    owner, project -- identify the sample; the CSV is read from
                      ``<project>.csv`` in the current directory and the
                      resulting row is indexed by ``owner\\project``.

    Returns a DataFrame indexed by ``sample`` holding the sum of every
    numeric column over the rows whose ``Kind`` is ``"Public Class"``, plus
    a ``numberJavaFiles`` column with the count of rows whose ``Kind`` is
    ``"File"``. The frame has no rows when there is no ``"Public Class"``
    row to aggregate.
    """
    print(project)
    dataframe = pd.read_csv(project+".csv")
    kinds = dataframe["Kind"]
    # Counting matches directly yields 0 when no "File" row exists, where a
    # value_counts()["File"] lookup would raise KeyError.
    numberJavaFiles = int((kinds == "File").sum())
    # Work on a copy so adding the grouping column does not write into a
    # slice of the frame that was read from disk.
    classes = dataframe[kinds == "Public Class"].copy()
    classes["sample"] = owner+"\\"+project
    # numeric_only keeps text columns from being concatenated into the sums.
    summed = classes.groupby("sample").sum(numeric_only=True)
    summed["numberJavaFiles"] = numberJavaFiles
    return summed

In [None]:
import fnmatch

In [None]:
def getUnderstandMetrics(owner, project):
    """Export the sample's metrics with Understand and return them summed.

    Runs ``extractMetricsWithUnderstand`` to produce ``<project>.csv`` and
    hands back the result of ``sumMetricsPerSample`` for the same sample.
    """
    extractMetricsWithUnderstand(owner, project)
    return sumMetricsPerSample(owner, project)

In [None]:
def findPaths(pattern, path):
    """Return the paths of all files below ``path`` whose name matches ``pattern``.

    pattern -- ``fnmatch``-style pattern applied to the bare file name.
    path    -- directory to search recursively.

    Directories named exactly ``.git`` are not searched. The match is made
    on the directory name itself, so ``.github`` or a repository whose own
    name contains ``.git`` (e.g. ``demo.github.io``) is still searched.
    """
    result = []
    for root, dirs, files in os.walk(path):
        # Prune in place: os.walk then never descends into git metadata.
        dirs[:] = [d for d in dirs if d != '.git']
        result.extend(
            os.path.join(root, name)
            for name in files
            if fnmatch.fnmatch(name, pattern)
        )
    return result

In [None]:
pip install JPype1

In [None]:
# Boiler plate stuff to start the module
import jpype
from jpype import *
from statistics import mean

class Readability(object):
    """Scores the Java files of one checked-out sample via ``readability.jar``.

    The scoring is delegated to the Java class
    ``raykernel.apps.readability.eval.Main``, reached through JPype.
    """

    def __init__(self, project):
        """Start the JVM if needed and bind the scorer for ``project``.

        project -- path of the sample below the ``repositories`` directory.
        """
        super().__init__()
        self.project = project
        self.startJVM()
        self.readabilityPackage = JPackage("raykernel").apps.readability.eval.Main
        self.repositoryPath = "repositories\\"+project

    def startJVM(self):
        """Start the JVM with ``readability.jar`` on the class path, once."""
        if not jpype.isJVMStarted():
            jpype.startJVM(jpype.getDefaultJVMPath(), '-Djava.class.path=readability.jar',  '-ea', '-Xmx4096M', convertStrings=True)
            # Alternative call form left by the original author:
            # jpype.startJVM(classpath="readability.jar", convertStrings=True)

    def shutdownJVM(self):
        """Shut the JVM down."""
        jpype.shutdownJVM()

    def getReadability(self):
        """Return the mean score over every ``*.java`` file of the sample.

        Returns 0 when the sample contains no Java file. A file that cannot
        be opened or decoded is scored as empty text rather than skipped, so
        it still counts towards the mean.
        """
        javaFiles = findPaths("*.java", self.repositoryPath)
        if not javaFiles:
            return 0
        scores = []
        for javaFile in javaFiles:
            try:
                # The context manager closes the handle even if read() fails.
                with open(javaFile) as handle:
                    source = handle.read()
            except (OSError, UnicodeError):
                # Best effort: unreadable or undecodable files score as "".
                source = ""
            scores.append(float(self.readabilityPackage.getReadability(source)))
        return mean(scores)

In [None]:
def getReadability(owner, project):
    """Return the mean readability score of the sample ``owner\\project``.

    Builds a ``Readability`` scorer for the sample and returns what its
    ``getReadability`` method computes; the scorer is dropped on return.
    """
    scorer = Readability(owner+"\\"+project)
    return scorer.getReadability()

In [None]:
def getMetrics(commit, owner, project):
    """Gather all metrics of the currently checked-out state of a sample.

    commit         -- commit object supplying ``hexsha`` and
                      ``authored_datetime`` for the row.
    owner, project -- identify the sample being measured.

    Returns the summed Understand metrics extended with the ``commitSha``,
    ``commitDate`` and ``readability`` columns.
    """
    row = getUnderstandMetrics(owner, project)
    row["commitSha"] = commit.hexsha
    row["commitDate"] = commit.authored_datetime
    # Readability is computed last, after the Understand export has finished.
    row["readability"] = getReadability(owner, project)
    return row

In [None]:
def deleteUnusedFiles(sample):
    """Delete the ``<sample>.csv`` and ``<sample>.udb`` working files.

    Raises whatever ``os.remove`` raises if either file is missing.
    """
    for extension in (".csv", ".udb"):
        os.remove(sample + extension)

In [None]:
def createDirectoryIfNotExists(dirName):
    """Create ``dirName`` (and missing parents) unless the path already exists.

    Nothing happens when the path exists, whether it is a directory or not.
    """
    if os.path.exists(dirName):
        return
    os.makedirs(dirName)

In [None]:
def replaceSamplePathForWindowsLike(sample):
    """Return ``sample`` with every forward slash turned into a backslash."""
    segments = sample.split("/")
    return "\\".join(segments)

In [None]:
def extractMetricsByCommit(sample, commit):
    """Check the sample out at ``commit`` and return that state's metrics.

    sample -- ``owner\\project`` path of the sample; it must split into
              exactly two parts on the backslash.
    commit -- commit object whose ``hexsha`` selects the revision.
    """
    owner, project = sample.split("\\")
    sha = commit.hexsha
    checkoutTo(sample, sha)
    print("commit ======= " + sha)
    return getMetrics(commit, owner, project)

In [None]:
def extractMetricsForAllCommits(commits, sample):
    """Measure every commit of a sample and return one row per commit.

    commits -- sequence of commit objects, processed in the given order.
    sample  -- ``owner\\project`` path of the sample.

    After each commit the accumulated rows are rewritten to
    ``metrics\\<sample>.csv`` so an interrupted run keeps its partial
    results. Returns the accumulated DataFrame, which is empty when
    ``commits`` is empty (no file is written in that case).
    """
    outputPath = "metrics\\"+sample+".csv"
    total = len(commits)
    frames = []
    allCommits = pd.DataFrame()
    for index, commit in enumerate(commits):
        frames.append(extractMetricsByCommit(sample, commit))
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        allCommits = pd.concat(frames, ignore_index=True)
        # index + 1 commits are done at this point, so the last one reports 100%.
        print("{0}% of commits completed from sample {1}".format(((index + 1) / total * 100), sample))
        allCommits.to_csv(outputPath, index=False)
    return allCommits

In [None]:
def metricsByCommit(framework, samples):
    """Extract per-commit metrics for every sample and save them as CSV files.

    framework -- label of the sample set; accepted but not used here.
    samples   -- sized iterable of sample paths (``owner/project``).

    For each sample the full history is measured in the reverse of the
    order ``getCommitsFrom`` returns it, and the result is written to
    ``metrics\\<owner>\\<project>.csv``.
    """
    for ordinal, rawSample in enumerate(samples, start=1):
        sample = replaceSamplePathForWindowsLike(rawSample)

        printStatus(ordinal, len(samples))

        createDirectoryIfNotExists("metrics")

        history = getCommitsFrom(sample)
        history.reverse()

        owner, project = sample.split("\\")

        createDirectoryIfNotExists("metrics\\"+owner)

        # Manual resume hook (disabled): uncomment to continue right after
        # the commit below; the hash is already set for the next run.
        # for index, commit in enumerate(history):
        #     if commit.hexsha == "b6e4c433c0298ae765ef79143f6e67ebbf9a67d0":
        #         resumeAt = index
        # history = history[resumeAt+1:]

        collected = extractMetricsForAllCommits(history, sample)

        # Remove the intermediate <project>.csv / <project>.udb files.
        deleteUnusedFiles(project)

        collected.to_csv("metrics\\"+sample+".csv", index=False)

In [None]:
# Load the sample list of each framework; the cells below read the "path"
# column of these frames.
androidSamples = pd.read_csv("..\\1-GettingQuestions\\samplesWithQuestions\\androidSamples.csv")
awsSamples = pd.read_csv("..\\1-GettingQuestions\\samplesWithQuestions\\awsSamples.csv")
azureSamples = pd.read_csv("..\\1-GettingQuestions\\samplesWithQuestions\\azureSamples.csv")
springSamples = pd.read_csv("..\\1-GettingQuestions\\samplesWithQuestions\\springSamples.csv")

In [None]:
# Number of AWS sample paths loaded.
len(awsSamples["path"])

In [None]:
# Number of Azure sample paths loaded.
len(azureSamples["path"])

In [None]:
# Number of Spring sample paths loaded.
len(springSamples["path"])

In [None]:
# Number of Android sample paths loaded.
len(androidSamples["path"])

In [None]:
# Measure every commit of every Android sample; one CSV per sample is
# written below the metrics directory.
metricsByCommit("Android", androidSamples["path"])

In [None]:
# Measure every commit of every AWS sample; one CSV per sample is written
# below the metrics directory.
metricsByCommit("AWS", awsSamples["path"])

In [None]:
# Measure every commit of every Spring sample; one CSV per sample is
# written below the metrics directory.
metricsByCommit("Spring", springSamples["path"])

In [None]:
# Measure every commit of every Azure sample; one CSV per sample is written
# below the metrics directory.
metricsByCommit("Azure", azureSamples["path"])