In [None]:
import os
import subprocess
import re
import pandas as pd
import shutil

# Text encoding handed to pandas when editClass, editMethod and supportSmellNum
# read or write their CSV files.
encoding="cp932"
# Base directory joined with "gitProject" (checked-out repositories) and with
# "ck_anonymous\\ck\\target" (working directory for the CK jar).
RootFolder="C:\\Users\\syuuj"

def searchFile(ROOT_PATH,saveFile):
    """List every ``.java`` file under ROOT_PATH in saveFile, grouped by project folder.

    The tree below ROOT_PATH is walked with ``os.walk``. Files whose full path
    contains ``package-info`` are skipped. Each remaining path is grouped by the
    part of the path that precedes its first ``src`` directory component.

    The output file is UTF-8 text: one path per line, with an empty line after
    each group. Groups appear in the order they were first encountered.

    Args:
        ROOT_PATH: Directory to scan recursively.
        saveFile: Path of the listing file to (over)write. Missing parent
            directories are created.
    """
    saveDir=os.path.dirname(saveFile)
    # dirname is "" for a bare file name, and os.makedirs("") would raise.
    if saveDir:
        os.makedirs(saveDir,exist_ok=True)

    allFile={}
    for pathname,dirnames,filenames in os.walk(ROOT_PATH):
        for filename in filenames:
            if not filename.endswith(".java"):
                continue
            fullPath=os.path.join(pathname,filename)
            if "package-info" in fullPath:
                continue
            # Accept both separators so the grouping also works when the path
            # contains "/" (non-Windows hosts, or a ROOT_PATH written with "/").
            parentFolder=re.sub(r"[\\/]src[\\/].*","",fullPath)
            allFile.setdefault(parentFolder,[]).append(fullPath)

    with open(saveFile,"w",encoding="utf-8") as f:
        for paths in allFile.values():
            for pathfile in paths:
                print(pathfile,file=f)
            # Blank line terminates the group.
            print(file=f)
    
def editClass(Spoonfile, CKfile, outPutFile):
    """Inner-join the Spoon and CK class-level CSV files and write the result.

    Steps:
      1. Read CKfile and keep only the metric columns listed below.
      2. Rewrite ``$Anonymous`` to ``$`` in the CK ``class`` column.
      3. Inner-join the Spoon rows with the CK rows on ``file`` and ``class``.
      4. Replace every ``$`` in the joined ``class`` column with ``.``.
      5. Write the joined table to outPutFile without the index.

    All CSV I/O uses the module-level ``encoding``.

    Args:
        Spoonfile: Path of the Spoon class CSV (needs ``file`` and ``class``).
        CKfile: Path of the CK class CSV.
        outPutFile: Path of the CSV to (over)write.
    """
    ckColumns=[
        "file","class","type","dit","fanin","fanout","lcc","lcom*","loc","noc",
        "rfc","innerClassesQty","totalFieldsQty","totalMethodsQty","wmc",
    ]
    CK=pd.read_csv(CKfile,encoding=encoding)[ckColumns].copy()
    # "$" is a regex anchor. regex=False forces a literal match on every
    # pandas version (the default for `regex` changed in pandas 2.0).
    CK["class"]=CK["class"].str.replace("$Anonymous","$",regex=False)

    spoon=pd.read_csv(Spoonfile,encoding=encoding)
    join=spoon.merge(CK,how="inner",on=["file","class"])
    join["class"]=join["class"].str.replace("$",".",regex=False)
    join.to_csv(outPutFile,mode="w",index=False,encoding=encoding)

def editMethod(SpoonFile, CKFile, outPutFile):
    """Inner-join the Spoon and CK method-level CSV files and write the result.

    CK side: keep the metric columns listed below, rewrite ``$Anonymous`` to
    ``$`` in ``class``, and strip ``<...>`` spans from ``method``.
    Spoon side: in ``method``, replace ``$`` with ``.`` and remove spaces.
    The two tables are inner-joined on ``file``, ``class`` and ``method``.
    Every ``$`` in the joined ``class`` column then becomes ``.`` and the
    result is written to outPutFile without the index.

    All CSV I/O uses the module-level ``encoding``.

    Args:
        SpoonFile: Path of the Spoon method CSV.
        CKFile: Path of the CK method CSV.
        outPutFile: Path of the CSV to (over)write.
    """
    ckColumns=[
        "file","class","method","fanin","fanout","wmc","loc","returnsQty",
        "variablesQty","parametersQty","loopQty","anonymousClassesQty",
        "innerClassesQty","line",
    ]
    CK=pd.read_csv(CKFile,encoding=encoding)[ckColumns].copy()
    # "$" is a regex anchor. regex=False forces a literal match on every
    # pandas version (the default for `regex` changed in pandas 2.0).
    CK["class"]=CK["class"].str.replace("$Anonymous","$",regex=False)
    # NOTE(review): each match stops at the first ">", so a nested generic
    # such as "A<B<C>>" leaves a trailing ">" behind.
    CK["method"]=CK["method"].str.replace(r"<[^>]*>","",regex=True)

    spoon=pd.read_csv(SpoonFile,encoding=encoding)
    spoon["method"]=spoon["method"].str.replace("$",".",regex=False)
    spoon["method"]=spoon["method"].str.replace(" ","",regex=False)

    join=spoon.merge(CK,how="inner",on=["file","class","method"])
    join["class"]=join["class"].str.replace("$",".",regex=False)
    join.to_csv(outPutFile,mode="w",index=False,encoding=encoding)
  
def editFile(saveMetricsFile):
    """Build the class-level and method-level join CSVs for one metrics prefix.

    ``saveMetricsFile`` is a string prefix; the input and output file names
    are formed by appending fixed suffixes to it. ``editClass`` runs first,
    then ``editMethod``.
    """
    # (spoon suffix, CK suffix, output suffix) for the class and method levels.
    classSuffixes=("spoonClass.csv","class.csv","joinClass.csv")
    methodSuffixes=("spoonMethod.csv","method.csv","joinMethod.csv")

    classPaths=[saveMetricsFile+suffix for suffix in classSuffixes]
    methodPaths=[saveMetricsFile+suffix for suffix in methodSuffixes]

    editClass(*classPaths)
    editMethod(*methodPaths)

def spoonAnalyze(textFile,saveFolder):
    """Run the Spoon jar on ``textFile``, passing ``saveFolder`` as its second argument.

    The command is executed from ``spoon\\target`` under the current working
    directory. ``check=True`` makes a non-zero exit status raise
    ``subprocess.CalledProcessError``.
    """
    jarDir=os.path.join(os.getcwd(),"spoon\\target")
    subprocess.run(
        ["java","-jar","demo-1.0-snapshot.jar",textFile,saveFolder],
        check=True,
        cwd=jarDir,
    )

def ckAnalyze(gitProject,saveFolder):
    """Run the CK jar on ``gitProject``, passing ``saveFolder`` as its last argument.

    The command is executed from ``ck_anonymous\\ck\\target`` under the
    module-level ``RootFolder``. ``check=True`` makes a non-zero exit status
    raise ``subprocess.CalledProcessError``.
    """
    jarDir=os.path.join(RootFolder,"ck_anonymous\\ck\\target")
    command=["java","-jar","ck-0.7.1-SNAPSHOT-jar-with-dependencies.jar"]
    # Positional arguments for the jar: project, three fixed flags, output.
    command+=[gitProject,"False","0","True",saveFolder]
    subprocess.run(command,check=True,cwd=jarDir)
    
def gitCheckout(gitProject,version):
    """Run ``git checkout <version>`` inside the working tree at ``gitProject``.

    ``check=True`` makes a non-zero exit status raise
    ``subprocess.CalledProcessError``.
    """
    subprocess.run(["git","checkout",version],check=True,cwd=gitProject)
    
def getCodeSmellClass():
    """Return the distinct (``nomeSistema``, ``class``) pairs from both ground-truth CSVs.

    Both files are read from the ``ground truth`` folder relative to the
    current working directory. The two-column slices are concatenated (God
    Class rows first) and exact duplicate rows are dropped.
    """
    sources=("ground truth\\GodClass.csv","ground truth\\RefusedBequest.csv")
    # Load both files before slicing so a missing file surfaces first.
    loaded=[pd.read_csv(path) for path in sources]
    trimmed=[frame[["nomeSistema","class"]] for frame in loaded]
    return pd.concat(trimmed).drop_duplicates()

def supportSmellNum(smellFileName,saveFile):
    """Print how many ground-truth smelly classes appear in the joined class CSV.

    Output, in order:
      1. ``smellFileName``
      2. ``<matched> / <total>``, where total is the number of ground-truth
         rows whose ``nomeSistema`` equals ``smellFileName`` and matched is the
         number of distinct class names also present in
         ``saveFile + "joinClass.csv"``
      3. the ground-truth rows whose class is missing from that CSV

    Args:
        smellFileName: Value of ``nomeSistema`` to select from the ground truth.
        saveFile: String prefix of the metrics files; ``joinClass.csv`` is
            appended to it.

    Raises:
        KeyError: If ``smellFileName`` does not occur in the ground truth
            (raised by ``get_group``).
    """
    truthDF=getCodeSmellClass()
    classes=truthDF.groupby("nomeSistema").get_group(smellFileName)
    print(smellFileName)
    df=pd.read_csv(saveFile+"joinClass.csv",encoding=encoding)
    # Compute the overlap once; the original built it twice and kept an
    # unused copy.
    matched=set(df['class']) & set(classes['class'])
    print(len(matched),"/",len(classes))
    print(classes[~classes['class'].isin(df['class'])])
    

In [None]:
# Run the whole pipeline for every project listed in version.txt.
# Each non-blank line is split on "," into:
#   projectName, first version, second version, ground-truth system name
with open("version.txt","r") as f:
    versionString=f.read()

# A trailing newline used to produce an empty last entry, which crashed with
# IndexError on tmpFactor[1]; blank lines are now skipped.
projects=[line for line in versionString.splitlines() if line.strip()]
for project in projects:
    tmpFactor=project.split(",")
    projectName=tmpFactor[0]
    versions=[tmpFactor[1],tmpFactor[2]]
    smellName=tmpFactor[3]

    gitProject=os.path.join(RootFolder,"gitProject",projectName)
    saveFolder=os.path.join(os.getcwd(),"data1",projectName)
    textFile=os.path.join(os.getcwd(),"data1",projectName,"full.txt")
    # Start from an empty output folder so stale results never mix in.
    if os.path.isdir(saveFolder):
        shutil.rmtree(saveFolder)
    os.makedirs(saveFolder)

    # i (1 or 2) becomes the file-name prefix of everything written for that
    # version, e.g. "<saveFolder>\1joinClass.csv".
    for i,version in enumerate(versions,start=1):
        saveFile=os.path.join(saveFolder,str(i))
        gitCheckout(gitProject,version)
        searchFile(gitProject,textFile)
        spoonAnalyze(textFile,saveFile)
        ckAnalyze(gitProject,saveFile)
        editFile(saveFile)
        # The ground-truth coverage report is printed for the first version only.
        if i==1:
            supportSmellNum(smellName,saveFile)

        
    
    



In [None]:
# Print the number of joined method rows for the first version of every
# project listed in firstStep.txt (the project name is the first "," field).
with open("firstStep.txt","r") as f:
    stringProject=f.read()
# Skip blank lines: a trailing newline used to produce an empty folder name
# and a failing read of "data1\\\\1joinClass.csv".
folders=[line for line in stringProject.splitlines() if line.strip()]
for folder in folders:
    tmp=folder.split(",")
    folderName=tmp[0]
    # The join files are written with `encoding` by editClass/editMethod, so
    # they are read back with the same codec.
    df1=pd.read_csv("data1\\"+folderName+"\\1joinClass.csv",encoding=encoding)
    df2=pd.read_csv("data1\\"+folderName+"\\1joinMethod.csv",encoding=encoding)
    print(folderName)
    #print("class : ",len(df1))
    print("method : ",len(df2))
    print()

In [None]:
# Rebuild the method-level join CSV for both versions of every project listed
# in firstStep.txt (the project name is the first "," field).
with open("firstStep.txt","r")as f:
    modifyString=f.read()

# Skip blank lines: a trailing newline used to produce an empty folder name,
# so editMethod failed on a non-existent path.
modifyFolders=[line for line in modifyString.splitlines() if line.strip()]
for modifyFolder in modifyFolders:
    tmpModify=modifyFolder.split(",")
    folder=tmpModify[0]
    # Versions are stored under the file-name prefixes "1" and "2".
    for i in (1,2):
        spoonFile="data1\\"+folder+"\\"+str(i)+"spoonMethod.csv"
        ckFile="data1\\"+folder+"\\"+str(i)+"method.csv"
        outPutFile="data1\\"+folder+"\\"+str(i)+"joinMethod.csv"
        editMethod(spoonFile,ckFile,outPutFile)

In [4]:
import pandas as pd
pd.set_option("display.max_columns",None)
pd.set_option("display.max_rows",None)
pd.set_option("display.max_colwidth",None)

# Estimate how many mockserver methods can be paired between the Spoon output
# (flag 1) and the CK output (flag 2).
df1=pd.read_csv("data1\\mockserver\\1spoonMethod.csv")
df2=pd.read_csv("data1\\mockserver\\1method.csv")
df1["flag"]=1
df2["flag"]=2
# .copy() so the column assignments below write to independent frames rather
# than to slices of the frames that were just read.
df1=df1[["file","class","method","line","flag"]].copy()
df2=df2[["file","class","method","line","flag"]].copy()
# "$" is a regex anchor. regex=False forces a literal match on every pandas
# version (the default for `regex` changed in pandas 2.0).
df2["class"]=df2["class"].str.replace("$Anonymous","$",regex=False)
df1["method"]=df1["method"].str.replace("$",".",regex=False)
df1["method"]=df1["method"].str.replace(" ","",regex=False)
df2["method"]=df2["method"].str.replace(r"<[^>]*>", "", regex=True)
join=pd.concat([df1,df2])
# Pass 1: remove rows that pair up on (file, class, method).
drop=join.drop_duplicates(keep=False,subset=["file","class","method"])
# Pass 2: remove rows that pair up on (file, class, line).
drop2=drop.drop_duplicates(keep=False,subset=["file","class","line"])
# Pass 3: shift the Spoon line numbers by one and pair on line again.
# Work on a copy so drop2 is not modified through an alias.
drop3=drop2.copy()
drop3.loc[drop3["flag"] == 1, "line"] += 1
drop3=drop3.drop_duplicates(keep=False,subset=["file","class","line"])
print("ck method数 : ",len(df2))
# Each removed pair accounts for two rows of `join`.
print("結合完了数 : ",int((len(join)-len(drop3))/2))

ck method数 :  7226
結合完了数 :  7214


In [None]:
pd.set_option("display.max_columns",None)
pd.set_option("display.max_colwidth",None)
pd.set_option("display.max_rows",None)
# List the guava classes that appear in only one of the Spoon output (flag 1)
# and the CK output (flag 2).
df1=pd.read_csv("data1\\guava\\1spoonClass.csv")
df2=pd.read_csv("data1\\guava\\1class.csv")
df1["flag"]=1
df2["flag"]=2
# .copy() so the assignment to df2["class"] writes to an independent frame.
df1=df1[["file","class","flag"]].copy()
df2=df2[["file","class","flag"]].copy()
# "$" is a regex anchor. regex=False forces a literal match on every pandas
# version (the default for `regex` changed in pandas 2.0).
df2["class"]=df2["class"].str.replace("$Anonymous","$",regex=False)
join=pd.concat([df1,df2])
# keep=False drops every (file, class) pair found more than once, leaving the
# unmatched rows only.
drop=join.drop_duplicates(keep=False,subset=["file","class"])
print(len(df1),len(df2),len(join),len(drop))
print(len(drop[drop["flag"]==1]),len(drop[drop["flag"]==2]))
drop