In [1]:
import json
import pandas as pd
import numpy as np
import networkx as nx
import os
import shutil
import subprocess
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn import preprocessing
from sklearn.cluster import AgglomerativeClustering
from zipfile import ZipFile
from filecmp import dircmp
import h2o

In [80]:
is_ck_from_server = False

In [81]:
github_link = 'https://github.com'

In [82]:
project_name = 'Redisson'
version_name = 'redisson-3.16.0'

n_cluster_divisible_arr = [5,7,10,15,20,25]
affinity_arr = ['euclidean', 'manhattan', 'cosine']
linkage_arr = ['complete', 'average', 'single']

n_cluster = 300
affinity = 'cosine'
linkage = 'single'

In [83]:
depends_dir = f'C:/Users/tanji/Desktop/SoftwareRemodularization/raw_depends/{project_name}/{project_name}_{version_name}.json'
rootdir = f'C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/{project_name}/{project_name}_{version_name}'

In [84]:
print(rootdir)

C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/Redisson/Redisson_redisson-3.16.0


In [85]:
full_dir_arr = []
for root, dirs, files in os.walk(rootdir):
    #print(root)
    #print(dirs)
    for element in files:
        if '.java' in element:
            dir_string = root + '\\' + element
            full_dir_arr.append(dir_string)

cluster_dict = {}
cluster_tree = {}


for element in full_dir_arr:
    element = element.split('\\')
    child = element[-1]
    parent = element[-2]
    cluster_tree[child] = parent

In [86]:
cluster_tree

{'ElementsSubscribeService.java': 'redisson',
 'JndiRedissonFactory.java': 'redisson',
 'MapWriteBehindTask.java': 'redisson',
 'MapWriterTask.java': 'redisson',
 'PubSubEntry.java': 'handler',
 'PubSubMessageListener.java': 'redisson',
 'PubSubPatternMessageListener.java': 'redisson',
 'PubSubPatternStatusListener.java': 'redisson',
 'PubSubStatusListener.java': 'redisson',
 'QueueTransferService.java': 'redisson',
 'QueueTransferTask.java': 'redisson',
 'RedisClusterNodes.java': 'redisson',
 'RedisNodes.java': 'redisnode',
 'Redisson.java': 'redisson',
 'RedissonAtomicDouble.java': 'redisson',
 'RedissonAtomicLong.java': 'redisson',
 'RedissonBaseAdder.java': 'redisson',
 'RedissonBaseLock.java': 'redisson',
 'RedissonBatch.java': 'redisson',
 'RedissonBinaryStream.java': 'redisson',
 'RedissonBitSet.java': 'redisson',
 'RedissonBlockingDeque.java': 'redisson',
 'RedissonBlockingQueue.java': 'redisson',
 'RedissonBloomFilter.java': 'redisson',
 'RedissonBoundedBlockingQueue.java': 'r

In [87]:
len(cluster_tree)

1295

In [88]:
try:
    os.mkdir('raw_depends/' + project_name)
except:
    pass

command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & ' + f"java -jar depends.jar java C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/{project_name}/{project_name}_{version_name}" +  f' ../raw_depends/{project_name}/{project_name}_{version_name}' 
print(command)
os.system(command)

cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & java -jar depends.jar java C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/Redisson/Redisson_redisson-3.16.0 ../raw_depends/Redisson/Redisson_redisson-3.16.0


0

In [89]:
with open(depends_dir) as f:
    depends_results = json.load(f)
    index  = 0
    var_array = []
    print(len(depends_results['variables']))
    for value in depends_results['variables']:
        var_array.append([index, value.split('\\')[-1]])
        #print(index, value)
        index += 1

1849


In [90]:
with open(depends_dir) as f:
    depends_results = json.load(f)
    index  = 0
    var_array = []
    print(len(depends_results['variables']))
    print(len(depends_results['cells']))
    for value in depends_results['variables']:
        var_array.append([index, value.split('\\')[-1]])
        #print(index, value)
        index += 1

    var_df = pd.DataFrame(var_array)
    var_df.columns = ['index_val', 'name']

    feature_list = {}
    feature_index = 2
    for element in depends_results['cells']:
        #print(element)
        try:
            for a in element['values']:
                if a not in feature_list:
                    feature_list[a] = feature_index
                    feature_index += 1
                #print(a['Call'])


        except:
            pass

    feature_arr = []
    for element in depends_results['cells']:
        #print(array)
        array = [0] * (len(feature_list) + 2)
        values = dict(element['values'])
        #print(element)
        array[0] = element['src']
        array[1] = element['dest']
        for feature in feature_list:
            try:
                value = values[feature]
                array[feature_list[feature]] = value
            except:
                pass
        #print(array)
        feature_arr.append(array)

    feature_df = pd.DataFrame(feature_arr)
    col_names = ['src', 'dest']
    for element in feature_list:
        col_names.append(element)
    feature_df.columns = col_names

    feature_df['sum'] = feature_df.sum(axis=1) - feature_df['src'] - feature_df['dest']
    #G = nx.Graph()
    G = nx.path_graph(len(depends_results['variables']))
    for index, row in feature_df.iterrows():
        G.add_edge(row['src'], row['dest'], weight=row['sum'])

    adj_mat = nx.adjacency_matrix(G)
    adj_mat_df = pd.DataFrame(adj_mat.toarray())
    np.fill_diagonal(adj_mat_df.values, adj_mat_df.values.max())
    x = adj_mat_df.values
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)
    adj_mat_df = pd.DataFrame(x_scaled)
    
    sum_dependency_df = nx.to_pandas_adjacency(G)
    sum_dependency_df['sum'] = sum_dependency_df.sum(axis=1)
    final_dependency_df = sum_dependency_df['sum']

1849
32182


In [91]:
final_dependency_df

0       10.0
1       26.0
2        7.0
3       80.0
4        3.0
        ... 
1844    76.0
1845    32.0
1846     9.0
1847    66.0
1848    39.0
Name: sum, Length: 1849, dtype: float64

In [92]:
adj_mat_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1839,1840,1841,1842,1843,1844,1845,1846,1847,1848
0,1.00000,0.00189,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00000,0.0,0.0,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
1,0.00189,1.00000,0.00189,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00000,0.0,0.0,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
2,0.00000,0.00189,1.00000,0.00189,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00000,0.0,0.0,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
3,0.00000,0.00000,0.00189,1.00000,0.00189,0.00189,0.00189,0.00189,0.00189,0.00189,...,0.0,0.00000,0.0,0.0,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
4,0.00000,0.00000,0.00000,0.00189,1.00000,0.00189,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00000,0.0,0.0,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1844,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00000,0.0,0.0,0.00189,1.00000,0.00189,0.00000,0.00000,0.00000
1845,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00189,0.0,0.0,0.00000,0.00189,1.00000,0.00189,0.00000,0.00000
1846,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00189,0.0,0.0,0.00000,0.00000,0.00189,1.00000,0.00189,0.00000
1847,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.00000,0.0,0.0,0.00000,0.00000,0.00000,0.00189,1.00000,0.00189


In [93]:
feature_df

Unnamed: 0,src,dest,Import,Use,Call,Parameter,Contain,Return,Create,Implement,Cast,Throw,Extend,Annotation,sum
0,288,1703,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,288,1702,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,288,1705,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,288,1704,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,288,1701,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32177,1569,932,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
32178,748,820,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
32179,1569,931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
32180,1569,934,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [94]:
if is_ck_from_server is True:
    ck_metrics = pd.read_csv(f'ck_results_invi/{project_name}_{version_name}_class.csv')
    ck_metrics['class_name'] = ck_metrics['file'].str.split('\\').str[-1]
    ck_metrics['file'] = ck_metrics['file'].str.replace(r'E:\\', r'C:\\Users\\tanji\\Desktop\\',regex=True)
    ck_metrics
else:
    print('Currently processing: ' + project_name + version_name)
    command = f'java -jar ck-0.3.3-SNAPSHOT-jar-with-dependencies.jar raw_sourcecode/{project_name}/{project_name}_{version_name}'
    print(command)
    os.system(command)
    os.rename('class.csv', project_name + '_' + version_name + '_class.csv')
    os.rename('field.csv',  project_name + '_' + version_name + '_field.csv')
    os.rename('method.csv', project_name + '_' + version_name + '_method.csv')

    shutil.move(project_name + '_' + version_name + '_class.csv', 'ck_results_invi/' + project_name + '_' + version_name + '_class.csv')
    shutil.move(project_name + '_' + version_name + '_field.csv', 'ck_results_invi/' + project_name + '_' + version_name + '_field.csv')
    shutil.move(project_name + '_' + version_name + '_method.csv', 'ck_results_invi/' + project_name + '_' + version_name + '_method.csv')
    ck_metrics = pd.read_csv(f'ck_results_invi/{project_name}_{version_name}_class.csv')
    ck_metrics['class_name'] = ck_metrics['file'].str.split('\\').str[-1]

Currently processing: Redissonredisson-3.16.0
java -jar ck-0.3.3-SNAPSHOT-jar-with-dependencies.jar raw_sourcecode/Redisson/Redisson_redisson-3.16.0


In [95]:
ck_metrics.head()

Unnamed: 0,file,class,type,cbo,wmc,dit,rfc,lcom,totalMethods,staticMethods,...,assignmentsQty,mathOperationsQty,variablesQty,maxNestedBlocks,anonymousClassesQty,subClassesQty,lambdasQty,uniqueWordsQty,modifiers,class_name
0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.api.annotation.REntity$DEFAULT,class,3,2,2,0,1,2,0,...,0,0,0,0,0,0,0,14,16,REntity.java
1,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.api.annotation.RObjectField$DEFAULT,class,3,2,2,0,1,2,0,...,0,0,0,0,0,0,0,14,16,RObjectField.java
2,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.api.AutoClaimResult,class,3,4,1,0,2,4,0,...,3,0,3,0,0,0,0,23,1,AutoClaimResult.java
3,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.api.BatchOptions,class,2,16,1,1,102,16,1,...,8,1,7,0,0,0,0,119,17,BatchOptions.java
4,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.api.BatchResult,class,1,3,1,0,0,3,0,...,2,0,2,0,0,0,0,23,1,BatchResult.java


In [96]:
cluster = AgglomerativeClustering(n_clusters=n_cluster, affinity=affinity, linkage=linkage)
cluster_result = cluster.fit_predict(adj_mat_df)

#ck_metrics = pd.read_csv(f'ck_results_invi/{project_name}_{version_name}_class.csv')
#ck_metrics['class_name'] = ck_metrics['file'].str.split('\\').str[-1]
filename_a = 'C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/' + project_name + '/' + project_name + '_' + version_name + '_' + str(n_cluster) + '_' +str(affinity) + '_' + str(linkage) + '_a.rsf'
filename_b = 'C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/' + project_name + '/' + project_name + '_' + version_name + '_' + str(n_cluster) + '_' +str(affinity) + '_' + str(linkage) + '_b.rsf'

ck_metrics.head()

with open(filename_a, 'w') as f:
    for key, value in cluster_tree.items():
        f.write('contain ' + str(value).replace(" ",'') + ' ' + str(key).replace(' ','') + '\n')

arr_a_rsf = []
arr_b_rsf = []
with open(filename_b, 'w') as f:
    len_b_rsf = 0
    duplicate_array = []
    refactor_arr_pretty_print = []
    refactor_arr = []
    main_data_arr = []
    clustering_result_dict = {}
    num_line_affected = 0
    num_dependency = 0
    actual_num_of_classes_touched = 0
    for i in range(len(cluster_result)):
        #print(i)
        try:
            subject = var_df[var_df['index_val'] == i]['name'].values[0]
            #print(subject)
            #print(subject in cluster_tree)
            if subject in cluster_tree and subject not in duplicate_array:
                #print(element)
                duplicate_array.append(subject)
                string = "contain " + str(cluster_result[i]) + " " + subject + "\n"
                arr_b_rsf.append(subject)
                len_b_rsf += 1
                f.write(string)
                #print(subject)
                class_metrics = ck_metrics.loc[ck_metrics['class_name'] == subject].values.tolist()[0]
                
                #print(ck_metrics.loc[ck_metrics['class_name'] == subject]['file'].values[0])
                
                subjected_file = ck_metrics.loc[ck_metrics['class_name'] == subject]['file'].values[0]
                
                #print(subjected_file)
                
                num_dependency = final_dependency_df.iloc[cluster_result[i]]
                num_line_affected += sum(1 for line in open(subjected_file))
                
                
                class_metrics.append(num_dependency)
                class_metrics.append(num_line_affected)
                main_data_arr.append(class_metrics)
                
                
                ## Important
                ## This is assuming that during Depends, the file structure is already sorted
                ## I.e. going through the directory structure depth wise, not breath wise
                original_parent = cluster_tree[subject]
                try:
                    to_check_parent = clustering_result_dict[cluster_result[i]]
                    #print(original_parent, to_check_parent)
                    if original_parent != to_check_parent:
                        #print(i)
                        #refactor_arr.append(f'Move {subject} to {str(i)}')
                        #refactor_arr.append(f'Move {subject} to {str(cluster_result[i])}')
                        temp_arr = []
                        temp_arr.append(subject)
                        temp_arr.append(cluster_result[i-1])
                        refactor_arr.append(temp_arr)
                        refactor_arr_pretty_print.append(f'Move {subject} to {str(cluster_result[i-1])}')
                        actual_num_of_classes_touched += 1
                    
                    clustering_result_dict[cluster_result[i]] = to_check_parent
                except:
                    clustering_result_dict[cluster_result[i]] = original_parent
                
            else:
                #print(subject)
                pass
        except Exception as e:
            #print(e)
            pass
f.close()

command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1 & ' + 'java MoJo ' + filename_a + ' ' + filename_b + ' >> ' + project_name + '/' + project_name + '_results.txt'
#print(command)
os.system(command)

0

In [97]:
cluster_result

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [98]:
print(len(refactor_arr_pretty_print))
refactor_arr_pretty_print

1135


['Move RedissonExtensionTest.java to 0',
 'Move JndiRedissonRegionFactory.java to 0',
 'Move RedissonRegionFactory.java to 0',
 'Move BaseRegion.java to 151',
 'Move RedissonCollectionRegion.java to 0',
 'Move RedissonEntityRegion.java to 0',
 'Move RedissonNaturalIdRegion.java to 0',
 'Move RedissonQueryRegion.java to 0',
 'Move RedissonTimestampsRegion.java to 0',
 'Move AbstractReadWriteAccessStrategy.java to 0',
 'Move NonStrictReadWriteCollectionRegionAccessStrategy.java to 159',
 'Move NonStrictReadWriteEntityRegionAccessStrategy.java to 0',
 'Move NonStrictReadWriteNaturalIdRegionAccessStrategy.java to 0',
 'Move ReadOnlyCollectionRegionAccessStrategy.java to 0',
 'Move ReadOnlyEntityRegionAccessStrategy.java to 0',
 'Move ReadOnlyNaturalIdRegionAccessStrategy.java to 0',
 'Move ReadWriteCollectionRegionAccessStrategy.java to 0',
 'Move ReadWriteEntityRegionAccessStrategy.java to 0',
 'Move ReadWriteNaturalIdRegionAccessStrategy.java to 0',
 'Move TransactionalCollectionRegionAc

In [99]:
refactor_arr

[['RedissonExtensionTest.java', 0],
 ['JndiRedissonRegionFactory.java', 0],
 ['RedissonRegionFactory.java', 0],
 ['BaseRegion.java', 151],
 ['RedissonCollectionRegion.java', 0],
 ['RedissonEntityRegion.java', 0],
 ['RedissonNaturalIdRegion.java', 0],
 ['RedissonQueryRegion.java', 0],
 ['RedissonTimestampsRegion.java', 0],
 ['AbstractReadWriteAccessStrategy.java', 0],
 ['NonStrictReadWriteCollectionRegionAccessStrategy.java', 159],
 ['NonStrictReadWriteEntityRegionAccessStrategy.java', 0],
 ['NonStrictReadWriteNaturalIdRegionAccessStrategy.java', 0],
 ['ReadOnlyCollectionRegionAccessStrategy.java', 0],
 ['ReadOnlyEntityRegionAccessStrategy.java', 0],
 ['ReadOnlyNaturalIdRegionAccessStrategy.java', 0],
 ['ReadWriteCollectionRegionAccessStrategy.java', 0],
 ['ReadWriteEntityRegionAccessStrategy.java', 0],
 ['ReadWriteNaturalIdRegionAccessStrategy.java', 0],
 ['TransactionalCollectionRegionAccessStrategy.java', 0],
 ['TransactionalEntityRegionAccessStrategy.java', 0],
 ['TransactionalNatur

In [100]:
refactor_df = pd.DataFrame(refactor_arr)
refactor_df.columns = ['class_name', 'refactor_destination']
refactor_df.head()

Unnamed: 0,class_name,refactor_destination
0,RedissonExtensionTest.java,0
1,JndiRedissonRegionFactory.java,0
2,RedissonRegionFactory.java,0
3,BaseRegion.java,151
4,RedissonCollectionRegion.java,0


In [101]:
temp_dataframe = pd.DataFrame(main_data_arr)
temp_columns = list(ck_metrics.columns)
temp_columns.append('num_dependency')
temp_columns.append('num_line_affected')
temp_dataframe.columns = temp_columns

## For the groupby operation below
#temp_dataframe['placeholder_key'] = 'placeholder_key'

In [102]:
temp_dataframe.head()

Unnamed: 0,file,class,type,cbo,wmc,dit,rfc,lcom,totalMethods,staticMethods,...,variablesQty,maxNestedBlocks,anonymousClassesQty,subClassesQty,lambdasQty,uniqueWordsQty,modifiers,class_name,num_dependency,num_line_affected
0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.helidon.RedissonExtension,class,9,7,1,9,0,2,0,...,3,1,0,0,2,35,1,RedissonExtension.java,10.0,90
1,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.RedissonExtensionTest,class,12,3,1,15,3,3,0,...,4,0,0,0,0,22,1,RedissonExtensionTest.java,10.0,141
2,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.JndiRedissonRegionFactory,class,4,6,2,4,1,2,0,...,5,3,0,0,0,56,1,JndiRedissonRegionFactory.java,10.0,211
3,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.RedissonRegionFactory,class,23,29,1,27,19,14,0,...,29,5,0,0,0,115,1,RedissonRegionFactory.java,10.0,448
4,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.RedissonStrategyRegistr...,class,5,1,1,3,0,1,0,...,0,0,0,0,0,11,1,RedissonStrategyRegistrationProvider.java,69.0,490


In [103]:
temp_dataframe = pd.merge(refactor_df, temp_dataframe, on='class_name', how='left')

In [104]:
temp_dataframe.head()

Unnamed: 0,class_name,refactor_destination,file,class,type,cbo,wmc,dit,rfc,lcom,...,mathOperationsQty,variablesQty,maxNestedBlocks,anonymousClassesQty,subClassesQty,lambdasQty,uniqueWordsQty,modifiers,num_dependency,num_line_affected
0,RedissonExtensionTest.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.RedissonExtensionTest,class,12,3,1,15,3,...,0,4,0,0,0,0,22,1,10.0,141
1,JndiRedissonRegionFactory.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.JndiRedissonRegionFactory,class,4,6,2,4,1,...,3,5,3,0,0,0,56,1,10.0,211
2,RedissonRegionFactory.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.RedissonRegionFactory,class,23,29,1,27,19,...,10,29,5,0,0,0,115,1,10.0,448
3,BaseRegion.java,151,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.BaseRegion,class,13,41,1,24,27,...,4,16,2,0,0,2,71,1,10.0,740
4,RedissonCollectionRegion.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.RedissonCollecti...,class,15,6,2,0,0,...,1,1,1,0,0,0,29,1,10.0,807


In [105]:
project_refactoring_data_agg = temp_dataframe.groupby(['class_name'], as_index=False).agg({
       'cbo': ['mean', 'min', 'max','std'], 
        'wmc': ['mean', 'min', 'max','std'], 
        'dit': ['mean', 'min', 'max','std'], 
        'rfc': ['mean', 'min', 'max','std'], 
        'lcom': ['mean', 'min', 'max','std'],
        'totalMethods': ['mean', 'min', 'max','std'], 
        'staticMethods': ['mean', 'min', 'max','std'],
        'publicMethods': ['mean', 'min', 'max','std'], 
        'privateMethods': ['mean', 'min', 'max','std'],
        'protectedMethods': ['mean', 'min', 'max','std'], 
        'defaultMethods': ['mean', 'min', 'max','std'], 
        'abstractMethods': ['mean', 'min', 'max','std'], 
        'finalMethods': ['mean', 'min', 'max','std'],
        'synchronizedMethods': ['mean', 'min', 'max','std'],
        'totalFields': ['mean', 'min', 'max','std'], 
        'staticFields': ['mean', 'min', 'max','std'], 
        'publicFields': ['mean', 'min', 'max','std'],
        'privateFields': ['mean', 'min', 'max','std'],
        'protectedFields': ['mean', 'min', 'max','std'],
        'defaultFields': ['mean', 'min', 'max','std'], 
        'finalFields': ['mean', 'min', 'max','std'],
        'synchronizedFields': ['mean', 'min', 'max','std'],
        'nosi': ['mean', 'min', 'max','std'], 
        'loc': ['mean', 'min', 'max','std'], 
        'returnQty': ['mean', 'min', 'max','std'],
        'loopQty': ['mean', 'min', 'max','std'],
        'comparisonsQty': ['mean', 'min', 'max','std'],
        'tryCatchQty': ['mean', 'min', 'max','std'],
        'parenthesizedExpsQty': ['mean', 'min', 'max','std'],
        'stringLiteralsQty': ['mean', 'min', 'max','std'],
        'numbersQty': ['mean', 'min', 'max','std'], 
        'assignmentsQty': ['mean', 'min', 'max','std'],
        'mathOperationsQty': ['mean', 'min', 'max','std'], 
        'variablesQty': ['mean', 'min', 'max','std'], 
        'maxNestedBlocks': ['mean', 'min', 'max','std'],
        'anonymousClassesQty': ['mean', 'min', 'max','std'],
        'subClassesQty': ['mean', 'min', 'max','std'],
        'lambdasQty': ['mean', 'min', 'max','std'],
        'uniqueWordsQty': ['mean', 'min', 'max','std'],
        'modifiers': ['mean', 'min', 'max','std'],
        'num_dependency': ['mean'],
        'num_line_affected': ['mean'],
})

In [106]:
project_refactoring_data_agg

Unnamed: 0_level_0,class_name,cbo,cbo,cbo,cbo,wmc,wmc,wmc,wmc,dit,...,uniqueWordsQty,uniqueWordsQty,uniqueWordsQty,uniqueWordsQty,modifiers,modifiers,modifiers,modifiers,num_dependency,num_line_affected
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,min,max,std,mean,min,max,std,mean,...,mean,min,max,std,mean,min,max,std,mean,mean
0,ANDCondition.java,1,1,1,,2,2,2,,1,...,9,9,9,,1,1,1,,10.0,141583
1,AbstractCacheMap.java,10,10,10,,72,72,72,,6,...,78,78,78,,1025,1025,1025,,10.0,105696
2,AbstractNamingScheme.java,2,2,2,,2,2,2,,1,...,9,9,9,,1025,1025,1025,,10.0,143309
3,AbstractReadWriteAccessStrategy.java,5,5,5,,5,5,5,,2,...,34,34,34,,1,1,1,,10.0,1075
4,AbstractRedissonNamespaceDefinitionParser.java,10,10,10,,7,7,7,,2,...,42,42,42,,1025,1025,1025,,10.0,157019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,WeightedRoundRobinBalancer.java,6,6,6,,10,10,10,,1,...,66,66,66,,1,1,1,,10.0,131108
1131,WeightedRoundRobinBalancerTest.java,4,4,4,,2,2,2,,1,...,34,34,34,,1,1,1,,10.0,200236
1132,WorkerOptions.java,3,3,3,,12,12,12,,1,...,53,53,53,,17,17,17,,10.0,100547
1133,WriteBehindService.java,3,3,3,,5,5,5,,1,...,19,19,19,,1,1,1,,10.0,56899


In [107]:
columns_to_agg = [
    'cbo', 'wmc', 'dit', 'rfc', 'lcom',
       'totalMethods', 'staticMethods', 'publicMethods', 'privateMethods',
       'protectedMethods', 'defaultMethods', 'abstractMethods', 'finalMethods',
       'synchronizedMethods', 'totalFields', 'staticFields', 'publicFields',
       'privateFields', 'protectedFields', 'defaultFields', 'finalFields',
       'synchronizedFields', 'nosi', 'loc', 'returnQty', 'loopQty',
       'comparisonsQty', 'tryCatchQty', 'parenthesizedExpsQty',
       'stringLiteralsQty', 'numbersQty', 'assignmentsQty',
       'mathOperationsQty', 'variablesQty', 'maxNestedBlocks',
       'anonymousClassesQty', 'subClassesQty', 'lambdasQty', 'uniqueWordsQty',
       'modifiers'
]


project_refactoring_data_agg_column_names = ['class_name']
for element in columns_to_agg:
    project_refactoring_data_agg_column_names.append(element + '_mean')
    project_refactoring_data_agg_column_names.append(element + '_min')
    project_refactoring_data_agg_column_names.append(element + '_max')
    project_refactoring_data_agg_column_names.append(element + '_std')
    
    
project_refactoring_data_agg_column_names.append('num_dependency_mean')
project_refactoring_data_agg_column_names.append('num_line_affected_mean')

In [108]:
project_refactoring_data_agg.columns = project_refactoring_data_agg_column_names

In [109]:
project_refactoring_data_agg['refactor_destination'] = temp_dataframe['refactor_destination']
project_refactoring_data_agg['file'] = temp_dataframe['file']
project_refactoring_data_agg['class'] = temp_dataframe['class']

In [110]:
project_refactoring_data_agg

Unnamed: 0,class_name,cbo_mean,cbo_min,cbo_max,cbo_std,wmc_mean,wmc_min,wmc_max,wmc_std,dit_mean,...,uniqueWordsQty_std,modifiers_mean,modifiers_min,modifiers_max,modifiers_std,num_dependency_mean,num_line_affected_mean,refactor_destination,file,class
0,ANDCondition.java,1,1,1,,2,2,2,,1,...,,1,1,1,,10.0,141583,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.RedissonExtensionTest
1,AbstractCacheMap.java,10,10,10,,72,72,72,,6,...,,1025,1025,1025,,10.0,105696,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.JndiRedissonRegionFactory
2,AbstractNamingScheme.java,2,2,2,,2,2,2,,1,...,,1025,1025,1025,,10.0,143309,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.RedissonRegionFactory
3,AbstractReadWriteAccessStrategy.java,5,5,5,,5,5,5,,2,...,,1,1,1,,10.0,1075,151,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.BaseRegion
4,AbstractRedissonNamespaceDefinitionParser.java,10,10,10,,7,7,7,,2,...,,1025,1025,1025,,10.0,157019,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.RedissonCollecti...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,WeightedRoundRobinBalancer.java,6,6,6,,10,10,10,,1,...,,1,1,1,,10.0,131108,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...
1131,WeightedRoundRobinBalancerTest.java,4,4,4,,2,2,2,,1,...,,1,1,1,,10.0,200236,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...
1132,WorkerOptions.java,3,3,3,,12,12,12,,1,...,,17,17,17,,10.0,100547,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...
1133,WriteBehindService.java,3,3,3,,5,5,5,,1,...,,1,1,1,,10.0,56899,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...


In [111]:
project_refactoring_data_agg = project_refactoring_data_agg.fillna(0)

In [112]:
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,1 hour 6 mins
H2O_cluster_timezone:,Asia/Kuala_Lumpur
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.1.7
H2O_cluster_version_age:,9 months and 11 days !!!
H2O_cluster_name:,H2O_from_python_tanji_wxih4v
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,7.951 Gb
H2O_cluster_total_cores:,12
H2O_cluster_allowed_cores:,12


In [113]:
model_path = "models/EffortEstimationModelv3/Log_Regression_GBM_grid__1_AutoML_20220228_154246_model_3"
saved_model = h2o.load_model(model_path)

In [114]:
#project_refactoring_data_agg.drop(['placeholder_key'],axis=1, inplace=True)

In [115]:
project_refactoring_data_agg

Unnamed: 0,class_name,cbo_mean,cbo_min,cbo_max,cbo_std,wmc_mean,wmc_min,wmc_max,wmc_std,dit_mean,...,uniqueWordsQty_std,modifiers_mean,modifiers_min,modifiers_max,modifiers_std,num_dependency_mean,num_line_affected_mean,refactor_destination,file,class
0,ANDCondition.java,1,1,1,0.0,2,2,2,0.0,1,...,0.0,1,1,1,0.0,10.0,141583,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.RedissonExtensionTest
1,AbstractCacheMap.java,10,10,10,0.0,72,72,72,0.0,6,...,0.0,1025,1025,1025,0.0,10.0,105696,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.JndiRedissonRegionFactory
2,AbstractNamingScheme.java,2,2,2,0.0,2,2,2,0.0,1,...,0.0,1025,1025,1025,0.0,10.0,143309,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.RedissonRegionFactory
3,AbstractReadWriteAccessStrategy.java,5,5,5,0.0,5,5,5,0.0,2,...,0.0,1,1,1,0.0,10.0,1075,151,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.BaseRegion
4,AbstractRedissonNamespaceDefinitionParser.java,10,10,10,0.0,7,7,7,0.0,2,...,0.0,1025,1025,1025,0.0,10.0,157019,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.RedissonCollecti...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,WeightedRoundRobinBalancer.java,6,6,6,0.0,10,10,10,0.0,1,...,0.0,1,1,1,0.0,10.0,131108,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...
1131,WeightedRoundRobinBalancerTest.java,4,4,4,0.0,2,2,2,0.0,1,...,0.0,1,1,1,0.0,10.0,200236,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...
1132,WorkerOptions.java,3,3,3,0.0,12,12,12,0.0,1,...,0.0,17,17,17,0.0,10.0,100547,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...
1133,WriteBehindService.java,3,3,3,0.0,5,5,5,0.0,1,...,0.0,1,1,1,0.0,10.0,56899,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.transaction.RedissonTransactional...


In [116]:
prediction_df = h2o.H2OFrame(project_refactoring_data_agg)

Parse progress: |█████████████████████████████████████████████████████████| 100%


In [117]:
prediction_df

class_name,cbo_mean,cbo_min,cbo_max,cbo_std,wmc_mean,wmc_min,wmc_max,wmc_std,dit_mean,dit_min,dit_max,dit_std,rfc_mean,rfc_min,rfc_max,rfc_std,lcom_mean,lcom_min,lcom_max,lcom_std,totalMethods_mean,totalMethods_min,totalMethods_max,totalMethods_std,staticMethods_mean,staticMethods_min,staticMethods_max,staticMethods_std,publicMethods_mean,publicMethods_min,publicMethods_max,publicMethods_std,privateMethods_mean,privateMethods_min,privateMethods_max,privateMethods_std,protectedMethods_mean,protectedMethods_min,protectedMethods_max,protectedMethods_std,defaultMethods_mean,defaultMethods_min,defaultMethods_max,defaultMethods_std,abstractMethods_mean,abstractMethods_min,abstractMethods_max,abstractMethods_std,finalMethods_mean,finalMethods_min,finalMethods_max,finalMethods_std,synchronizedMethods_mean,synchronizedMethods_min,synchronizedMethods_max,synchronizedMethods_std,totalFields_mean,totalFields_min,totalFields_max,totalFields_std,staticFields_mean,staticFields_min,staticFields_max,staticFields_std,publicFields_mean,publicFields_min,publicFields_max,publicFields_std,privateFields_mean,privateFields_min,privateFields_max,privateFields_std,protectedFields_mean,protectedFields_min,protectedFields_max,protectedFields_std,defaultFields_mean,defaultFields_min,defaultFields_max,defaultFields_std,finalFields_mean,finalFields_min,finalFields_max,finalFields_std,synchronizedFields_mean,synchronizedFields_min,synchronizedFields_max,synchronizedFields_std,nosi_mean,nosi_min,nosi_max,nosi_std,loc_mean,loc_min,loc_max,loc_std,returnQty_mean,returnQty_min,returnQty_max,returnQty_std,loopQty_mean,loopQty_min,loopQty_max,loopQty_std,comparisonsQty_mean,comparisonsQty_min,comparisonsQty_max,comparisonsQty_std,tryCatchQty_mean,tryCatchQty_min,tryCatchQty_max,tryCatchQty_std,parenthesizedExpsQty_mean,parenthesizedExpsQty_min,parenthesizedExpsQty_max,parenthesizedExpsQty_std,stringLiteralsQty_mean,stringLiteralsQty_min,stringLiteralsQty_max,stringLiteralsQty_std,numbersQty_mean,numbersQty_min,numbersQty_max,numbersQty_std,assignmentsQty_mean,assignmentsQty_min,assignmentsQty_max,assignmentsQty_std,mathOperationsQty_mean,mathOperationsQty_min,mathOperationsQty_max,mathOperationsQty_std,variablesQty_mean,variablesQty_min,variablesQty_max,variablesQty_std,maxNestedBlocks_mean,maxNestedBlocks_min,maxNestedBlocks_max,maxNestedBlocks_std,anonymousClassesQty_mean,anonymousClassesQty_min,anonymousClassesQty_max,anonymousClassesQty_std,subClassesQty_mean,subClassesQty_min,subClassesQty_max,subClassesQty_std,lambdasQty_mean,lambdasQty_min,lambdasQty_max,lambdasQty_std,uniqueWordsQty_mean,uniqueWordsQty_min,uniqueWordsQty_max,uniqueWordsQty_std,modifiers_mean,modifiers_min,modifiers_max,modifiers_std,num_dependency_mean,num_line_affected_mean,refactor_destination,file,class
ANDCondition.java,1,1,1,0,2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,11,11,11,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,0,1,1,1,0,10,141583,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-helidon\src\test\java\org\redisson\RedissonExtensionTest.java,org.redisson.RedissonExtensionTest
AbstractCacheMap.java,10,10,10,0,72,72,72,0,6,6,6,0,33,33,33,0,147,147,147,0,27,27,27,0,0,0,0,0,17,17,17,0,2,2,2,0,8,8,8,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,403,403,403,0,38,38,38,0,3,3,3,0,8,8,8,0,0,0,0,0,1,1,1,0,1,1,1,0,4,4,4,0,25,25,25,0,1,1,1,0,21,21,21,0,4,4,4,0,3,3,3,0,4,4,4,0,0,0,0,0,78,78,78,0,1025,1025,1025,0,10,105696,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\JndiRedissonRegionFactory.java,org.redisson.hibernate.JndiRedissonRegionFactory
AbstractNamingScheme.java,2,2,2,0,2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,12,12,12,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,0,1025,1025,1025,0,10,143309,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\RedissonRegionFactory.java,org.redisson.hibernate.RedissonRegionFactory
AbstractReadWriteAccessStrategy.java,5,5,5,0,5,5,5,0,2,2,2,0,3,3,3,0,10,10,10,0,5,5,5,0,0,0,0,0,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,31,31,31,0,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34,34,34,0,1,1,1,0,10,1075,151,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\BaseRegion.java,org.redisson.hibernate.region.BaseRegion
AbstractRedissonNamespaceDefinitionParser.java,10,10,10,0,7,7,7,0,2,2,2,0,14,14,14,0,9,9,9,0,6,6,6,0,0,0,0,0,1,1,1,0,0,0,0,0,5,5,5,0,0,0,0,0,1,1,1,0,3,3,3,0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0,0,2,2,2,0,1,1,1,0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0,0,54,54,54,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,6,6,0,0,0,0,0,7,7,7,0,2,2,2,0,4,4,4,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,42,42,42,0,1025,1025,1025,0,10,157019,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonCollectionRegion.java,org.redisson.hibernate.region.RedissonCollectionRegion
AccessorInterceptor.java,30,30,30,0,47,47,47,0,1,1,1,0,64,64,64,0,24,24,24,0,9,9,9,0,1,1,1,0,2,2,2,0,7,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,0,4,4,4,0,0,0,0,0,5,5,5,0,0,0,0,0,0,0,0,0,5,5,5,0,0,0,0,0,15,15,15,0,228,228,228,0,18,18,18,0,0,0,0,0,3,3,3,0,1,1,1,0,14,14,14,0,20,20,20,0,5,5,5,0,38,38,38,0,2,2,2,0,33,33,33,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,113,113,113,0,1,1,1,0,10,142123,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonEntityRegion.java,org.redisson.hibernate.region.RedissonEntityRegion
AddCacheOperation.java,9,9,9,0,9,9,9,0,3,3,3,0,6,6,6,0,0,0,0,0,8,8,8,0,0,0,0,0,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,50,50,0,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,6,6,6,0,0,0,0,0,6,6,6,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,29,29,29,0,1,1,1,0,10,165219,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonNaturalIdRegion.java,org.redisson.hibernate.region.RedissonNaturalIdRegion
AddOperation.java,8,8,8,0,5,5,5,0,3,3,3,0,5,5,5,0,0,0,0,0,5,5,5,0,0,0,0,0,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,32,32,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,21,21,0,1,1,1,0,10,165280,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonQueryRegion.java,org.redisson.hibernate.region.RedissonQueryRegion
AdvBeanCopy.java,2,2,2,0,2,2,2,0,2,2,2,0,3,3,3,0,1,1,1,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,15,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,13,13,0,1,1,1,0,10,142938,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonTimestampsRegion.java,org.redisson.hibernate.region.RedissonTimestampsRegion
AsyncCountDownLatch.java,0,0,0,0,5,5,5,0,1,1,1,0,3,3,3,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,20,20,20,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,1,1,1,0,2,2,2,0,2,2,2,0,0,0,0,0,2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,20,20,20,0,1,1,1,0,10,144729,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\strategy\AbstractReadWriteAccessStrategy.java,org.redisson.hibernate.strategy.AbstractReadWriteAccessStrategy




In [118]:
preds = saved_model.predict(prediction_df)

gbm prediction progress: |████████████████████████████████████████████████| 100%


In [119]:
preds

predict
-0.401469
0.883448
-0.316169
0.552962
0.682665
1.31037
-0.189756
-0.0694651
-0.154463
-0.0570971




In [120]:
pd_preds = preds.as_data_frame()

In [121]:
prediction_df['refactoring_perc_time_taken_log']= preds['predict']
prediction_df.head()

class_name,cbo_mean,cbo_min,cbo_max,cbo_std,wmc_mean,wmc_min,wmc_max,wmc_std,dit_mean,dit_min,dit_max,dit_std,rfc_mean,rfc_min,rfc_max,rfc_std,lcom_mean,lcom_min,lcom_max,lcom_std,totalMethods_mean,totalMethods_min,totalMethods_max,totalMethods_std,staticMethods_mean,staticMethods_min,staticMethods_max,staticMethods_std,publicMethods_mean,publicMethods_min,publicMethods_max,publicMethods_std,privateMethods_mean,privateMethods_min,privateMethods_max,privateMethods_std,protectedMethods_mean,protectedMethods_min,protectedMethods_max,protectedMethods_std,defaultMethods_mean,defaultMethods_min,defaultMethods_max,defaultMethods_std,abstractMethods_mean,abstractMethods_min,abstractMethods_max,abstractMethods_std,finalMethods_mean,finalMethods_min,finalMethods_max,finalMethods_std,synchronizedMethods_mean,synchronizedMethods_min,synchronizedMethods_max,synchronizedMethods_std,totalFields_mean,totalFields_min,totalFields_max,totalFields_std,staticFields_mean,staticFields_min,staticFields_max,staticFields_std,publicFields_mean,publicFields_min,publicFields_max,publicFields_std,privateFields_mean,privateFields_min,privateFields_max,privateFields_std,protectedFields_mean,protectedFields_min,protectedFields_max,protectedFields_std,defaultFields_mean,defaultFields_min,defaultFields_max,defaultFields_std,finalFields_mean,finalFields_min,finalFields_max,finalFields_std,synchronizedFields_mean,synchronizedFields_min,synchronizedFields_max,synchronizedFields_std,nosi_mean,nosi_min,nosi_max,nosi_std,loc_mean,loc_min,loc_max,loc_std,returnQty_mean,returnQty_min,returnQty_max,returnQty_std,loopQty_mean,loopQty_min,loopQty_max,loopQty_std,comparisonsQty_mean,comparisonsQty_min,comparisonsQty_max,comparisonsQty_std,tryCatchQty_mean,tryCatchQty_min,tryCatchQty_max,tryCatchQty_std,parenthesizedExpsQty_mean,parenthesizedExpsQty_min,parenthesizedExpsQty_max,parenthesizedExpsQty_std,stringLiteralsQty_mean,stringLiteralsQty_min,stringLiteralsQty_max,stringLiteralsQty_std,numbersQty_mean,numbersQty_min,numbersQty_max,numbersQty_std,assignmentsQty_mean,assignmentsQty_min,assignmentsQty_max,assignmentsQty_std,mathOperationsQty_mean,mathOperationsQty_min,mathOperationsQty_max,mathOperationsQty_std,variablesQty_mean,variablesQty_min,variablesQty_max,variablesQty_std,maxNestedBlocks_mean,maxNestedBlocks_min,maxNestedBlocks_max,maxNestedBlocks_std,anonymousClassesQty_mean,anonymousClassesQty_min,anonymousClassesQty_max,anonymousClassesQty_std,subClassesQty_mean,subClassesQty_min,subClassesQty_max,subClassesQty_std,lambdasQty_mean,lambdasQty_min,lambdasQty_max,lambdasQty_std,uniqueWordsQty_mean,uniqueWordsQty_min,uniqueWordsQty_max,uniqueWordsQty_std,modifiers_mean,modifiers_min,modifiers_max,modifiers_std,num_dependency_mean,num_line_affected_mean,refactor_destination,file,class,refactoring_perc_time_taken_log
ANDCondition.java,1,1,1,0,2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,11,11,11,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,0,1,1,1,0,10,141583,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-helidon\src\test\java\org\redisson\RedissonExtensionTest.java,org.redisson.RedissonExtensionTest,-0.401469
AbstractCacheMap.java,10,10,10,0,72,72,72,0,6,6,6,0,33,33,33,0,147,147,147,0,27,27,27,0,0,0,0,0,17,17,17,0,2,2,2,0,8,8,8,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,403,403,403,0,38,38,38,0,3,3,3,0,8,8,8,0,0,0,0,0,1,1,1,0,1,1,1,0,4,4,4,0,25,25,25,0,1,1,1,0,21,21,21,0,4,4,4,0,3,3,3,0,4,4,4,0,0,0,0,0,78,78,78,0,1025,1025,1025,0,10,105696,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\JndiRedissonRegionFactory.java,org.redisson.hibernate.JndiRedissonRegionFactory,0.883448
AbstractNamingScheme.java,2,2,2,0,2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,12,12,12,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,0,1025,1025,1025,0,10,143309,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\RedissonRegionFactory.java,org.redisson.hibernate.RedissonRegionFactory,-0.316169
AbstractReadWriteAccessStrategy.java,5,5,5,0,5,5,5,0,2,2,2,0,3,3,3,0,10,10,10,0,5,5,5,0,0,0,0,0,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,31,31,31,0,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34,34,34,0,1,1,1,0,10,1075,151,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\BaseRegion.java,org.redisson.hibernate.region.BaseRegion,0.552962
AbstractRedissonNamespaceDefinitionParser.java,10,10,10,0,7,7,7,0,2,2,2,0,14,14,14,0,9,9,9,0,6,6,6,0,0,0,0,0,1,1,1,0,0,0,0,0,5,5,5,0,0,0,0,0,1,1,1,0,3,3,3,0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0,0,2,2,2,0,1,1,1,0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0,0,54,54,54,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,6,6,0,0,0,0,0,7,7,7,0,2,2,2,0,4,4,4,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,42,42,42,0,1025,1025,1025,0,10,157019,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonCollectionRegion.java,org.redisson.hibernate.region.RedissonCollectionRegion,0.682665
AccessorInterceptor.java,30,30,30,0,47,47,47,0,1,1,1,0,64,64,64,0,24,24,24,0,9,9,9,0,1,1,1,0,2,2,2,0,7,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,0,4,4,4,0,0,0,0,0,5,5,5,0,0,0,0,0,0,0,0,0,5,5,5,0,0,0,0,0,15,15,15,0,228,228,228,0,18,18,18,0,0,0,0,0,3,3,3,0,1,1,1,0,14,14,14,0,20,20,20,0,5,5,5,0,38,38,38,0,2,2,2,0,33,33,33,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,113,113,113,0,1,1,1,0,10,142123,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonEntityRegion.java,org.redisson.hibernate.region.RedissonEntityRegion,1.31037
AddCacheOperation.java,9,9,9,0,9,9,9,0,3,3,3,0,6,6,6,0,0,0,0,0,8,8,8,0,0,0,0,0,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,50,50,0,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,6,6,6,0,0,0,0,0,6,6,6,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,29,29,29,0,1,1,1,0,10,165219,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonNaturalIdRegion.java,org.redisson.hibernate.region.RedissonNaturalIdRegion,-0.189756
AddOperation.java,8,8,8,0,5,5,5,0,3,3,3,0,5,5,5,0,0,0,0,0,5,5,5,0,0,0,0,0,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,32,32,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,21,21,0,1,1,1,0,10,165280,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonQueryRegion.java,org.redisson.hibernate.region.RedissonQueryRegion,-0.0694651
AdvBeanCopy.java,2,2,2,0,2,2,2,0,2,2,2,0,3,3,3,0,1,1,1,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,15,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,13,13,0,1,1,1,0,10,142938,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\region\RedissonTimestampsRegion.java,org.redisson.hibernate.region.RedissonTimestampsRegion,-0.154463
AsyncCountDownLatch.java,0,0,0,0,5,5,5,0,1,1,1,0,3,3,3,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,20,20,20,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,1,1,1,0,2,2,2,0,2,2,2,0,0,0,0,0,2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,20,20,20,0,1,1,1,0,10,144729,0,C:\Users\tanji\Desktop\SoftwareRemodularization\raw_sourcecode\Redisson\Redisson_redisson-3.16.0\redisson-hibernate\redisson-hibernate-4\src\main\java\org\redisson\hibernate\strategy\AbstractReadWriteAccessStrategy.java,org.redisson.hibernate.strategy.AbstractReadWriteAccessStrategy,-0.0570971




In [122]:
#saved_model.explain(prediction_df)

In [123]:
pd_preds['predicted_time'] = np.exp(pd_preds['predict'])
pd_preds['class_name'] = project_refactoring_data_agg['class_name']
pd_preds['refactor_destination'] = project_refactoring_data_agg['refactor_destination']
pd_preds['file'] = project_refactoring_data_agg['file']
pd_preds['class'] = project_refactoring_data_agg['class']

In [124]:
pd_preds['outer_class_1'] = pd_preds['class'].str.split('.').str[-2]

In [125]:
dev_feedback = pd_preds[['outer_class_1','predicted_time']].groupby('outer_class_1').agg(['sum','count']).reset_index()
dev_feedback.columns = ['outer_class_1', 'sum', 'count']
dev_feedback.sort_values('count', ascending=False)

Unnamed: 0,outer_class_1,sum,count
1,api,354.074466,224
41,redisson,274.251507,202
13,connection,91.868517,58
16,decoder,92.552954,55
46,rx,91.325702,43
54,transaction,94.307661,41
5,cache,82.120459,37
20,executor,61.440236,33
8,codec,77.658576,32
39,reactive,76.420171,29


In [126]:
dev_feedback.sort_values('sum', ascending=False).head(30)

Unnamed: 0,outer_class_1,sum,count
1,api,354.074466,224
41,redisson,274.251507,202
54,transaction,94.307661,41
16,decoder,92.552954,55
13,connection,91.868517,58
46,rx,91.325702,43
5,cache,82.120459,37
8,codec,77.658576,32
39,reactive,76.420171,29
32,misc,71.388024,27


In [127]:
print(pd_preds.shape)
pd_preds.head()

(1135, 7)


Unnamed: 0,predict,predicted_time,class_name,refactor_destination,file,class,outer_class_1
0,-0.401469,0.669336,ANDCondition.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.RedissonExtensionTest,redisson
1,0.883448,2.419226,AbstractCacheMap.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.JndiRedissonRegionFactory,hibernate
2,-0.316169,0.728936,AbstractNamingScheme.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.RedissonRegionFactory,hibernate
3,0.552962,1.738394,AbstractReadWriteAccessStrategy.java,151,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.BaseRegion,region
4,0.682665,1.979146,AbstractRedissonNamespaceDefinitionParser.java,0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,org.redisson.hibernate.region.RedissonCollecti...,region


In [128]:
pd_preds.to_csv(f'pipeline_prediction_output/{project_name}_{version_name}.csv', index=False)


In [45]:
#refactor_arr