In [2]:
import json
import pandas as pd
import numpy as np
import networkx as nx
import os
import shutil
import subprocess
import requests
from github import Github

from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn import preprocessing
from sklearn.cluster import AgglomerativeClustering
from zipfile import ZipFile
from filecmp import dircmp
import configparser

In [3]:
config = configparser.ConfigParser()
config.read('authentication.ini')
print(config.sections())
github_oauth = config['github']['api_key']

['github']


In [4]:
project_releases = pd.read_csv('volatile_projects_complete_links_limit10_filtered.csv')

In [5]:
project_releases.head()

Unnamed: 0,project_name,project_link,version_name,commit,timestamp
0,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.4,"Commit(sha=""113a0a672f277a6e8181757a0c54f92d42...",29/7/2021 11:08
1,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.3,"Commit(sha=""4430459a3fe06c6140aa40b71ddc41ddf8...",15/7/2021 8:06
2,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.2,"Commit(sha=""b0693d44048a9c50e750b6df69cfe83fcb...",2/7/2021 13:34
3,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.1,"Commit(sha=""073dfc26c7a065f5d5abf18be8cce8258a...",18/6/2021 13:50
4,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.0,"Commit(sha=""17ce2d14317b1160ec9480da549028d182...",28/5/2021 5:16


In [6]:
len(project_releases['project_name'].unique())

28

In [7]:
#project_releases = project_releases[project_releases['project_name'] == 'Redisson']

In [8]:
project_releases.head(10)

Unnamed: 0,project_name,project_link,version_name,commit,timestamp
0,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.4,"Commit(sha=""113a0a672f277a6e8181757a0c54f92d42...",29/7/2021 11:08
1,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.3,"Commit(sha=""4430459a3fe06c6140aa40b71ddc41ddf8...",15/7/2021 8:06
2,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.2,"Commit(sha=""b0693d44048a9c50e750b6df69cfe83fcb...",2/7/2021 13:34
3,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.1,"Commit(sha=""073dfc26c7a065f5d5abf18be8cce8258a...",18/6/2021 13:50
4,Dbeaver,https://github.com/dbeaver/dbeaver,21.1.0,"Commit(sha=""17ce2d14317b1160ec9480da549028d182...",28/5/2021 5:16
5,Dbeaver,https://github.com/dbeaver/dbeaver,21.0.5,"Commit(sha=""6176d108f6cea5e290f6913550f1c21563...",14/5/2021 14:25
6,Dbeaver,https://github.com/dbeaver/dbeaver,21.0.4,"Commit(sha=""3193b6d4ad7a60c02c227ee7531e0a3d4a...",2/5/2021 15:13
7,Dbeaver,https://github.com/dbeaver/dbeaver,21.0.3,"Commit(sha=""c5f672c865bdfacb6e243f0bf7821a691f...",17/4/2021 15:22
8,Dbeaver,https://github.com/dbeaver/dbeaver,21.0.2,"Commit(sha=""cab215bccefba49956e7a1ec1878a810d1...",4/4/2021 20:23
9,Dbeaver,https://github.com/dbeaver/dbeaver,21.0.1,"Commit(sha=""8cff034373319ebb496de5ca6dff7e5ba6...",18/3/2021 10:44


In [9]:
finished_project_arr = []
with open('finished_projects.txt','r') as finished_project:
    for line in finished_project:
        finished_project_arr.append(line.split('\n')[0])
    finished_project.close()
    
for project_name in project_releases['project_name'].unique():
    if project_name not in finished_project_arr:
        print("Currently running: " + project_name)
        current_project = pd.DataFrame(project_releases[project_releases['project_name'] == project_name])
        current_project['rank'] = current_project['timestamp'].rank()
        current_project = current_project.sort_values(by=['rank'], ascending=False)
        current_project['previous_version'] = current_project['version_name'].shift(-1)
        current_project['previous_version_timestamp'] = current_project['timestamp'].shift(-1)
        current_project['previous_version_timestamp'].fillna(current_project['timestamp'], inplace=True)
        current_project['timestamp']= pd.to_datetime(current_project['timestamp'])
        current_project['previous_version_timestamp']= pd.to_datetime(current_project['previous_version_timestamp'])
        current_project['timestamp_diff'] = current_project['timestamp'] - current_project['previous_version_timestamp']
        current_project['timestamp_diff_hours'] = current_project['timestamp_diff'].astype('timedelta64[h]')
        current_project.tail()
        
        """
        ### Download dataset
        try:
            os.mkdir('raw_sourcecode/' + project_name)
        except:
            pass
        for row in current_project.iterrows():
            command = 'cd E:/SoftwareRemodularization/raw_sourcecode/' + project_name +' & mkdir ' + project_name + '_' +row[1]['version_name']

            #print(command)
            os.system(command)

            command = 'git clone ' + row[1]['project_link'] +  ' E:/SoftwareRemodularization/raw_sourcecode/' + project_name + '/' + project_name + '_' + row[1]['version_name']

            #print(command)
            os.system(command)
            commit = row[1]['commit'].replace('Commit(sha="','')
            commit = commit.replace('")','')
            command = 'cd E:/SoftwareRemodularization/raw_sourcecode/' + project_name + '/' + project_name +'_' + row[1]['version_name'] + ' & git checkout ' + commit
            #print(command)
            os.system(command)
        
        try:
            os.mkdir('raw_depends/' + project_name)
        except:
            pass
        for row in current_project.iterrows():
            #command = 'cd E:/SoftwareRemodularization/raw_depends/' + project_name +' & mkdir ' + project_name + '_' +row[1]['version_name']
            #os.system(command)
            #print(command)
            command = 'cd E:/SoftwareRemodularization/depends-0.9.2 & ' + 'java -jar depends.jar java E:/SoftwareRemodularization/raw_sourcecode/'  + project_name + '/' + project_name + '_' + row[1]['version_name']+  ' ../raw_depends/' + project_name + "/" + project_name + '_' + row[1]['version_name'] 
            #print(command)
            os.system(command)

        first_tag = current_project.head(1)['version_name'].values[0]
        last_tag= current_project.tail(1)['version_name'].values[0]

        #print(first_tag)
        #print(last_tag)

        
        try:
            os.mkdir('raw_refactoringMiner/' + project_name)
        except:
            pass
        
        try:
            for row in current_project.iterrows():
                #command = 'cd E:/SoftwareRemodularization/raw_depends/' + project_name +' & mkdir ' + project_name + '_' +row[1]['version_name']
                #os.system(command)
                #print(command)
                command = 'cd E:/SoftwareRemodularization/RefactoringMiner-2.1.0/bin & ' + 'RefactoringMiner -bt E:/SoftwareRemodularization/raw_sourcecode/'  + project_name + '/' + project_name + '_' + row[1]['version_name'] + ' ' + row[1]['previous_version'] + ' ' + row[1]['version_name'] + ' -json ' + 'E:/SoftwareRemodularization/raw_refactoringMiner/' +project_name + '/'+ project_name + '_' + row[1]['version_name'] + '.json'
                #print(command)
                os.system(command)
        except:
            pass
        """
       
        
        
        
        
        main_data_arr = []
        
        for row in current_project.iterrows():
            try:
                refactoring_miner_filename = f'raw_refactoringMiner/{project_name}/' + project_name + '_' + row[1]['version_name'] + '.json'
                depends_filename = f'raw_depends/{project_name}/' + project_name + '_' + row[1]['version_name'] + '.json'
                version_name = row[1]['version_name']
                #ck_metrics = pd.read_csv(f'ck_results_invi/{project_name}' + '-' + row[1]['version_name'] + '_class.csv')
                #command = 'java -jar ck-0.3.3-SNAPSHOT-jar-with-dependencies.jar raw_sourcecode/' + project_name + '/' + project_name + '_' + row[1]['version_name']
                #print(command)
                #os.system(command)
                ck_metrics_filename = f'ck_results_invi/{project_name}_{version_name}_class.csv'
                ck_metrics = pd.read_csv(ck_metrics_filename)
                
                
                latest_commit = row[1]['commit'].replace('Commit(sha="','')
                latest_commit = latest_commit.replace('")','')
                #print(latest_commit)
                #print(refactoring_miner_filename)

                #print(depends_filename)
                with open(depends_filename) as f:
                    depends_results = json.load(f)
                    index  = 0
                    file_dict = {}
                    for value in depends_results['variables']:
                        file_path = value.replace('\\','/')
                        string = f"E:/SoftwareRemodularization/raw_sourcecode/{project_name}/{project_name}_{row[1]['version_name']}/"
                        #print(string)
                        file_path = file_path.replace(string, '')
                        #var_array.append([index, value.replace('\\','/')])
                        #var_array.append([index,file_path])
                        file_dict[file_path] = index
                        #var_array.append(file_path)
                        #var_array.append([index, value.split('\\')[-1]])
                        #print(index, value)
                        index += 1

                    #var_df = pd.DataFrame(var_array)
                    #var_df.columns = ['index_val', 'name']
                    #var_df = var_df.set_index('name')

                    feature_list = {}
                    feature_index = 2
                    for element in depends_results['cells']:
                        #print(element)
                        try:
                            for a in element['values']:
                                if a not in feature_list:
                                    feature_list[a] = feature_index
                                    feature_index += 1
                                #print(a['Call'])


                        except:
                            pass

                    feature_arr = []
                    for element in depends_results['cells']:
                        #print(array)
                        array = [0] * (len(feature_list) + 2)
                        values = dict(element['values'])
                        #print(element)
                        array[0] = element['src']
                        array[1] = element['dest']
                        for feature in feature_list:
                            try:
                                value = values[feature]
                                array[feature_list[feature]] = value
                            except:
                                pass
                        #print(array)
                        feature_arr.append(array)

                    feature_df = pd.DataFrame(feature_arr)
                    col_names = ['src', 'dest']
                    for element in feature_list:
                        col_names.append(element)
                    feature_df.columns = col_names

                    feature_df['sum'] = feature_df.sum(axis=1) - feature_df['src'] - feature_df['dest']

                G = nx.Graph()
                for index, row in feature_df.iterrows():
                    G.add_edge(row['src'], row['dest'], weight=row['sum'])

                sum_dependency_df = nx.to_pandas_adjacency(G)
                sum_dependency_df['sum'] = sum_dependency_df.sum(axis=1)
                #sum_dependency_df
                final_dependency_df = sum_dependency_df['sum']
                
               

                try:
                    with open(refactoring_miner_filename) as f:
                        #print(refactoring_miner_filename, 'stuck here')
                        refactoring_miner = json.load(f)
                        
                        

                        for i in refactoring_miner['commits']:

                            if len(i['refactorings']) != 0:
                                commit_compared_with = i['sha1']
                                file_arr = []
                                num_line_affected = 0
                                num_dependency = 0
                                actual_num_of_classes_touched = 0
                                for refactor in i['refactorings']:
                                    #print(refactor)
                                    for file in refactor['rightSideLocations']:

                                        #file_arr.append(file['filePath'])
                                        #print(file['filePath'], 'here')
                                        #print(file_dict[file['filePath']])
                                        actual_num_of_classes_touched += 1
                                        try:
                                            num_dependency = final_dependency_df.iloc[file_dict[file['filePath']]]
                                            num_line_affected = file['endLine'] - file['startLine']
                                            
                                            class_metrics = ck_metrics.loc[ck_metrics['class'] == file['codeElement']].values.tolist()[0]
                                            #print(class_metrics)
                                            if num_dependency != 0 and num_line_affected != 0:
                                                #print('added to main_data_arr')
                                                class_metrics.extend([latest_commit, commit_compared_with, num_dependency, num_line_affected, actual_num_of_classes_touched])
                                                main_data_arr.append(class_metrics)
                                                #main_data_arr.append([latest_commit, commit_compared_with, num_dependency, num_line_affected, actual_num_of_classes_touched, class_metrics])
                                                
                                        except:
                                            pass

                                    for file in refactor['leftSideLocations']:

                                        #file_arr.append(file['filePath'])
                                        #print(file['filePath'], 'here')
                                        #print(file_dict[file['filePath']])
                                        #actual_num_of_classes_touched += 1
                                        try:
                                            num_dependency = final_dependency_df.iloc[file_dict[file['filePath']]]
                                            num_line_affected = file['endLine'] - file['startLine']
                                            
                                            class_metrics = ck_metrics.loc[ck_metrics['class'] == file['codeElement']].tolist()[0]
                                            #print(class_metrics)
                                            
                                            if num_dependency != 0 and num_line_affected != 0:
                                                #print('added to main_data_arr')
                                                class_metrics.extend([latest_commit, commit_compared_with, num_dependency, num_line_affected, actual_num_of_classes_touched])
                                                main_data_arr.append(class_metrics)
                                                #main_data_arr.append([latest_commit, commit_compared_with, num_dependency, num_line_affected, actual_num_of_classes_touched,class_metrics])
                                                
                                        except:
                                            pass

                               
                
                except Exception as e:
                    print(e)

            except Exception as e:
                print(e)

        #print(main_data_arr, 'main_data_arr')
        effort_data_df = pd.DataFrame(main_data_arr)
        column_names = []
        for element in ck_metrics.columns:
            column_names.append(element)
        column_names.extend(['latest_commit', 'commit_compared_with', 'num_dependency', 'num_line_affected', 'actual_num_of_classes_touched'])
        effort_data_df.columns = column_names
        #effort_data_df.columns = ['latest_commit', 'commit_compared_with', 'num_dependency', 'num_line_affected', 'actual_num_of_classes_touched']

        latest_commit_arr = []
        commit_compared_with_arr = []
        author_arr = []
        author_email_arr = []
        
        """
        g = Github(github_oauth, timeout=3000)
        repo = g.get_repo(current_project.iloc[0]['project_link'].replace('https://github.com/',''))
        
        for row in effort_data_df.iterrows():
            sha = row[1]['latest_commit']
            commit = repo.get_commit(sha=sha)
            #print(commit)
            date = commit.commit.author.date
            #print(date)
            latest_commit_arr.append(date)

            sha = row[1]['commit_compared_with']
            commit = repo.get_commit(sha=sha)
            date = commit.commit.author.date
            commit_compared_with_arr.append(date)
            
            author_arr.append(commit.commit.author.name)
            author_email_arr.append(commit.commit.author.email)
            

        effort_data_df['latest_commit_date'] = latest_commit_arr
        effort_data_df['commit_compared_with_date'] = commit_compared_with_arr
        effort_data_df['author'] = author_arr
        effort_data_df['author_email'] = author_email_arr
        
        """
        effort_data_df.to_csv('Effort_Estimation_Results/'+ project_name + '.csv', index=False)
        
        
        with open('finished_projects.txt','a+') as finished_project:
            finished_project.write(project_name + '\n')
            finished_project.close()
            print('Finished running: ' + project_name)

Currently running: Camel
[Errno 2] No such file or directory: 'raw_refactoringMiner/Camel/Camel_camel-3.6.0.json'
Finished running: Camel
Currently running: Cxf
[Errno 2] No such file or directory: 'raw_refactoringMiner/Cxf/Cxf_cxf-3.3.5.json'
Finished running: Cxf
Currently running: Hadoop
[Errno 2] No such file or directory: 'raw_refactoringMiner/Hadoop/Hadoop_submarine-0.2.0-RC0.json'
Finished running: Hadoop
Currently running: Alluxio
[Errno 2] No such file or directory: 'raw_refactoringMiner/Alluxio/Alluxio_v2.5.0-1.json'
Finished running: Alluxio
Currently running: Beam
[Errno 2] No such file or directory: 'raw_refactoringMiner/Beam/Beam_website-to-hugo.json'
Finished running: Beam
Currently running: Atmosphere
[Errno 2] No such file or directory: 'ck_results_invi/Atmosphere_atmosphere-project-2.7.1_class.csv'
[Errno 2] No such file or directory: 'ck_results_invi/Atmosphere_atmosphere-project-3.0.0-alpha2_class.csv'
[Errno 2] No such file or directory: 'ck_results_invi/Atmosphere

ValueError: Length mismatch: Expected axis has 0 elements, new values have 48 elements

In [37]:
num_dependency += final_dependency_df[file_dict[file['filePath']]]

In [38]:
effort_data_df

Unnamed: 0,latest_commit,commit_compared_with,num_dependency,num_line_affected,actual_num_of_classes_touched,latest_commit_date,commit_compared_with_date,author,author_email
0,b84627ef4bc43f2096c1b764720f9ff2387dca32,3e331c108905a97fa9718b40844ddc1356fc86b5,840.0,1740,8,2021-01-30 19:44:19,2020-10-18 12:13:09,Jesse Wilson,jesse@swank.ca
1,b84627ef4bc43f2096c1b764720f9ff2387dca32,9ee33446f83932a2bb001e5ef2156a3d5de5838d,8831.0,10,19,2021-01-30 19:44:19,2020-10-18 16:37:48,Jesse Wilson,jesse@swank.ca
2,b84627ef4bc43f2096c1b764720f9ff2387dca32,f8065acac28a0b86f3bdbd854ac87af0789416d0,1215.0,8054,43,2021-01-30 19:44:19,2020-10-30 06:59:02,Jesse Wilson,jesse@swank.ca
3,b84627ef4bc43f2096c1b764720f9ff2387dca32,743a260b2ad8fa9830a953710dca53ea7525151b,1188.0,159,22,2021-01-30 19:44:19,2020-10-31 12:10:08,Yuri Schimke,yuri@schimke.ee
4,b84627ef4bc43f2096c1b764720f9ff2387dca32,e2b868c538b9eb103cc4a7ac0f3bd6c82df4057f,644.0,10,14,2021-01-30 19:44:19,2020-10-31 12:10:34,Yuri Schimke,yuri@schimke.ee
5,b84627ef4bc43f2096c1b764720f9ff2387dca32,963c76864344d0d55ba7421f474c130a958a9649,55869.0,5563,732,2021-01-30 19:44:19,2020-10-31 22:51:34,Jesse Wilson,jesse@swank.ca
6,b84627ef4bc43f2096c1b764720f9ff2387dca32,59ae0c138d6468aac5d92781b19cddc26e04e862,11847.0,2772,109,2021-01-30 19:44:19,2020-11-01 05:46:35,Jesse Wilson,jesse@swank.ca
7,b84627ef4bc43f2096c1b764720f9ff2387dca32,d80df110935cc4dc721789d6b343c399703fca16,108.0,14,3,2021-01-30 19:44:19,2020-11-01 14:10:13,Yuri Schimke,yuri@schimke.ee
8,b84627ef4bc43f2096c1b764720f9ff2387dca32,47a852e8de40dca245cb18a9f3f4c76ad93fda38,418.0,64,7,2021-01-30 19:44:19,2020-11-01 16:18:59,Yuri Schimke,yuri@schimke.ee
9,b84627ef4bc43f2096c1b764720f9ff2387dca32,4677beea96f0afb1e061f119e52e0203d4cd3738,42.0,589,4,2021-01-30 19:44:19,2020-11-03 19:56:29,Yuri Schimke,yuri@schimke.ee


In [48]:
users = g.search_users("jesse@swank.ca in:email")
for user in users:
    print(user.login)
    print(user.public_repos)
    print(user.contributions)

In [49]:
users = g.search_users("Jesse Wilson in:name")
for user in users:
    print(user.login)
    print(user.public_repos)
    print(user.contributions)

swankjesse
21
None
wilsonjwcsu
9
None
jesseswilson
5
None
jessewilson
0
None
jpwil93
7
None
HACKPLUSPLUS
7
None
uhthing
7
None
JesseAlexanderWilson
5
None
code4spark
3
None
lilocowboy
0
None
Jesseadamwilson
0
None


In [47]:
user.public_repos

106

In [29]:
user.contributions

In [17]:
file_dict[file['filePath']]

21

In [18]:
final_dependency_df

146.0    23.0
145.0    25.0
143.0    11.0
142.0    14.0
141.0    13.0
         ... 
43.0     14.0
58.0      3.0
77.0      3.0
72.0     29.0
96.0      4.0
Name: sum, Length: 65, dtype: float64

In [44]:
final_dependency_df.iloc[1]

4.0

In [46]:
final_dependency_df.iloc[file_dict[file['filePath']]]

4.0

In [16]:
current_project

Unnamed: 0,project_name,project_link,version_name,commit,timestamp,rank,previous_version,previous_version_timestamp,timestamp_diff,timestamp_diff_hours
897,Okhttp,https://github.com/square/okhttp,parent-5.0.0-alpha.2,"Commit(sha=""b84627ef4bc43f2096c1b764720f9ff238...",2021-01-30 19:44:19,5.0,parent-5.0.0-alpha.1,2021-01-30 18:36:37,0 days 01:07:42,1.0
898,Okhttp,https://github.com/square/okhttp,parent-5.0.0-alpha.1,"Commit(sha=""79d2a8d192d007b8acb2423d3ab6bba3a4...",2021-01-30 18:36:37,4.0,parent-4.9.1,2021-01-30 18:09:57,0 days 00:26:40,0.0
900,Okhttp,https://github.com/square/okhttp,parent-4.9.1,"Commit(sha=""63dcd95bfa2345bb3f3d4abc6b6dbf36cf...",2021-01-30 18:09:57,3.0,parent-4.10.0-RC1,2020-10-07 03:28:26,115 days 14:41:31,2774.0
899,Okhttp,https://github.com/square/okhttp,parent-4.10.0-RC1,"Commit(sha=""4fd1e8f99833eebdd2e99f3456322aa197...",2020-10-07 03:28:26,2.0,parent-4.9.0,2020-09-11 21:08:20,25 days 06:20:06,606.0
901,Okhttp,https://github.com/square/okhttp,parent-4.9.0,"Commit(sha=""cbeaf8f955fff9caa5652ccc6c1393ec8b...",2020-09-11 21:08:20,1.0,,2020-09-11 21:08:20,0 days 00:00:00,0.0


In [10]:
refactoring_miner_filename

NameError: name 'refactoring_miner_filename' is not defined

In [None]:
f.close()

In [None]:
g = Github(github_oauth, timeout=3000)
print(current_project.iloc[0]['project_link'].replace('https://github.com/',''))
repo = g.get_repo(current_project.iloc[0]['project_link'].replace('https://github.com/',''))
print(repo)

In [None]:
with open('raw_refactoringMiner/Okhttp/Okhttp_parent-5.0.0-alpha.1.json') as f:
    refactoring_miner = json.load(f)
    
for i in refactoring_miner['commits']:

    if len(i['refactorings']) != 0:
        commit_compared_with = i['sha1']
        file_arr = []
        num_line_affected = 0
        num_dependency = 0
        actual_num_of_classes_touched = 0
        for refactor in i['refactorings']:
            #print(refactor)
            for file in refactor['rightSideLocations']:

                #file_arr.append(file['filePath'])
                #print(file['filePath'], 'here')
                #print(file_dict[file['filePath']])
                actual_num_of_classes_touched += 1
                num_dependency += final_dependency_df[file_dict[file['filePath']]]
                num_line_affected += file['endLine'] - file['startLine']
               
        if num_dependency != 0 and num_line_affected != 0:
            main_data_arr.append([latest_commit, commit_compared_with, num_dependency, num_line_affected, actual_num_of_classes_touched])



In [19]:
effort_data_df

Unnamed: 0,latest_commit,commit_compared_with,num_dependency,num_line_affected,actual_num_of_classes_touched,latest_commit_date,commit_compared_with_date
0,b84627ef4bc43f2096c1b764720f9ff2387dca32,3e331c108905a97fa9718b40844ddc1356fc86b5,840.0,1740,8,2021-01-30 19:44:19,2020-10-18 12:13:09
1,b84627ef4bc43f2096c1b764720f9ff2387dca32,9ee33446f83932a2bb001e5ef2156a3d5de5838d,8831.0,10,19,2021-01-30 19:44:19,2020-10-18 16:37:48
2,b84627ef4bc43f2096c1b764720f9ff2387dca32,f8065acac28a0b86f3bdbd854ac87af0789416d0,1215.0,8054,43,2021-01-30 19:44:19,2020-10-30 06:59:02
3,b84627ef4bc43f2096c1b764720f9ff2387dca32,743a260b2ad8fa9830a953710dca53ea7525151b,1188.0,159,22,2021-01-30 19:44:19,2020-10-31 12:10:08
4,b84627ef4bc43f2096c1b764720f9ff2387dca32,e2b868c538b9eb103cc4a7ac0f3bd6c82df4057f,644.0,10,14,2021-01-30 19:44:19,2020-10-31 12:10:34
5,b84627ef4bc43f2096c1b764720f9ff2387dca32,963c76864344d0d55ba7421f474c130a958a9649,55869.0,5563,732,2021-01-30 19:44:19,2020-10-31 22:51:34
6,b84627ef4bc43f2096c1b764720f9ff2387dca32,59ae0c138d6468aac5d92781b19cddc26e04e862,11847.0,2772,109,2021-01-30 19:44:19,2020-11-01 05:46:35
7,b84627ef4bc43f2096c1b764720f9ff2387dca32,d80df110935cc4dc721789d6b343c399703fca16,108.0,14,3,2021-01-30 19:44:19,2020-11-01 14:10:13
8,b84627ef4bc43f2096c1b764720f9ff2387dca32,47a852e8de40dca245cb18a9f3f4c76ad93fda38,418.0,64,7,2021-01-30 19:44:19,2020-11-01 16:18:59
9,b84627ef4bc43f2096c1b764720f9ff2387dca32,4677beea96f0afb1e061f119e52e0203d4cd3738,42.0,589,4,2021-01-30 19:44:19,2020-11-03 19:56:29


In [None]:
for row in effort_data_df.iterrows():
    sha = row[1]['latest_commit']
    commit = repo.get_commit(sha=sha)
    print(commit)
    date = commit.commit.author.date
    author = commit.commit.author
    print(date)
    latest_commit_arr.append(date)

    sha = row[1]['commit_compared_with']
    commit = repo.get_commit(sha=sha)
    date = commit.commit.author.date
    commit_compared_with_arr.append(date)

effort_data_df['latest_commit_date'] = latest_commit_arr
effort_data_df['commit_compared_with_date'] = commit_compared_with_arr

effort_data_df.to_csv('Effort_Estimation_Results/'+ project_name + '.csv', index=False)

In [73]:
ck_metrics = pd.read_csv('ck_results_invi/Okhttp_parent-5.0.0-alpha.1_class.csv')
ck_metrics['className'] = ck_metrics['class'].str.split('.').str[-1] + '.java'
ck_metrics.head()

Unnamed: 0,file,class,type,cbo,wmc,dit,rfc,lcom,totalMethods,staticMethods,...,assignmentsQty,mathOperationsQty,variablesQty,maxNestedBlocks,anonymousClassesQty,subClassesQty,lambdasQty,uniqueWordsQty,modifiers,className
0,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,okhttp3.mockwebserver.CustomDispatcherTest,class,8,3,1,14,1,3,0,...,14,0,14,0,2,0,1,53,1,CustomDispatcherTest.java
1,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,okhttp3.mockwebserver.internal.http2.Http2Server,class,9,33,2,53,32,9,1,...,26,7,24,3,0,0,0,80,17,Http2Server.java
2,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,okhttp3.mockwebserver.MockWebServerTest,class,21,54,1,117,0,39,1,...,97,12,97,3,1,0,1,212,17,MockWebServerTest.java
3,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,okhttp3.mockwebserver.RecordedRequestTest,class,8,5,2,7,0,5,0,...,12,0,12,0,0,1,0,35,1,RecordedRequestTest.java
4,C:\Users\tanji\Desktop\SoftwareRemodularizatio...,okhttp3.curl.MainTest,class,8,12,1,18,55,11,2,...,13,0,13,1,0,0,0,47,1,MainTest.java


In [74]:
test = ck_metrics.loc[ck_metrics['class'] == 'okhttp3.sse.internal.ServerSentEventIteratorTest']
print(test)
print(test.values.tolist())

                                                  file  \
105  C:\Users\tanji\Desktop\SoftwareRemodularizatio...   

                                                class   type  cbo  wmc  dit  \
105  okhttp3.sse.internal.ServerSentEventIteratorTest  class    6   19    1   

     rfc  lcom  totalMethods  staticMethods  ...  assignmentsQty  \
105   10     0            18              0  ...               4   

     mathOperationsQty  variablesQty  maxNestedBlocks  anonymousClassesQty  \
105                 17             4                0                    1   

     subClassesQty  lambdasQty  uniqueWordsQty  modifiers  \
105              0           0              70         17   

                            className  
105  ServerSentEventIteratorTest.java  

[1 rows x 44 columns]
[['C:\\Users\\tanji\\Desktop\\SoftwareRemodularization\\raw_sourcecode\\Okhttp\\Okhttp_parent-5.0.0-alpha.1\\okhttp-sse\\src\\test\\java\\okhttp3\\sse\\internal\\ServerSentEventIteratorTest.java', 'okhttp