In [11]:
import json
import pandas as pd
import numpy as np
import networkx as nx
import jellyfish
import os
import shutil
import subprocess
import requests
from github import Github
from git import Repo
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn import preprocessing
from sklearn.cluster import AgglomerativeClustering
from zipfile import ZipFile
from filecmp import dircmp
import configparser

In [20]:
config = configparser.ConfigParser()
config.read('authentication.ini')
print(config.sections())
github_oauth = config['github']['api_key'] 

['github']


In [21]:
g = Github('tanjianjia@gmail.com', github_oauth, timeout=3000)

In [22]:
volatile_projects = pd.read_csv('volatile_projects_test.csv')

In [23]:
volatile_projects.head()

Unnamed: 0,project_name,project_link
0,okhttp,https://github.com/square/okhttp


In [9]:
main_arr = []

for row in volatile_projects.iterrows():
    
    repo = g.get_repo(row[1]['project_link'].replace('https://github.com/',''))
    for release in repo.get_tags():
        temp_arr = []
        temp_arr.append(row[1]['project_name'])
        temp_arr.append(row[1]['project_link'])
        temp_arr.append(release.name)
        temp_arr.append(release.commit)
        
        commit = repo.get_commit(sha=release.commit.sha)
        date = commit.commit.author.date
        
        temp_arr.append(date)
        
        main_arr.append(temp_arr)
    print(row[1]['project_name'], 'Success')

okhttp Success


In [7]:
new_links = pd.DataFrame(main_arr)
new_links.columns = ['project_name', 'project_link', 'version_name', 'commit', 'timestamp']
new_links.head()

Unnamed: 0,project_name,project_link,version_name,commit,timestamp
0,okhttp,https://github.com/square/okhttp,parent-5.0.0-alpha.2,"Commit(sha=""b84627ef4bc43f2096c1b764720f9ff238...",2021-01-30 19:44:19
1,okhttp,https://github.com/square/okhttp,parent-5.0.0-alpha.1,"Commit(sha=""79d2a8d192d007b8acb2423d3ab6bba3a4...",2021-01-30 18:36:37
2,okhttp,https://github.com/square/okhttp,parent-4.10.0-RC1,"Commit(sha=""4fd1e8f99833eebdd2e99f3456322aa197...",2020-10-07 03:28:26
3,okhttp,https://github.com/square/okhttp,parent-4.9.1,"Commit(sha=""63dcd95bfa2345bb3f3d4abc6b6dbf36cf...",2021-01-30 18:09:57
4,okhttp,https://github.com/square/okhttp,parent-4.9.0,"Commit(sha=""cbeaf8f955fff9caa5652ccc6c1393ec8b...",2020-09-11 21:08:20


In [8]:
len(new_links)

109

In [9]:
new_links['major_version'] = new_links['version_name'].apply(lambda x: x.split('.')[0])
new_links['major_version'] = new_links['major_version'].apply(lambda x: x.split('-')[-1])
new_links['major_version'] = new_links['major_version'].apply(lambda x: x.split('/')[-1])
new_links['major_version'] = new_links['major_version'].apply(lambda x: x.split('_')[-1])
new_links['major_version'] = new_links['major_version'].apply(lambda x: x.replace('v',''))
new_links['major_version'] = new_links['major_version'].apply(lambda x: x.replace('release',''))
new_links['major_version_int'] = pd.to_numeric(new_links['major_version'],errors='coerce')

In [10]:
new_links.head()

Unnamed: 0,project_name,project_link,version_name,commit,timestamp,major_version,major_version_int
0,okhttp,https://github.com/square/okhttp,parent-5.0.0-alpha.2,"Commit(sha=""b84627ef4bc43f2096c1b764720f9ff238...",2021-01-30 19:44:19,5,5.0
1,okhttp,https://github.com/square/okhttp,parent-5.0.0-alpha.1,"Commit(sha=""79d2a8d192d007b8acb2423d3ab6bba3a4...",2021-01-30 18:36:37,5,5.0
2,okhttp,https://github.com/square/okhttp,parent-4.10.0-RC1,"Commit(sha=""4fd1e8f99833eebdd2e99f3456322aa197...",2020-10-07 03:28:26,4,4.0
3,okhttp,https://github.com/square/okhttp,parent-4.9.1,"Commit(sha=""63dcd95bfa2345bb3f3d4abc6b6dbf36cf...",2021-01-30 18:09:57,4,4.0
4,okhttp,https://github.com/square/okhttp,parent-4.9.0,"Commit(sha=""cbeaf8f955fff9caa5652ccc6c1393ec8b...",2020-09-11 21:08:20,4,4.0


In [11]:
cleaned_links = new_links.groupby(['project_name','major_version']).agg(['count']).reset_index()
cleaned_links.columns = cleaned_links.columns.droplevel()
cleaned_links.columns = ['project_name', 'major_version', 'count', 'count1', 'count2', 'count3', 'count4']
cleaned_links = cleaned_links[['project_name', 'major_version', 'count']]
cleaned_links.head()

Unnamed: 0,project_name,major_version,count
0,okhttp,1,14
1,okhttp,2,17
2,okhttp,3,48
3,okhttp,4,27
4,okhttp,5,2


In [12]:
cleaned_links_requirement_fulfill = cleaned_links[cleaned_links['count'] >= 20]
cleaned_links_requirement_fulfill = cleaned_links_requirement_fulfill.sort_values(['project_name', 'major_version'], ascending=False)
cleaned_links_requirement_fulfill = cleaned_links_requirement_fulfill[cleaned_links_requirement_fulfill['project_name'] != 'socketio-socket.io-client-java']
cleaned_links_requirement_fulfill = cleaned_links_requirement_fulfill[cleaned_links_requirement_fulfill['project_name'] != 'apache-sling-org-apache-sling-testing-sling-mock']
cleaned_links_requirement_fulfill = cleaned_links_requirement_fulfill[cleaned_links_requirement_fulfill['project_name'] != 'apache-sling-org-apache-sling-scripting-jsp']
cleaned_links_requirement_fulfill = cleaned_links_requirement_fulfill[cleaned_links_requirement_fulfill['project_name'] != 'apache-sling-org-apache-sling-resourceresolver']
cleaned_links_requirement_fulfill = cleaned_links_requirement_fulfill[cleaned_links_requirement_fulfill['project_name'] != 'apache-sling-org-apache-sling-event']
cleaned_links_requirement_fulfill['major_version'] = cleaned_links_requirement_fulfill['major_version'].apply(lambda x: x.split('-')[-1])
cleaned_links_requirement_fulfill['major_version'] = cleaned_links_requirement_fulfill['major_version'].apply(lambda x: x.split('/')[-1])
cleaned_links_requirement_fulfill['major_version'] = cleaned_links_requirement_fulfill['major_version'].apply(lambda x: x.split('_')[-1])
cleaned_links_requirement_fulfill['major_version'] = cleaned_links_requirement_fulfill['major_version'].apply(lambda x: x.replace('v',''))
cleaned_links_requirement_fulfill['major_version'] = cleaned_links_requirement_fulfill['major_version'].apply(lambda x: x.replace('r',''))
cleaned_links_requirement_fulfill['major_version'] = pd.to_numeric(cleaned_links_requirement_fulfill['major_version'])
cleaned_links_requirement_fulfill['count'] = cleaned_links_requirement_fulfill['count'].astype(str)
cleaned_links_requirement_fulfill['rank'] = cleaned_links_requirement_fulfill.groupby('project_name').rank(method='min',ascending=False)
cleaned_links_requirement_fulfill.head(5)

Unnamed: 0,project_name,major_version,count,rank
3,okhttp,4,27,1.0
2,okhttp,3,48,2.0


In [13]:
project_links_rank_1 = cleaned_links_requirement_fulfill[cleaned_links_requirement_fulfill['rank'] == 1]
project_links_rank_1['major_version_int'] = project_links_rank_1['major_version']
project_links_rank_1.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,project_name,major_version,count,rank,major_version_int
3,okhttp,4,27,1.0,4


In [14]:
final_links = pd.merge(new_links, project_links_rank_1, how='right', on=['project_name', 'major_version_int'])
final_links.tail()

Unnamed: 0,project_name,project_link,version_name,commit,timestamp,major_version_x,major_version_int,major_version_y,count,rank
22,okhttp,https://github.com/square/okhttp,parent-4.0.0-alpha02,"Commit(sha=""7925bfc5c5da1605486e37a9360ab03f1e...",2019-05-26 00:53:59,4,4.0,4,27,1.0
23,okhttp,https://github.com/square/okhttp,parent-4.0.0-RC3,"Commit(sha=""bad333c0a31904ff76b0d67ab8c46d085c...",2019-06-24 23:44:19,4,4.0,4,27,1.0
24,okhttp,https://github.com/square/okhttp,parent-4.0.0-RC2,"Commit(sha=""8603e2d20e4335a7a530f90a2f6439d16b...",2019-06-21 14:01:40,4,4.0,4,27,1.0
25,okhttp,https://github.com/square/okhttp,parent-4.0.0-RC1,"Commit(sha=""148938a17895ec72ee09b6bb4d23fb2bd7...",2019-06-04 04:28:50,4,4.0,4,27,1.0
26,okhttp,https://github.com/square/okhttp,parent-4.0.0-ALPHA01,"Commit(sha=""8f21b934f928986bba7e50114911c3c494...",2019-05-09 00:54:28,4,4.0,4,27,1.0


In [15]:
final_20limit_links = final_links.groupby('project_name').head(20)

In [16]:
final_20limit_links.to_csv('volatile_projects_complete_links_limit20.csv', index=False)

## Skip the following 2 cells for full run

In [25]:
project_name = 'okhttp'
project_releases = pd.read_csv('volatile_projects_complete_links_limit20.csv')

#project_releases = project_releases[project_releases['project_name'] == 'apache-spark']

In [26]:
current_project = pd.DataFrame(project_releases[project_releases['project_name'] == project_name])
current_project['rank'] = current_project['timestamp'].rank()
current_project = current_project.sort_values(by=['rank'], ascending=False)
current_project['previous_version'] = current_project['version_name'].shift(-1)
current_project['previous_version_timestamp'] = current_project['timestamp'].shift(-1)
current_project['previous_version_timestamp'].fillna(current_project['timestamp'], inplace=True)
current_project['timestamp']= pd.to_datetime(current_project['timestamp'])
current_project['previous_version_timestamp']= pd.to_datetime(current_project['previous_version_timestamp'])
current_project['timestamp_diff'] = current_project['timestamp'] - current_project['previous_version_timestamp']
current_project['timestamp_diff_hours'] = current_project['timestamp_diff'].astype('timedelta64[h]')
current_project.tail()

Unnamed: 0,project_name,project_link,version_name,commit,timestamp,major_version_x,major_version_int,major_version_y,count,rank,previous_version,previous_version_timestamp,timestamp_diff,timestamp_diff_hours
15,okhttp,https://github.com/square/okhttp,parent-4.2.2,"Commit(sha=""d02340f9dfac4ead42befc1a4d477b4540...",2019-10-06 20:18:49,4,4.0,4,27,5.0,parent-4.2.1,2019-10-02 12:53:27,4 days 07:25:22,103.0
16,okhttp,https://github.com/square/okhttp,parent-4.2.1,"Commit(sha=""57a165b69c6551c1caec8a557e0e9c9abf...",2019-10-02 12:53:27,4,4.0,4,27,4.0,parent-4.2.0,2019-09-10 17:04:12,21 days 19:49:15,523.0
17,okhttp,https://github.com/square/okhttp,parent-4.2.0,"Commit(sha=""582f8ef2f78cf001d479cb65831674289f...",2019-09-10 17:04:12,4,4.0,4,27,3.0,parent-4.1.1,2019-09-05 04:24:05,5 days 12:40:07,132.0
18,okhttp,https://github.com/square/okhttp,parent-4.1.1,"Commit(sha=""cf93aca33cc30a44724a8fb9e7042ccd17...",2019-09-05 04:24:05,4,4.0,4,27,2.0,parent-4.1.0,2019-08-12 17:00:00,23 days 11:24:05,563.0
19,okhttp,https://github.com/square/okhttp,parent-4.1.0,"Commit(sha=""4739b278066c25de7d1fcada943e0aaddd...",2019-08-12 17:00:00,4,4.0,4,27,1.0,,2019-08-12 17:00:00,0 days 00:00:00,0.0


In [19]:
### Download dataset
try:
    os.mkdir('raw_sourcecode/' + project_name)
except:
    pass
for row in current_project.iterrows():
    command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/' + project_name +' & mkdir ' + project_name + '_' +row[1]['version_name']
    
    #print(command)
    os.system(command)
    
    command = 'git clone ' + row[1]['project_link'] +  ' C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/' + project_name + '/' + project_name + '_' + row[1]['version_name']
    
    print(command)
    os.system(command)
    commit = row[1]['commit'].replace('Commit(sha="','')
    commit = commit.replace('")','')
    command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/' + project_name + '/' + project_name +'_' + row[1]['version_name'] + ' & git checkout ' + commit
    print(command)
    os.system(command)

git clone https://github.com/square/okhttp C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.1
cd C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.1 & git checkout 63dcd95bfa2345bb3f3d4abc6b6dbf36cfb08aaf
git clone https://github.com/square/okhttp C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.10.0-RC1
cd C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.10.0-RC1 & git checkout 4fd1e8f99833eebdd2e99f3456322aa197f5e652
git clone https://github.com/square/okhttp C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.0
cd C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.0 & git checkout cbeaf8f955fff9caa5652ccc6c1393ec8b993799
git clone https://github.com/square/okhttp C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.8.1
cd C:/Users

In [20]:
### Run Depends
try:
    os.mkdir('raw_depends/' + project_name)
except:
    pass
for row in current_project.iterrows():
    #command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/raw_depends/' + project_name +' & mkdir ' + project_name + '_' +row[1]['version_name']
    #os.system(command)
    #print(command)
    command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & ' + 'java -jar depends.jar java C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/'  + project_name + '/' + project_name + '_' + row[1]['version_name']+  ' ../raw_depends/' + project_name + "/" + project_name + '_' + row[1]['version_name'] 
    print(command)
    os.system(command)

cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & java -jar depends.jar java C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.1 ../raw_depends/okhttp/okhttp_parent-4.9.1
cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & java -jar depends.jar java C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.10.0-RC1 ../raw_depends/okhttp/okhttp_parent-4.10.0-RC1
cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & java -jar depends.jar java C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.0 ../raw_depends/okhttp/okhttp_parent-4.9.0
cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & java -jar depends.jar java C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.8.1 ../raw_depends/okhttp/okhttp_parent-4.8.1
cd C:/Users/tanji/Desktop/SoftwareRemodularization/depends-0.9.2 & java -jar depends.j

In [21]:
### Generate groundtruth
try:
    os.mkdir('groundtruth/' + project_name)
except:
    pass

counter = 10
for row in current_project.iterrows():
    counter -= 1
    if counter < 0:
        break
    project_rank = row[1]['rank']
    version_name = row[1]['version_name']
    #print(project_rank)
    rootdir = 'C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/' + project_name + '/' + project_name + '_' + row[1]['version_name']

    print(rootdir, 'current_project')


    ### To obtain the current initial directory
    full_dir_arr = []
    for root, dirs, files in os.walk(rootdir):
        #print(root)
        #print(dirs)
        for element in files:
            if '.java' in element:
                dir_string = root + '\\' + element
                full_dir_arr.append(dir_string)

    cluster_dict = {}
    cluster_tree = {}


    for element in full_dir_arr:
        element = element.split('\\')
        child = element[-1]
        parent = element[-2]
        cluster_tree[child] = parent
    

    '''
    current_rank = project_rank
    try:
        while current_rank >= (project_rank - 9):
            current_rank -= 1
            project_to_be_compared = current_project[current_project['rank'] == current_rank]
            project_to_be_compared_rootdir = 'C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/' + project_name + '/' + project_name + '_' + project_to_be_compared['version_name'].values[0]
            #print(project_to_be_compared_rootdir)


            ### To obtain the current initial directory
            project_to_be_compared_full_dir_arr = []
            for root, dirs, files in os.walk(project_to_be_compared_rootdir):
                #print(root)
                #print(dirs)
                for element in files:
                    if '.java' in element:
                        dir_string = root + '\\' + element
                        project_to_be_compared_full_dir_arr.append(dir_string)

            project_to_be_compared_cluster_dict = {}
            project_to_be_compared_cluster_tree = {}


            for element in project_to_be_compared_full_dir_arr:
                element = element.split('\\')
                child = element[-1]
                parent = element[-2]
                project_to_be_compared_cluster_tree[child] = parent

            cluster_tree =  {x:cluster_tree[x] for x in cluster_tree if x in project_to_be_compared_cluster_tree} 
    '''
    arr_a_rsf = []
    filename = 'C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/' + project_name + '/' + project_name + '_' + row[1]['version_name'] + '.txt'
    with open(filename, 'w') as f:
        for key, value in cluster_tree.items():
            arr_a_rsf.append(key)
            f.write('contain ' + str(value).replace(" ",'') + ' ' + str(key).replace(' ','') + '\n')

        #depends_dir = 'C:/Users/tanji/Desktop/SoftwareRemodularization/raw_depends/' + project_name + '/' + project_name + '_' + row[1]['version_name'] + '.json'
        #print(depends_dir)
    #except:
        #print(e)
        #pass
    
        
        

C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.1 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.10.0-RC1 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.0 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.8.1 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.8.0 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.7.2 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.7.1 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.7.0 current_project
C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.6.0 current_project
C:/Users/tanji/Desktop/SoftwareRemodulari

In [27]:
current_project.head()

Unnamed: 0,project_name,project_link,version_name,commit,timestamp,major_version_x,major_version_int,major_version_y,count,rank,previous_version,previous_version_timestamp,timestamp_diff,timestamp_diff_hours
1,okhttp,https://github.com/square/okhttp,parent-4.9.1,"Commit(sha=""63dcd95bfa2345bb3f3d4abc6b6dbf36cf...",2021-01-30 18:09:57,4,4.0,4,27,20.0,parent-4.10.0-RC1,2020-10-07 03:28:26,115 days 14:41:31,2774.0
0,okhttp,https://github.com/square/okhttp,parent-4.10.0-RC1,"Commit(sha=""4fd1e8f99833eebdd2e99f3456322aa197...",2020-10-07 03:28:26,4,4.0,4,27,19.0,parent-4.9.0,2020-09-11 21:08:20,25 days 06:20:06,606.0
2,okhttp,https://github.com/square/okhttp,parent-4.9.0,"Commit(sha=""cbeaf8f955fff9caa5652ccc6c1393ec8b...",2020-09-11 21:08:20,4,4.0,4,27,18.0,parent-4.8.1,2020-08-06 14:01:19,36 days 07:07:01,871.0
3,okhttp,https://github.com/square/okhttp,parent-4.8.1,"Commit(sha=""fc6c29c4f93a7604fb0fee88be4bbc91dd...",2020-08-06 14:01:19,4,4.0,4,27,17.0,parent-4.8.0,2020-07-11 18:01:06,25 days 20:00:13,620.0
4,okhttp,https://github.com/square/okhttp,parent-4.8.0,"Commit(sha=""a70e992c3f7d9adea544c40cc2a4640d28...",2020-07-11 18:01:06,4,4.0,4,27,16.0,parent-4.7.2,2020-05-20 13:06:47,52 days 04:54:19,1252.0


In [23]:
### Run Refactoring Miner

project_name
#print(current_project.head())
first_tag = current_project.head(1)['version_name'].values[0]
last_tag= current_project.tail(1)['version_name'].values[0]

print(first_tag)
print(last_tag)

try:
    os.mkdir('raw_refactoringMiner/' + project_name)
except:
    pass
try:
    for row in current_project.iterrows():
        #command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/raw_depends/' + project_name +' & mkdir ' + project_name + '_' +row[1]['version_name']
        #os.system(command)
        #print(command)
        command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/RefactoringMiner-2.1.0/bin & ' + 'RefactoringMiner -bt C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/'  + project_name + '/' + project_name + '_' + row[1]['version_name'] + ' ' + row[1]['previous_version'] + ' ' + row[1]['version_name'] + ' -json ' + 'C:/Users/tanji/Desktop/SoftwareRemodularization/raw_refactoringMiner/' +project_name + '/'+ project_name + '_' + row[1]['version_name'] + '.json'
        print(command)
        os.system(command)
except:
    pass



parent-4.9.1
parent-4.1.0
cd C:/Users/tanji/Desktop/SoftwareRemodularization/RefactoringMiner-2.1.0/bin & RefactoringMiner -bt C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.1 parent-4.10.0-RC1 parent-4.9.1 -json C:/Users/tanji/Desktop/SoftwareRemodularization/raw_refactoringMiner/okhttp/okhttp_parent-4.9.1.json
cd C:/Users/tanji/Desktop/SoftwareRemodularization/RefactoringMiner-2.1.0/bin & RefactoringMiner -bt C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.10.0-RC1 parent-4.9.0 parent-4.10.0-RC1 -json C:/Users/tanji/Desktop/SoftwareRemodularization/raw_refactoringMiner/okhttp/okhttp_parent-4.10.0-RC1.json
cd C:/Users/tanji/Desktop/SoftwareRemodularization/RefactoringMiner-2.1.0/bin & RefactoringMiner -bt C:/Users/tanji/Desktop/SoftwareRemodularization/raw_sourcecode/okhttp/okhttp_parent-4.9.0 parent-4.8.1 parent-4.9.0 -json C:/Users/tanji/Desktop/SoftwareRemodularization/raw_refactoringMiner/okhttp/okhttp_

In [28]:
current_project.head()

Unnamed: 0,project_name,project_link,version_name,commit,timestamp,major_version_x,major_version_int,major_version_y,count,rank,previous_version,previous_version_timestamp,timestamp_diff,timestamp_diff_hours
1,okhttp,https://github.com/square/okhttp,parent-4.9.1,"Commit(sha=""63dcd95bfa2345bb3f3d4abc6b6dbf36cf...",2021-01-30 18:09:57,4,4.0,4,27,20.0,parent-4.10.0-RC1,2020-10-07 03:28:26,115 days 14:41:31,2774.0
0,okhttp,https://github.com/square/okhttp,parent-4.10.0-RC1,"Commit(sha=""4fd1e8f99833eebdd2e99f3456322aa197...",2020-10-07 03:28:26,4,4.0,4,27,19.0,parent-4.9.0,2020-09-11 21:08:20,25 days 06:20:06,606.0
2,okhttp,https://github.com/square/okhttp,parent-4.9.0,"Commit(sha=""cbeaf8f955fff9caa5652ccc6c1393ec8b...",2020-09-11 21:08:20,4,4.0,4,27,18.0,parent-4.8.1,2020-08-06 14:01:19,36 days 07:07:01,871.0
3,okhttp,https://github.com/square/okhttp,parent-4.8.1,"Commit(sha=""fc6c29c4f93a7604fb0fee88be4bbc91dd...",2020-08-06 14:01:19,4,4.0,4,27,17.0,parent-4.8.0,2020-07-11 18:01:06,25 days 20:00:13,620.0
4,okhttp,https://github.com/square/okhttp,parent-4.8.0,"Commit(sha=""a70e992c3f7d9adea544c40cc2a4640d28...",2020-07-11 18:01:06,4,4.0,4,27,16.0,parent-4.7.2,2020-05-20 13:06:47,52 days 04:54:19,1252.0


In [39]:
counter = 1
for row in current_project.iterrows():
    if counter > 1:
        break
    refactoring_miner_filename = f'raw_refactoringMiner/{project_name}/' + project_name + '_' + row[1]['version_name'] + '.json'
    print(refactoring_miner_filename)
    f = open(refactoring_miner_filename)
    refactoring_miner = json.load(f)
    for i in refactoring_miner['commits']:
        print(i)
    f.close()
    counter += 1

raw_refactoringMiner/okhttp/okhttp_parent-4.9.1.json
{'repository': 'https://github.com/square/okhttp', 'sha1': 'd2e28ab672d5734a76f97f48174a3e6e8339e183', 'url': 'https://github.com/square/okhttp/commit/d2e28ab672d5734a76f97f48174a3e6e8339e183', 'refactorings': []}
{'repository': 'https://github.com/square/okhttp', 'sha1': '63dcd95bfa2345bb3f3d4abc6b6dbf36cfb08aaf', 'url': 'https://github.com/square/okhttp/commit/63dcd95bfa2345bb3f3d4abc6b6dbf36cfb08aaf', 'refactorings': []}


In [24]:
try:
    os.mkdir('MoJo_1.2.1/' + project_name)
except:
    pass

groundtruth_dir = 'C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/' + project_name + '/'
#'C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/' + project_name + '/' + project_name + '_' + row[1]['version_name'] + '.txt'

groundtruth_arr = []
for root, dirs, files in os.walk(groundtruth_dir):
    #print(files)
    for file in files:
        groundtruth_full_path = groundtruth_dir + file
        groundtruth_arr.append(groundtruth_full_path)
    



for i in range(len(groundtruth_arr)-1 ,0,-1):
    print(i)
    print(groundtruth_arr[i], groundtruth_arr[i-1])
    
    # These arrays are for MoJo balancing. To have the same number of children / entities on both sides.
    arr_1 = []
    arr_2 = []
    
    # These arrays are to keep track of the changed composition for mapping?
    arr_1_changed_composition = []
    arr_2_changed_composition = []
    
    file_1 = open(groundtruth_arr[i])
    file_2 = open(groundtruth_arr[i-1])
    for line in file_1:
        line = line.split('\n')[0]
        line = line.split(' ')
        arr_1.append(line[-1])
        arr_1_changed_composition.append((line[-2], line[-1]))
    
    
    #print(to_be_added)
    
    for line in file_2:
        line = line.split('\n')[0]
        line = line.split(' ')
        arr_2.append(line[-1])
        arr_2_changed_composition.append((line[-2], line[-1]))
    
    to_be_added_2 = list(set(arr_1) - set(arr_2))
    #print(to_be_added_1)
    to_be_added_1 = list(set(arr_2) - set(arr_1))
    #print(to_be_added_2)
    
    composition_diff_2 = list(set(arr_1_changed_composition) - set(arr_2_changed_composition))
    composition_diff_1 = list(set(arr_2_changed_composition) - set(arr_1_changed_composition))
    composition_diff = composition_diff_1 + composition_diff_2
    #print(composition_diff)
    
    
    
    file_1 = open(groundtruth_arr[i])
    file_2 = open(groundtruth_arr[i-1])
    temp_1 = open('C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/temp_1.txt', 'w')
    for line in file_1:
        temp_1.write(line)
    for element in to_be_added_1:
        temp_1.write('contain ' + element + ' ' + element + '\n')
    temp_1.close()
    
    temp_2 = open('C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/temp_2.txt', 'w')
    for line in file_2:
        temp_2.write(line)
    for element in to_be_added_2:
        temp_2.write('contain ' + element + ' ' + element + '\n')
    temp_2.close()
    
    file_1.close()
    file_2.close()
    
    f = open('C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/' + project_name + '/' + project_name + '_results.txt', 'a')
    identifier = str(groundtruth_arr[i].split('/')[-1].replace('.txt', '')) + ',' + str(groundtruth_arr[i-1].split('/')[-1].replace('.txt', '')) + '\n'
    print(identifier)
    f.write(str(identifier))
    f.write(str(composition_diff) + '\n')
    #print(str(groundtruth_arr[i].split('_')[-1].replace('.txt', '')))
    #print(current_project[current_project['version_name'] == str(groundtruth_arr[i].split('_')[-1].replace('.txt', ''))]['timestamp_diff_hours'].values[0])
    time_taken = current_project[current_project['version_name'] == str(groundtruth_arr[i].split('_')[-1].replace('.txt', ''))]['timestamp_diff_hours'].values[0]
    f.write('Hours taken:' + str(time_taken) + '\n')
    f.close()
    
    command = 'cd C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1 & ' + 'java MoJo ' + 'temp_1.txt' + ' ' + 'temp_2.txt' + ' >> ' + project_name + '/' + project_name + '_results.txt'
    print(command)
    os.system(command)
    
    os.remove('C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/temp_1.txt')
    os.remove('C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/temp_2.txt')
    

9
C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/okhttp/okhttp_parent-4.9.1.txt C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/okhttp/okhttp_parent-4.9.0.txt
okhttp_parent-4.9.1,okhttp_parent-4.9.0

cd C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1 & java MoJo temp_1.txt temp_2.txt >> okhttp/okhttp_results.txt
8
C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/okhttp/okhttp_parent-4.9.0.txt C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/okhttp/okhttp_parent-4.8.1.txt
okhttp_parent-4.9.0,okhttp_parent-4.8.1

cd C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1 & java MoJo temp_1.txt temp_2.txt >> okhttp/okhttp_results.txt
7
C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/okhttp/okhttp_parent-4.8.1.txt C:/Users/tanji/Desktop/SoftwareRemodularization/groundtruth/okhttp/okhttp_parent-4.8.0.txt
okhttp_parent-4.8.1,okhttp_parent-4.8.0

cd C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1 & java MoJo t

In [None]:
## Get all commits

repo = g.get_repo('apache/zookeeper')

for release in repo.get_commits():
    #print(release.commit.sha)
    commit = repo.get_commit(sha=release.commit.sha)
    print(commit.commit.author.date)
    #print(commit.commit.message)

In [31]:
## Get all tags

repo = g.get_repo('apache/zookeeper')

for release in repo.get_tags():
    #print(release.commit.sha)
    commit = repo.get_commit(sha=release.commit.sha)
    print(commit.commit.author.date)
    #print(commit.commit.message)

2019-01-22 14:37:29
[maven-release-plugin] prepare release zookeeper-\
2021-03-17 09:42:11
Prepared 3.7.0
2021-03-17 09:42:11
Prepared 3.7.0
2021-01-24 19:05:00
Prepared 3.7.0
2021-01-19 09:36:44
Prepared 3.7.0
2021-04-08 16:27:50
Prepared 3.6.3
2021-04-08 16:27:50
Prepared 3.6.3
2021-04-04 10:47:19
Prepared 3.6.3
2021-04-01 09:20:13
Prepared 3.6.3
2020-09-04 12:34:41
Prepared 3.6.2
2020-09-04 12:34:41
Prepared 3.6.2
2020-08-31 14:47:18
Prepared 3.6.2
2020-04-21 14:57:00
Prepared 3.6.1
2020-04-21 14:57:00
Prepared 3.6.1
2020-04-15 15:56:05
Prepared 3.6.1
2020-02-25 14:17:17
Prepared 3.6.0
2020-02-25 14:17:17
Prepared 3.6.0
2020-02-18 16:52:00
Prepared 3.6.0
2020-02-05 19:03:11
Prepared 3.6.0
2020-02-01 13:26:45
Prepared 3.6.0
2020-01-11 13:21:49
Prepared 3.6.0
2021-01-06 19:46:31
Preparing for release 3.5.9
2021-01-06 19:46:31
Preparing for release 3.5.9
2021-01-06 16:26:38
Preparing for release 3.5.9
2020-11-26 14:33:54
Preparing for release 3.5.9
2020-05-04 14:24:46
Preparing for rel

2019-03-06 03:32:11
Exclude spotbugs annotations jar from the release

Fix for 3.4.14-rc4: exclude `spotbugs-annotations` jar from the distribution.

Author: Andor Molnar <andor@apache.org>

Reviewers: phunt@apache.org

Closes #841 from anmolnar/fix_rc_exclude_spotbugsjar

Change-Id: Ie4a36613161e103fc2c50690281fa26c90f48d1b
(cherry picked from commit 372e713a9d2d9264417313e5d68e9437ffddd0f5)
Signed-off-by: Patrick Hunt <phunt@apache.org>
2019-02-27 15:48:59
Fixed version numbers in pom.xml files
2019-02-20 12:01:26
ZOOKEEPER-1815: Tolerate incorrectly set system hostname in tests (3.4)

Inspired by the following commit: https://github.com/apache/zookeeper/commit/23852655a6d41f675b8a9cca66387fca9bfe4e12

in order to fix build problems on H31 Jenkins slave. Infra reported that nothing has been changed in name resolution, but the patch still looks reasonable and hopefully fix the JMX connection problem.

This "backport" is JMX-only and doesn't try to fully backport the original patch.

A

KeyboardInterrupt: 

In [5]:
response = requests.get("https://api.github.com/repos/apache/zookeeper/stats/contributors")

In [6]:
print(response)

<Response [200]>


In [7]:
response.json()

[{'total': 2,
  'weeks': [{'w': 1193529600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1194134400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1194739200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1195344000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1195948800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1196553600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1197158400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1197763200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1198368000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1198972800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1199577600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1200182400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1200787200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1201392000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1201996800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1202601600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1203206400, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1203811200, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1204416000, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1205020800, 'a': 0, 'd': 0, 'c': 0},
   {'w': 1205625600, 'a': 0, 'd': 0, 'c': 0},
   {'w': 12

In [9]:
print(len(response.json()))

100


In [None]:
total_weeks = None

for element in response.json():
    if total_weeks is None:
        total_weeks = []
        for week in element['weeks']:
            total_weeks.append(week['w'], week['a'], week['d'], week['c'])
    else:
        for week in element['weeks']:
            total_weeks.append(week['w'], week['a'], week['d'], week['c'])
    

In [15]:
response_2 = requests.get("https://api.github.com/repos/apache/spark/stats/contributors")

In [16]:
print(len(response_2.json()))

100


In [16]:
file = open('C:/Users/tanji/Desktop/SoftwareRemodularization/MoJo_1.2.1/dropwizard/dropwizard_results.txt')
for line in file:
    print(line)
    print(' ')
file.close()

dropwizard_v2.1.0-beta.1,dropwizard_v2.0.9

 
MoJo(temp_1.txt,temp_2.txt) = 26

 


 
MoJo(temp_2.txt,temp_1.txt) = 26

 
The Mojo value is 26

 
dropwizard_v2.0.9,dropwizard_v2.0.8

 
MoJo(temp_1.txt,temp_2.txt) = 5

 


 
MoJo(temp_2.txt,temp_1.txt) = 5

 
The Mojo value is 5

 
dropwizard_v2.0.8,dropwizard_v2.0.7

 
MoJo(temp_1.txt,temp_2.txt) = 0

 


 
MoJo(temp_2.txt,temp_1.txt) = 0

 
The Mojo value is 0

 
dropwizard_v2.0.7,dropwizard_v2.0.6

 
MoJo(temp_1.txt,temp_2.txt) = 0

 


 
MoJo(temp_2.txt,temp_1.txt) = 0

 
The Mojo value is 0

 
dropwizard_v2.0.6,dropwizard_v2.0.5

 
MoJo(temp_1.txt,temp_2.txt) = 0

 


 
MoJo(temp_2.txt,temp_1.txt) = 0

 
The Mojo value is 0

 
dropwizard_v2.0.5,dropwizard_v2.0.4

 
MoJo(temp_1.txt,temp_2.txt) = 0

 


 
MoJo(temp_2.txt,temp_1.txt) = 0

 
The Mojo value is 0

 
dropwizard_v2.0.4,dropwizard_v2.0.3

 
MoJo(temp_1.txt,temp_2.txt) = 0

 


 
MoJo(temp_2.txt,temp_1.txt) = 0

 
The Mojo value is 0

 
dropwizard_v2.0.3,dropwizard_v2.0.21

