In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import re

In [2]:
# Let pandas render up to 500 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 500)

In [3]:
import os
import sys

# Put the releasy2 directory (three levels up, under dev/) at the front of the
# module search path so the `import releasy` that follows can be resolved from
# it. The insertion is skipped when the path is already registered, so
# re-running the cell does not add duplicates.
releasy_parts = ('..', '..', '..', 'dev', 'releasy2')
releasy_module = os.path.abspath(os.path.join(*releasy_parts))
if sys.path.count(releasy_module) == 0:
    sys.path.insert(0, releasy_module)
    
import releasy
from releasy.miner_git import GitVcs
from releasy.miner import TagReleaseMiner, PathCommitMiner, RangeCommitMiner, TimeCommitMiner, VersionReleaseMatcher, VersionReleaseSorter, TimeReleaseSorter, VersionWoPreReleaseMatcher


In [4]:
# Load the table of projects to analyse from a pickle stored next to the
# notebook. The mining loop iterates it with itertuples() and reads the
# `name` and `lang` fields of each row.
# NOTE(review): unpickling can execute arbitrary code -- only load a
# 'projects.zip' that was produced by a trusted source.
projects = pd.read_pickle('projects.zip')

In [5]:
# Build one row per release comparing three ways of assigning commits to a
# release. The path-based result is the reference: the range- and time-based
# results are scored against it as true positives / false positives /
# false negatives.

# Column layout of the resulting table; the order matches the keys of the
# records built inside the loop.
release_columns = [
    "project", "name", "lang", "head", "time", "commits", "base_releases",
    "range_commits", "range_base_releases", "range_tpos", "range_fpos", "range_fneg",
    "time_commits", "time_base_releases", "time_tpos", "time_fpos", "time_fneg"]

# Projects whose release tags carry a fixed suffix that is handed to the
# release matcher.
suffix_exception_catalog = {
    "spring-projects/spring-boot": ".RELEASE",
    "spring-projects/spring-framework": ".RELEASE",
    "netty/netty": ".Final",
    "godotengine/godot": "-stable",
}

count = 0  # not updated in this cell; kept in case another cell reads it

# One DataFrame per successfully mined project; they are concatenated once
# after the loop instead of growing `releases` on every iteration
# (DataFrame.append was removed in pandas 2.0 and was quadratic anyway).
project_frames = []
for i, project in enumerate(projects.itertuples()):
    path = os.path.abspath(os.path.join('..', '..', '..', 'repos2', project.name))

    try:
        print(f"{i+1:3} {project.name}")
        # None when the project has no catalogued tag suffix.
        suffix_exception = suffix_exception_catalog.get(project.name)

        vcs = GitVcs(path)
        release_matcher = VersionWoPreReleaseMatcher(suffix_exception=suffix_exception)
        time_release_sorter = TimeReleaseSorter()
        version_release_sorter = VersionReleaseSorter()

        # The same tags are mined twice, once with each sorter.
        time_release_miner = TagReleaseMiner(vcs, release_matcher, time_release_sorter)
        time_sorted_release_set = time_release_miner.mine_releases()

        version_release_miner = TagReleaseMiner(vcs, release_matcher, version_release_sorter)
        version_release_set = version_release_miner.mine_releases()

        # The path miner works on the time-sorted releases, the range and
        # time miners on the version-sorted ones.
        path_miner = PathCommitMiner(vcs, time_sorted_release_set)
        range_miner = RangeCommitMiner(vcs, version_release_set)
        time_miner = TimeCommitMiner(vcs, version_release_set)

        print(" - parsing by path")
        path_release_set = path_miner.mine_commits()
        print(" - parsing by time")
        time_release_set = time_miner.mine_commits()
        print(" - parsing by range")
        range_release_set = range_miner.mine_commits()

        print("")
        # Named `release_stats` so the `stats` module imported from scipy at
        # the top of the notebook is not overwritten.
        release_stats = []
        for release in version_release_set:
            path_release = path_release_set[release.name]
            range_release = range_release_set[release.name]
            time_release = time_release_set[release.name]

            path_commits = set(path_release.commits)
            range_commits = set(range_release.commits)
            time_commits = set(time_release.commits)

            release_stats.append({
                "project": project.name,
                "name": release.name.value,
                "lang": project.lang,
                "head": release.head,
                "time": release.time,
                "commits": len(path_commits),
                "base_releases": [base.name.value for base in path_release.base_releases],
                "range_commits": len(range_commits),
                "range_base_releases": [base.name.value for base in range_release.base_releases],
                "range_tpos": len(path_commits & range_commits),
                "range_fpos": len(range_commits - path_commits),
                "range_fneg": len(path_commits - range_commits),
                "time_commits": len(time_commits),
                "time_base_releases": [base.name.value for base in time_release.base_releases],
                "time_tpos": len(path_commits & time_commits),
                "time_fpos": len(time_commits - path_commits),
                "time_fneg": len(path_commits - time_commits)
            })

        # Reached only when the whole project was processed, so a project
        # that fails part-way contributes no rows. Empty results are skipped
        # to keep the final concat free of all-empty frames.
        if release_stats:
            project_frames.append(pd.DataFrame(release_stats, columns=release_columns))
    except Exception as e:
        # Best effort: report the failure and continue with the next project.
        print(f" - error: {e}")

# Each project's frame keeps its own 0..n-1 index, so index labels repeat
# across projects in the combined table.
if project_frames:
    releases = pd.concat(project_frames)
else:
    releases = pd.DataFrame(columns=release_columns)

# Untouched copy of the mined result, so later cells can start over without
# repeating the mining.
releases_bkp = releases.copy()

  1 vuejs/vue
 - parsing by path
 - parsing by time
 - parsing by range

  2 facebook/react
 - parsing by path
 - parsing by time
 - parsing by range

  3 twbs/bootstrap
 - parsing by path
 - parsing by time
 - parsing by range

  4 d3/d3
 - parsing by path
 - parsing by time
 - parsing by range

  5 facebook/react-native
 - parsing by path
 - parsing by time
 - parsing by range

  6 nodejs/node
 - parsing by path
 - parsing by time
 - parsing by range

  7 mrdoob/three.js
 - parsing by path
 - parsing by time
 - parsing by range

  8 angular/angular.js
 - parsing by path
 - parsing by time
 - parsing by range

  9 webpack/webpack
 - parsing by path
 - parsing by time
 - parsing by range

 10 jquery/jquery
 - parsing by path
 - parsing by time
 - parsing by range

 11 microsoft/TypeScript
 - parsing by path
 - parsing by time
 - parsing by range

 12 ant-design/ant-design
 - parsing by path
 - parsing by time
 - parsing by range

 13 reduxjs/redux
 - parsing by path
 - parsing by time


In [9]:
# Start again from the untouched copy of the mined table, so the cells that
# follow can be re-run without repeating the mining loop.
releases = releases_bkp.copy()

In [10]:
# Replace every head commit object by its id.
releases['head'] = releases['head'].apply(lambda head_commit: head_commit.id)

# Release timestamps become timezone-aware (UTC) datetimes.
releases['time'] = pd.to_datetime(releases['time'], utc=True)

# Every commit counter is coerced to a numeric dtype.
for count_column in (
        'commits',
        'range_commits', 'range_tpos', 'range_fpos', 'range_fneg',
        'time_commits', 'time_tpos', 'time_fpos', 'time_fneg'):
    releases[count_column] = pd.to_numeric(releases[count_column])

In [12]:
def precision(row, column):
    """Return the precision of one approach for a single release row.

    Args:
        row: mapping-like object (e.g. a DataFrame row) that holds the
            "<column>_tpos" and "<column>_fpos" counts.
        column: prefix of the approach to score, e.g. "range" or "time".

    Returns:
        tpos / (tpos + fpos), or 1.0 when both counts are zero.
    """
    true_positives = row[column + "_tpos"]
    selected = true_positives + row[column + "_fpos"]
    if selected != 0:
        return true_positives / selected
    # Nothing was selected, hence nothing was selected wrongly.
    return 1.0
    
def recall(row, column):
    """Return the recall of one approach for a single release row.

    Args:
        row: mapping-like object (e.g. a DataFrame row) that holds the
            "<column>_tpos" and "<column>_fneg" counts.
        column: prefix of the approach to score, e.g. "range" or "time".

    Returns:
        tpos / (tpos + fneg), or 1.0 when both counts are zero.
    """
    true_positives = row[column + "_tpos"]
    relevant = true_positives + row[column + "_fneg"]
    if relevant != 0:
        return true_positives / relevant
    # There was nothing to find, hence nothing was missed.
    return 1.0


In [13]:
# Score both approaches row by row. Columns are added in the order
# range_precision, range_recall, time_precision, time_recall.
for approach in ('range', 'time'):
    releases[approach + '_precision'] = releases.apply(precision, args=(approach,), axis=1)
    releases[approach + '_recall'] = releases.apply(recall, args=(approach,), axis=1)

In [14]:
# Preview the first rows of the per-release table, including the
# precision/recall columns.
releases.head()

Unnamed: 0,project,name,lang,head,time,commits,base_releases,range_commits,range_base_releases,range_tpos,...,range_fneg,time_commits,time_base_releases,time_tpos,time_fpos,time_fneg,range_precision,range_recall,time_precision,time_recall
0,vuejs/vue,0.6.0,javascript,218557cdec830a629252f4a9e2643973dc1f1d2d,2013-12-08 00:32:17+00:00,354,[],354,[],354,...,0,354,[],354,0,0,1.0,1.0,1.0,1.0
1,vuejs/vue,v0.7.0,javascript,f4861ca9905a170b9a4b185e8a2038dc7c11c58e,2013-12-24 03:31:05+00:00,34,[0.6.0],34,[0.6.0],34,...,0,34,[0.6.0],34,0,0,1.0,1.0,1.0,1.0
2,vuejs/vue,v0.7.1,javascript,590a7ee55b655f166ecdf8e7a5e22dab7a9e6dd7,2013-12-24 21:58:33+00:00,6,[v0.7.0],6,[v0.7.0],6,...,0,6,[v0.7.0],6,0,0,1.0,1.0,1.0,1.0
3,vuejs/vue,v0.7.2,javascript,e78fb82212ac229c91fc8b1f09a84b24f577541a,2013-12-28 05:43:40+00:00,3,[v0.7.1],3,[v0.7.1],3,...,0,3,[v0.7.1],3,0,0,1.0,1.0,1.0,1.0
4,vuejs/vue,v0.7.3,javascript,dcc839a4314de8825f7087f00c4e40ea2d22b46f,2014-01-06 19:13:12+00:00,15,[v0.7.2],15,[v0.7.2],15,...,0,15,[v0.7.2],15,0,0,1.0,1.0,1.0,1.0


In [16]:
# Show the dtype of every column to check the conversions applied to the table.
releases.dtypes

project                             object
name                                object
lang                                object
head                                object
time                   datetime64[ns, UTC]
commits                              int64
base_releases                       object
range_commits                        int64
range_base_releases                 object
range_tpos                           int64
range_fpos                           int64
range_fneg                           int64
time_commits                         int64
time_base_releases                  object
time_tpos                            int64
time_fpos                            int64
time_fneg                            int64
range_precision                    float64
range_recall                       float64
time_precision                     float64
time_recall                        float64
dtype: object

In [17]:
# Persist the per-release table as a pickle named 'releases.zip', relative to
# the current working directory.
releases.to_pickle("releases.zip")

In [18]:
# Mean precision/recall per project, lowest range-based recall first.
# The four metric columns are selected *before* aggregating: the table also
# holds string, list and datetime columns, and averaging those is an error
# on pandas >= 2.0 (older versions silently dropped them).
releases.groupby(['project'])[
    ['range_precision', 'range_recall', 'time_precision', 'time_recall']
].mean().sort_values('range_recall')

Unnamed: 0_level_0,range_precision,range_recall,time_precision,time_recall
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dotnet/efcore,0.861097,0.907146,0.363637,0.775465
taosdata/TDengine,0.899112,0.919301,0.454907,0.819983
jellyfin/jellyfin,0.92805,0.966667,0.368276,0.909911
Wox-launcher/Wox,0.973684,0.973684,0.741645,0.936863
cefsharp/CefSharp,0.980903,0.984641,0.54115,0.784172
alibaba/fastjson,0.967328,0.988636,0.584946,0.957588
apache/dubbo,0.985109,0.988803,0.439195,0.832963
ionic-team/ionic-framework,0.964719,0.990948,0.656842,0.885974
spring-projects/spring-framework,0.986764,0.992801,0.369032,0.927707
ansible/ansible,0.98711,0.993056,0.239749,0.905605


In [19]:
# Mean precision/recall per project, lowest range-based recall first.
# Only the four metric columns are aggregated: taking the mean of the whole
# table would also cover its string, list and datetime columns, which is an
# error on pandas >= 2.0 (older versions silently dropped them).
releases.groupby(['project'])[
    ['range_precision', 'range_recall', 'time_precision', 'time_recall']
].mean().sort_values('range_recall')


Unnamed: 0_level_0,range_precision,range_recall,time_precision,time_recall
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dotnet/efcore,0.861097,0.907146,0.363637,0.775465
taosdata/TDengine,0.899112,0.919301,0.454907,0.819983
jellyfin/jellyfin,0.92805,0.966667,0.368276,0.909911
Wox-launcher/Wox,0.973684,0.973684,0.741645,0.936863
cefsharp/CefSharp,0.980903,0.984641,0.54115,0.784172
alibaba/fastjson,0.967328,0.988636,0.584946,0.957588
apache/dubbo,0.985109,0.988803,0.439195,0.832963
ionic-team/ionic-framework,0.964719,0.990948,0.656842,0.885974
spring-projects/spring-framework,0.986764,0.992801,0.369032,0.927707
ansible/ansible,0.98711,0.993056,0.239749,0.905605


In [21]:
# List every release, lowest range-based recall first.
releases.sort_values(by='range_recall')

Unnamed: 0,project,name,lang,head,time,commits,base_releases,range_commits,range_base_releases,range_tpos,...,range_fneg,time_commits,time_base_releases,time_tpos,time_fpos,time_fneg,range_precision,range_recall,time_precision,time_recall
2,ansible/ansible,0.01,python,56de2e112a97f312c7a07c89e1ce5de74a2637f8,2012-03-08 19:12:58+00:00,260,[],0,[0.0.2],0,...,260,0,[0.0.2],0,0,260,1.000000,0.0,1.000000,0.000000
2,spring-projects/spring-framework,v3.0.1.RELEASE,java,c20c4e1f05eb3c64062ba3af509c528ad003f089,2010-02-18 17:56:59+00:00,234,[v3.0.0.RELEASE],0,[v3.0.1.RELEASE.A],0,...,234,0,[v3.0.1.RELEASE.A],0,0,234,1.000000,0.0,1.000000,0.000000
14,alibaba/fastjson,1.1.42,java,354543ec35cda1a67839ac0a03792e69682435be,2014-10-11 08:24:04+00:00,122,"[1.1.33, 1.1.36]",0,[1.1.37],0,...,122,0,[1.1.37],0,0,122,1.000000,0.0,1.000000,0.000000
25,dotnet/efcore,v2.1.10,c#,0cde562cc070fd00ecf33248df715f6df58a1691,2019-01-10 21:54:40+00:00,3,[v2.2.1],0,[v2.1.8],0,...,3,0,[v2.1.8],0,0,3,1.000000,0.0,1.000000,0.000000
61,ytdl-org/youtube-dl,2012.02.26,python,c4105fa035a5c7c4d9aebc4a27822b1839961b7f,2012-02-26 23:42:26+00:00,11,[2012.01.08],0,[2012.02.22],0,...,11,0,[2012.02.22],0,0,11,1.000000,0.0,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,ytdl-org/youtube-dl,2013.01.11,python,142d38f776913d1e2c004bd7358942f645ffd734,2013-01-11 07:05:30+00:00,5,[2013.01.08],5,[2013.01.08],5,...,0,6,[2013.01.08],5,1,0,1.000000,1.0,0.833333,1.000000
75,ytdl-org/youtube-dl,2013.01.12,python,fbc5f99db98de9707b0cb72d060a777cec2c0af4,2013-01-12 16:59:58+00:00,4,[2012.12.99],36,[2013.01.11],4,...,0,21,[2013.01.11],4,17,0,0.111111,1.0,0.190476,1.000000
76,ytdl-org/youtube-dl,2013.01.13,python,bbc3e2753a465cf96d19913d160c148464542319,2013-01-12 21:18:13+00:00,10,[2013.01.12],10,[2013.01.12],10,...,0,13,[2013.01.12],10,3,0,1.000000,1.0,0.769231,1.000000
64,ytdl-org/youtube-dl,2012.10.09,python,7b107eea5180444f7d8585889139cfe87eaa2cbd,2012-10-09 13:53:20+00:00,68,[2012.09.27],68,[2012.09.27],68,...,0,41,[2012.09.27],38,3,30,1.000000,1.0,0.926829,0.558824
