In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import re
import time

In [2]:
# Allow up to 700 rows to be rendered before pandas truncates a displayed frame.
pd.set_option("display.max_rows", 700)

In [3]:
import os
import sys

# Absolute path of the releasy2 checkout, three directories above the working directory.
releasy_module = os.path.abspath(
    os.path.join('..', '..', '..', 'dev', 'releasy2')
)
# Put it first on the import path, but only once, so re-running the cell is harmless.
if not (releasy_module in sys.path):
    sys.path.insert(0, releasy_module)
    
import releasy
from releasy.miner_git import GitVcs
from releasy.miner import TagReleaseMiner, TimeVersionReleaseSorter, TimeNaiveCommitMiner, PathCommitMiner, RangeCommitMiner, TimeCommitMiner, VersionReleaseMatcher, VersionReleaseSorter, TimeReleaseSorter, VersionWoPreReleaseMatcher


In [4]:
# Load the pickled `projects` object from the working directory
# (presumably the table of studied projects -- confirm against the producer notebook).
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
# NOTE(review): `projects` is not referenced again in the cells shown here.
projects = pd.read_pickle('projects.zip')

In [5]:
# Project (as "owner/repo") mined by the driver statements further down in this cell.
p = "briannesbitt/Carbon"

# Placeholder frame; it is replaced once the mining result is concatenated.
releases = pd.DataFrame()

# Per-project regular expression handed to the release matcher as `suffix_exception`.
# NOTE(review): the leading "." is an unescaped regex wildcard, so these also match
# e.g. "-RELEASE"; confirm whether a literal dot ("\.") was intended.
suffix_exception_catalog = {
    "spring-projects/spring-boot": "^.RELEASE$",
    "spring-projects/spring-framework": "^.RELEASE$",
    "netty/netty": "^.Final$",
    "godotengine/godot": "^-stable$",
}

# Per-project list of tag names handed to the release matcher as `release_exceptions`.
# Every project must appear exactly once: a repeated key in a dict literal silently
# discards the earlier list.
release_exception_catalog = {
    "facebook/react": [
        "15.3.1",
        "15.3.2",
        "16.1.0"
    ],
    "facebook/react-native": [
        "0.60.2"
    ],
    "nodejs/node": [
        "heads/tags/v0.5.6"
    ],
    "vercel/next.js": [
        "v2.4.2"
    ],
    "ionic-team/ionic-framework": [
        "1.0.0"
    ],
    "grafana/grafana": [
        "6.1.6",
        "7.0.0",
        "7.2.1"
    ],
    "vercel/hyper": [
        "v0.7.0",
        "v0.7.1"
    ],
    "nestjs/nest": [
        "6.3.1"
    ],
    "apache/dubbo": [
        "2.7.6"
    ],
    # Previously declared twice; the second entry overwrote the first and dropped "2.0".
    "psf/requests": [
        "2.0",
        "v2.16.3" #clone
    ],
    "huggingface/transformers": [
        "0.1.2",
        "0.5.0",
        "1.0",
        "3.0.1",
        "v0.2.0" #clone
    ],
    "laravel/laravel": [
        "v4.0.8" # clone
    ],
    "laravel/framework": [
        "5.3"
    ],
    "dotnet/efcore": [
        "rel/1.0.1",
        "release/2.2",
        "release/3.0",
        "v2.1.10", #clone
        "v2.1.13", #clone
        "v2.1.16", #clone
        "v2.1.17", #clone
        "v2.1.19", #clone
        "v2.1.20", #clone
        "v2.1.21", #clone
        "v2.2.7",  #clone
        "v2.2.8"  #clone
    ],
    "aspnetboilerplate/aspnetboilerplate": [
        "v.5.1.0",
        "v0.7.3.0", # clone
        "v5.10.1",  # clone
        "v5.12"     # clone
    ],
    "SignalR/SignalR": [
        "v0.3.5",
        "0.5"
    ],
    "AutoMapper/AutoMapper": [
        "3.3.1"
    ],
    "sinatra/sinatra": [
        "1.0",
        "v1.1.0",
        "v1.1.1",
        "v1.1.2",
        "v1.1.3",
        "v1.1.4",
        "v1.2.0",
        "v1.2.1",
        "v1.2.2",
        "v1.2.3",
        "v1.2.4",
        "v1.2.5",
        "v1.2.6",
        "v1.2.7",
        "v1.2.8",
        "v1.2.9",
        "v1.3.0",
        "v1.3.1",
        "v1.3.2",
        "v1.3.3",
        "v1.3.4",
        "v1.3.5",
        "v1.3.6",
        "v1.4.0",
        "v1.4.1",
        "v1.4.2",
        "v1.4.3"
    ],
    "hashicorp/terraform": [
        "0.7.7"
    ],
    "rclone/rclone": [
        "v1.46" #clone
    ],
    "istio/istio": [
        "1.0.7", #clone
        "1.1.2"  #clone
    ],
    "XX-net/XX-Net": [
        "1.14.5", #clone
        "1.15.0"  #clone
    ],
    "alibaba/fastjson": [
        "1.2.37" #clone
    ],
    "briannesbitt/Carbon": [
        "1.26.1", #clone
        "1.38.3"  #clone
    ],
    "vuetifyjs/vuetify": [
        "v0.8.3" #clone
    ],
    "microsoft/TypeScript": [
        "v1.5.4" #clone
    ],
    "Wox-launcher/Wox": [
        "v1.0.0.185" #clone
    ],
    "jellyfin/jellyfin": [
        "v10.0.1" #clone
    ],
    "radareorg/radare2": [
        "1.0" #clone
    ],
    "v2ray/v2ray-core": [
        "v0.14.2", #clone
        "v2.19.2", #clone
        "v2.19.6", #clone
        "v2.36.3", #clone
        "v2.40.2", #clone
        "v2.41",   #clone
        "v3.11.3", #clone
        "v3.18",   #clone
        "v3.22",   #clone
        "v3.25", #clone
        "v3.38",   #clone
        "v3.46.4"  #clone
    ]
}

def analyze_project(name, lang, suffix_exception_catalog, release_exception_catalog):
    """Mine one repository's releases and compare four commit-mining strategies.

    The commits found by ``PathCommitMiner`` are used as the reference set; the
    range, time and naive-time miners are each scored against it as
    true-positive / false-positive / false-negative counts.

    Parameters
    ----------
    name : str
        Project identifier; also the repository directory under ``../../../repos2``
        and the lookup key for both catalogs.
    lang : str
        Language label copied verbatim into every output row.
    suffix_exception_catalog : dict
        Maps project name to the ``suffix_exception`` passed to the release matcher.
    release_exception_catalog : dict
        Maps project name to the ``release_exceptions`` passed to the release matcher.

    Returns
    -------
    pandas.DataFrame
        One row per mined release. The frame has no columns when no release
        produced a row.
    """
    start = time.time()
    path = os.path.abspath(os.path.join('..','..','..','repos2',name))

    # Projects without a special case get None, exactly as the matcher expects by default.
    suffix_exception = suffix_exception_catalog.get(name)
    release_exceptions = release_exception_catalog.get(name)

    vcs = GitVcs(path)
    release_matcher = VersionWoPreReleaseMatcher(suffix_exception=suffix_exception,
                                                 release_exceptions=release_exceptions)
    release_miner = TagReleaseMiner(vcs, release_matcher)
    releases = release_miner.mine_releases()

    version_sorter = TimeVersionReleaseSorter()
    releases_wbase = version_sorter.sort(releases)

    # The path miner receives the unsorted releases; the other three use the sorter's output.
    path_miner = PathCommitMiner(vcs, releases)
    range_miner = RangeCommitMiner(vcs, releases_wbase)
    time_miner = TimeCommitMiner(vcs, releases_wbase)
    time_naive_miner = TimeNaiveCommitMiner(vcs, releases_wbase)

    path_release_set = path_miner.mine_commits()
    range_release_set = range_miner.mine_commits()
    time_release_set = time_miner.mine_commits()
    time_naive_release_set = time_naive_miner.mine_commits()

    def _base_release_names(release_data):
        """Return the base-release names of one mined release ([] when there are none)."""
        return [base.name.value for base in (release_data.base_releases or [])]

    def _strategy_columns(prefix, release_data, reference_commits):
        """Build the five ``<prefix>_*`` columns scoring one strategy against the reference."""
        candidate_commits = set(release_data.commits)
        return {
            f"{prefix}_commits": len(candidate_commits),
            f"{prefix}_base_releases": _base_release_names(release_data),
            f"{prefix}_tpos": len(reference_commits & candidate_commits),
            f"{prefix}_fpos": len(candidate_commits - reference_commits),
            f"{prefix}_fneg": len(reference_commits - candidate_commits),
        }

    # Named `rows` (not `stats`) so the module-level `scipy.stats` import is not shadowed.
    rows = []
    for release in releases:
        # NOTE(review): the catalog keys shown in this notebook are bare project names,
        # so this "project@release" lookup never matches any of them; confirm the intent.
        if f"{name}@{release.name}" in release_exception_catalog:
            continue

        path_data = path_release_set[release.name]
        path_commits = set(path_data.commits)
        path_base_releases = _base_release_names(path_data)

        row = {
            "project": name,
            "name": release.name.value,
            "version": release.name.version,
            "semantic_version": release.name.semantic_version,
            "prefix": release.name.prefix,
            "suffix": release.name.suffix,
            "lang": lang,
            "head": str(release.head.id),
            "time": release.time,
            "committers": len(path_data.committers),
            "commits": len(path_commits),
            "merges": len(path_data.merges),
            "base_releases": path_base_releases,
            "base_releases_qnt": len(path_base_releases),
        }
        # Insertion order fixes the column order: range_*, then time_*, then time_naive_*.
        row.update(_strategy_columns("range", range_release_set[release.name], path_commits))
        row.update(_strategy_columns("time", time_release_set[release.name], path_commits))
        row.update(_strategy_columns("time_naive", time_naive_release_set[release.name], path_commits))
        rows.append(row)

    result = pd.DataFrame(rows)
    print(f"{time.time() - start:10} - {name}") 
    return result



# Mine the project selected in `p` and collect the per-release rows.
# NOTE(review): the language label is hard-coded to "java" regardless of `p`;
# confirm it matches the selected project.
data = [analyze_project(p, "java", suffix_exception_catalog, release_exception_catalog)]
releases = pd.concat(data)

# Normalise dtypes. Bracket assignment is used because attribute-style assignment
# only works for columns that already exist and whose name is not a DataFrame attribute.
releases["time"] = pd.to_datetime(releases["time"], utc=True)

# Every count column, including time_naive_*, which was previously left out of the coercion.
count_columns = ["commits"] + [
    f"{strategy}_{measure}"
    for strategy in ("range", "time", "time_naive")
    for measure in ("commits", "tpos", "fpos", "fneg")
]
releases[count_columns] = releases[count_columns].apply(pd.to_numeric)

releases = releases.set_index(['project', 'name'])



TypeError: 'NoneType' object is not iterable

In [None]:
def precision(row, column):
    """Return tpos / (tpos + fpos) for the strategy named by `column`.

    `row` is indexed with the keys ``column + "_tpos"`` and ``column + "_fpos"``.
    When both counts sum to zero the result is defined as 0.0.
    """
    true_pos = row[column+"_tpos"]
    retrieved = true_pos + row[column+"_fpos"]
    # Guard against dividing by zero when the strategy retrieved nothing.
    if retrieved == 0:
        return 0.0
    return true_pos / retrieved
    
def recall(row, column):
    """Return tpos / (tpos + fneg) for the strategy named by `column`.

    `row` is indexed with the keys ``column + "_tpos"`` and ``column + "_fneg"``.
    When both counts sum to zero the result is defined as 0.0.
    """
    true_pos = row[column+"_tpos"]
    relevant = true_pos + row[column+"_fneg"]
    # Guard against dividing by zero when the reference set is empty.
    if relevant == 0:
        return 0.0
    return true_pos / relevant
    
def fmeasure(row, column):
    """Return the harmonic mean 2*P*R / (P + R) for the strategy named by `column`.

    `row` is indexed with the keys ``column + "_precision"`` and
    ``column + "_recall"``. When both sum to zero the result is 0.0, a float
    like the degenerate results of `precision` and `recall`.
    """
    p_value = row[column+"_precision"]
    r_value = row[column+"_recall"]
    total = p_value + r_value
    # Guard against dividing by zero when precision and recall are both zero.
    if total == 0:
        return 0.0
    return 2 * p_value * r_value / total

# Derive precision, recall and f-measure columns for every mining strategy.
# Precision and recall come first because fmeasure reads the columns they create.
for strategy in ('range', 'time', 'time_naive'):
    for metric_name, metric_fn in (
        ('precision', precision),
        ('recall', recall),
        ('fmeasure', fmeasure),
    ):
        releases[strategy + '_' + metric_name] = releases.apply(
            metric_fn, args=(strategy,), axis=1
        )

In [None]:
# Per-project mean precision/recall. The columns are selected before aggregating so
# non-numeric columns never reach mean(), which raises on them in pandas >= 2.0.
releases.groupby(level=0)[['time_precision','range_precision', 'time_recall', 'range_recall']].mean()

In [None]:
# Releases for which the time-based strategy is less precise than the range-based one.
releases.loc[
    releases.time_precision < releases.range_precision,
    [
        'commits',
        'time_commits',
        'base_releases',
        'time_base_releases',
        'range_base_releases',
        'time_precision',
        'range_precision',
        'time_recall',
        'range_recall',
    ],
]

In [19]:
# Spot-check the commit counts of one specific release under the three strategies.
# NOTE: the (project, release) key is hard-coded; .loc raises KeyError when that
# pair is not in `releases`, which is what the recorded output of this cell shows.
releases.loc[("nodejs/node","v2.0.0"),['commits', 'range_commits', 'time_commits']]

KeyError: ('nodejs/node', 'v2.0.0')

In [16]:
# Per-project mean of every numeric column. numeric_only=True is explicit because
# pandas >= 2.0 no longer drops string/list columns silently and raises instead.
releases.groupby(level=0).mean(numeric_only=True)

Unnamed: 0_level_0,commits,merges,base_releases_qnt,range_commits,range_tpos,range_fpos,range_fneg,time_commits,time_tpos,time_fpos,...,time_naive_fneg,range_precision,range_recall,range_fmeasure,time_precision,time_recall,time_fmeasure,time_naive_precision,time_naive_recall,time_naive_fmeasure
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
nodejs/node,100.111298,0.649241,1.145025,103.005059,99.84317,3.161889,0.268128,66.662732,63.819562,2.84317,...,36.608769,0.955127,0.999024,0.969942,0.927424,0.961547,0.936786,0.403462,0.94702,0.489241


In [22]:
# Index by (project, release name). The guard makes the cell idempotent: calling
# set_index again once the columns are already the index raises KeyError.
if list(releases.index.names) != ['project', 'name']:
    releases = releases.set_index(['project', 'name'])

In [23]:
# Preview the first five rows.
releases.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,lang,head,time,commits,base_releases,range_commits,range_base_releases,range_tpos,range_fpos,range_fneg,time_commits,time_base_releases,time_tpos,time_fpos,time_fneg,range_precision,range_recall,time_precision,time_recall
project,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
vuejs/vue,0.6.0,javascript,218557cdec830a629252f4a9e2643973dc1f1d2d,2013-12-08 00:32:17+00:00,354,[],354,[],354,0,0,354,[],354,0,0,1.0,1.0,1.0,1.0
vuejs/vue,v0.7.0,javascript,f4861ca9905a170b9a4b185e8a2038dc7c11c58e,2013-12-24 03:31:05+00:00,34,[0.6.0],34,[0.6.0],34,0,0,34,[0.6.0],34,0,0,1.0,1.0,1.0,1.0
vuejs/vue,v0.7.1,javascript,590a7ee55b655f166ecdf8e7a5e22dab7a9e6dd7,2013-12-24 21:58:33+00:00,6,[v0.7.0],6,[v0.7.0],6,0,0,6,[v0.7.0],6,0,0,1.0,1.0,1.0,1.0
vuejs/vue,v0.7.2,javascript,e78fb82212ac229c91fc8b1f09a84b24f577541a,2013-12-28 05:43:40+00:00,3,[v0.7.1],3,[v0.7.1],3,0,0,3,[v0.7.1],3,0,0,1.0,1.0,1.0,1.0
vuejs/vue,v0.7.3,javascript,dcc839a4314de8825f7087f00c4e40ea2d22b46f,2014-01-06 19:13:12+00:00,15,[v0.7.2],15,[v0.7.2],15,0,0,15,[v0.7.2],15,0,0,1.0,1.0,1.0,1.0


In [24]:
# Show the dtype of each column to check the result of the earlier numeric/datetime coercions.
releases.dtypes

lang                                object
head                                object
time                   datetime64[ns, UTC]
commits                              int64
base_releases                       object
range_commits                        int64
range_base_releases                 object
range_tpos                           int64
range_fpos                           int64
range_fneg                           int64
time_commits                         int64
time_base_releases                  object
time_tpos                            int64
time_fpos                            int64
time_fneg                            int64
range_precision                    float64
range_recall                       float64
time_precision                     float64
time_recall                        float64
dtype: object

In [25]:
# Persist the release table twice: as a pickle and as CSV, both in the working directory.
releases.to_pickle(path="releases.zip")
releases.to_csv(path_or_buf="releases.csv")

In [17]:
# Per-project mean precision/recall, worst range recall first. Selecting the columns
# before mean() keeps non-numeric columns out of the aggregation (an error in pandas >= 2.0).
releases.groupby(level=0)[['range_precision','range_recall','time_precision','time_recall']].mean().sort_values('range_recall')

Unnamed: 0_level_0,range_precision,range_recall,time_precision,time_recall
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dotnet/efcore,0.861097,0.907146,0.339455,0.734689
taosdata/TDengine,0.899112,0.919301,0.49473,0.8094
jellyfin/jellyfin,0.92805,0.966667,0.368276,0.909911
Wox-launcher/Wox,0.973684,0.973684,0.741645,0.936863
cefsharp/CefSharp,0.980903,0.984641,0.526221,0.779052
alibaba/fastjson,0.967328,0.988636,0.584992,0.957588
apache/dubbo,0.985109,0.988803,0.438777,0.832963
ionic-team/ionic-framework,0.964719,0.990948,0.658225,0.887462
spring-projects/spring-framework,0.986764,0.992801,0.369048,0.927707
ansible/ansible,0.98711,0.993056,0.236216,0.883675


In [19]:
# Same summary grouped by the `project` key, worst range recall first. Selecting the
# columns before mean() keeps non-numeric columns out of the aggregation (an error in pandas >= 2.0).
releases.groupby(['project'])[['range_precision','range_recall','time_precision','time_recall']].mean().sort_values('range_recall')


Unnamed: 0_level_0,range_precision,range_recall,time_precision,time_recall
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dotnet/efcore,0.861097,0.907146,0.363637,0.775465
taosdata/TDengine,0.899112,0.919301,0.454907,0.819983
jellyfin/jellyfin,0.92805,0.966667,0.368276,0.909911
Wox-launcher/Wox,0.973684,0.973684,0.741645,0.936863
cefsharp/CefSharp,0.980903,0.984641,0.54115,0.784172
alibaba/fastjson,0.967328,0.988636,0.584946,0.957588
apache/dubbo,0.985109,0.988803,0.439195,0.832963
ionic-team/ionic-framework,0.964719,0.990948,0.656842,0.885974
spring-projects/spring-framework,0.986764,0.992801,0.369032,0.927707
ansible/ansible,0.98711,0.993056,0.239749,0.905605


In [21]:
# List every release ordered by ascending range recall (lowest recall first).
releases.sort_values(by='range_recall')

Unnamed: 0,project,name,lang,head,time,commits,base_releases,range_commits,range_base_releases,range_tpos,...,range_fneg,time_commits,time_base_releases,time_tpos,time_fpos,time_fneg,range_precision,range_recall,time_precision,time_recall
2,ansible/ansible,0.01,python,56de2e112a97f312c7a07c89e1ce5de74a2637f8,2012-03-08 19:12:58+00:00,260,[],0,[0.0.2],0,...,260,0,[0.0.2],0,0,260,1.000000,0.0,1.000000,0.000000
2,spring-projects/spring-framework,v3.0.1.RELEASE,java,c20c4e1f05eb3c64062ba3af509c528ad003f089,2010-02-18 17:56:59+00:00,234,[v3.0.0.RELEASE],0,[v3.0.1.RELEASE.A],0,...,234,0,[v3.0.1.RELEASE.A],0,0,234,1.000000,0.0,1.000000,0.000000
14,alibaba/fastjson,1.1.42,java,354543ec35cda1a67839ac0a03792e69682435be,2014-10-11 08:24:04+00:00,122,"[1.1.33, 1.1.36]",0,[1.1.37],0,...,122,0,[1.1.37],0,0,122,1.000000,0.0,1.000000,0.000000
25,dotnet/efcore,v2.1.10,c#,0cde562cc070fd00ecf33248df715f6df58a1691,2019-01-10 21:54:40+00:00,3,[v2.2.1],0,[v2.1.8],0,...,3,0,[v2.1.8],0,0,3,1.000000,0.0,1.000000,0.000000
61,ytdl-org/youtube-dl,2012.02.26,python,c4105fa035a5c7c4d9aebc4a27822b1839961b7f,2012-02-26 23:42:26+00:00,11,[2012.01.08],0,[2012.02.22],0,...,11,0,[2012.02.22],0,0,11,1.000000,0.0,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,ytdl-org/youtube-dl,2013.01.11,python,142d38f776913d1e2c004bd7358942f645ffd734,2013-01-11 07:05:30+00:00,5,[2013.01.08],5,[2013.01.08],5,...,0,6,[2013.01.08],5,1,0,1.000000,1.0,0.833333,1.000000
75,ytdl-org/youtube-dl,2013.01.12,python,fbc5f99db98de9707b0cb72d060a777cec2c0af4,2013-01-12 16:59:58+00:00,4,[2012.12.99],36,[2013.01.11],4,...,0,21,[2013.01.11],4,17,0,0.111111,1.0,0.190476,1.000000
76,ytdl-org/youtube-dl,2013.01.13,python,bbc3e2753a465cf96d19913d160c148464542319,2013-01-12 21:18:13+00:00,10,[2013.01.12],10,[2013.01.12],10,...,0,13,[2013.01.12],10,3,0,1.000000,1.0,0.769231,1.000000
64,ytdl-org/youtube-dl,2012.10.09,python,7b107eea5180444f7d8585889139cfe87eaa2cbd,2012-10-09 13:53:20+00:00,68,[2012.09.27],68,[2012.09.27],68,...,0,41,[2012.09.27],38,3,30,1.000000,1.0,0.926829,0.558824
