In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import re

In [2]:
# Let pandas render up to 500 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 500)

In [3]:
import os
import sys

# Absolute path of dev/releasy2, three directories above the working directory.
# It is put at the front of sys.path (once) so the `import releasy` that
# follows can resolve from that directory.
releasy_module = os.path.abspath(
    os.path.join(os.pardir, os.pardir, os.pardir, 'dev', 'releasy2')
)
if releasy_module not in sys.path:
    sys.path[:0] = [releasy_module]
    
import releasy
from releasy.miner_git import GitVcs
from releasy.miner import TagReleaseMiner, PathCommitMiner, RangeCommitMiner, TimeCommitMiner, VersionReleaseMatcher, VersionReleaseSorter, TimeReleaseSorter, VersionWoPreReleaseMatcher


In [4]:
# Load the project table from a pickled (zip-compressed) pandas object.
# Other cells iterate it with `itertuples()` and read `.Index` / `.name`
# (joined onto the repos2 directory path) and `.lang` from each row.
# NOTE(review): unpickling can execute arbitrary code -- only load
# 'projects.zip' if it comes from a trusted source.
projects = pd.read_pickle('projects.zip')

In [6]:
# Report every project whose mined release set has more than one tag prefix:
# first the prefix frequencies, then every release, flagging consecutive
# releases that share a version ("clone" when they also share the head
# commit id, "check" otherwise).
# There is no per-project error handling in this cell: an exception raised
# while mining a repository stops the whole loop.
i = 0
for project in projects.itertuples():
    path = os.path.abspath(os.path.join('..', '..', '..', 'repos2', project.Index))

    vcs = GitVcs(path)
    release_matcher = VersionWoPreReleaseMatcher()
    time_release_sorter = TimeReleaseSorter()  # created, but not handed to the miner below
    release_miner = TagReleaseMiner(vcs, release_matcher)
    release_set = release_miner.mine_releases()

    # A single prefix (or none) is unremarkable: emit only the spacer.
    if len(release_set.prefixes) <= 1:
        print("\n\n")
        continue

    print(f"{i+1:<2}: {project.Index} - {project.lang}")
    for prefix in release_set.prefixes:
        freq = release_set.prefixes.count(prefix)
        print(f" - {freq:3} {prefix}")
    i += 1

    prev_release = None
    for release in release_set:
        if not (prev_release and release.name.version == prev_release.name.version):
            warn = ""
        elif release.head.id == prev_release.head.id:
            warn = "clone"
        else:
            warn = "check"
        print(f"{warn:6} {release.name}")
        prev_release = release

    print("\n\n")

    

1 : vuejs/vue - javascript
 -  38 None
 - 120 v
       v2.6.12
       0.11.0
       0.11.1
       0.11.10
       0.11.2
       0.11.3
       0.11.4
       0.11.5
       0.11.6
       0.11.7
       0.11.8
       0.11.9
       0.12.0
       0.12.1
       0.12.10
       0.12.11
       0.12.12
       0.12.13
       0.12.14
       0.12.15
       0.12.16
       0.12.2
       0.12.3
       0.12.4
       0.12.5
       0.12.6
       0.12.7
       0.12.8
       0.12.9
       0.6.0
       1.0.0
       1.0.1
       1.0.2
       1.0.3
       1.0.4
       1.0.5
       1.0.6
       1.0.7
       1.0.8
       v0.10.0
       v0.10.1
       v0.10.2
       v0.10.3
       v0.10.4
       v0.10.5
       v0.10.6
       v0.7.0
       v0.7.1
       v0.7.2
       v0.7.3
       v0.7.4
       v0.7.5
       v0.7.6
       v0.8.0
       v0.8.1
       v0.8.2
       v0.8.3
       v0.8.4
       v0.8.5
       v0.8.6
       v0.8.7
       v0.8.8
       v0.9.0
       v0.9.1
       v0.9.2
       v0.9.3
       v1.0.10
       

In [24]:
# Build, for every release of every project, a row comparing the commits
# assigned to the release by the path-based miner (used as the baseline)
# against those assigned by the range-based and the time-based miners.
#
# Per-project frames are collected in a list and concatenated once at the
# end: growing `releases` with DataFrame.append inside the loop is quadratic
# and DataFrame.append no longer exists in pandas >= 2.0.
releases_columns = [
    "project", "name", "lang", "head", "time", "commits", "base_releases",
    "range_commits", "range_base_releases", "range_tpos", "range_fpos", "range_fneg",
    "time_commits", "time_base_releases", "time_tpos", "time_fpos", "time_fneg"]
releases = pd.DataFrame(columns=releases_columns)

# Per-project suffix passed to the matcher as `suffix_exception`.
suffix_exception_catalog = {
    "spring-projects/spring-boot": ".RELEASE",
    "spring-projects/spring-framework": ".RELEASE",
    "netty/netty": ".Final",
    "godotengine/godot": "-stable",
}

count = 0  # NOTE(review): never read in this cell -- kept in case another cell uses it
project_frames = []  # one DataFrame per project that was processed without error
for i,project in enumerate(projects.itertuples()):
    # NOTE(review): this cell reads `project.name`, while other cells in this
    # notebook read `project.Index` -- confirm which one matches `projects`.
    path = os.path.abspath(os.path.join('..','..','..','repos2',project.name))

    try:
        print(f"{i+1:3} {project.name}")
        # None when the project has no catalogued suffix.
        suffix_exception = suffix_exception_catalog.get(project.name)

        vcs = GitVcs(path)
        release_matcher = VersionWoPreReleaseMatcher(suffix_exception=suffix_exception)
        time_release_sorter = TimeReleaseSorter()
        version_release_sorter = VersionReleaseSorter()

        # The same tags are mined twice; only the sorter given to the miner differs.
        time_release_miner = TagReleaseMiner(vcs, release_matcher, time_release_sorter)
        time_release_set = time_release_miner.mine_releases()

        version_release_miner = TagReleaseMiner(vcs, release_matcher, version_release_sorter)
        version_release_set = version_release_miner.mine_releases()

        path_miner = PathCommitMiner(vcs, time_release_set)
        range_miner = RangeCommitMiner(vcs, version_release_set)
        time_miner = TimeCommitMiner(vcs, version_release_set)

        print(f" - parsing by path")
        path_release_set = path_miner.mine_commits()
        print(f" - parsing by time")
        # From here on `time_release_set` is rebound to the time miner's result
        # (path_miner above was already built from the tag-mined set).
        time_release_set = time_miner.mine_commits()
        print(f" - parsing by range")
        range_release_set = range_miner.mine_commits()

        print("")
        # Named `release_stats` (not `stats`) so the `stats` name imported
        # from scipy at the top of the notebook is not rebound to a list.
        release_stats = []
        for release in version_release_set:
            path_commits = set(path_release_set[release.name].commits)
            range_commits = set(range_release_set[release.name].commits)
            time_commits = set(time_release_set[release.name].commits)

            path_base_releases = [base.name.value for base in path_release_set[release.name].base_releases]
            range_base_releases = [base.name.value for base in range_release_set[release.name].base_releases]
            time_base_releases = [base.name.value for base in time_release_set[release.name].base_releases]

            # tpos: commits both strategies agree on; fpos: commits only the
            # compared strategy reports; fneg: commits only the path miner reports.
            release_stats.append({
                "project": project.name,
                "name": release.name.value,
                "lang": project.lang,
                "head": release.head,
                "time": release.time,
                "commits": len(path_commits),
                "base_releases": path_base_releases,
                "range_commits": len(range_commits),
                "range_base_releases": range_base_releases,
                "range_tpos": len(path_commits & range_commits),
                "range_fpos": len(range_commits - path_commits),
                "range_fneg": len(path_commits - range_commits),
                "time_commits": len(time_commits),
                "time_base_releases": time_base_releases,
                "time_tpos": len(path_commits & time_commits),
                "time_fpos": len(time_commits - path_commits),
                "time_fneg": len(path_commits - time_commits)
            })

        # Only reached when the whole project was processed, so a failing
        # project contributes no partial rows. Empty projects add nothing.
        if release_stats:
            project_frames.append(pd.DataFrame(release_stats))
    except Exception as e:
        # Best effort: report the failure and carry on with the next project.
        print(f" - error: {e}")

if project_frames:
    # Single concatenation; each project's rows keep their own 0..n-1 index,
    # and reindex pins the column order to `releases_columns`.
    releases = pd.concat(project_frames).reindex(columns=releases_columns)

releases_bkp = releases.copy()

In [20]:
# For every project, print how often each release-name suffix occurs; when
# the release set has more than one suffix, also list each release whose
# name carries a suffix. A failure in one project is printed and skipped.
for i, project in enumerate(projects.itertuples()):
    path = os.path.abspath(os.path.join('..', '..', '..', 'repos2', project.name))

    try:
        vcs = GitVcs(path)
        release_matcher = VersionReleaseMatcher()
        time_release_sorter = TimeReleaseSorter()
        release_miner = TagReleaseMiner(vcs, release_matcher, time_release_sorter)
        release_set = release_miner.mine_releases()

        print(f"{i+1:<2}: {project.name} - {project.lang}")

        suffixes = release_set.suffixes
        for suffix in suffixes:
            freq = suffixes.count(suffix)
            print(f" - {freq:3} {suffix}")

        if len(release_set.suffixes) > 1:
            with_suffix = (release for release in release_set if release.name.suffix)
            for release in with_suffix:
                print(f" - {release.name}")

    except Exception as e:
        print(f" - error: {e}")


1 : vuejs/vue - javascript
 -   1 -rc.5
 -   1 -csp.1
 -   1 -beta.7
 - 158 None
 -   2 -alpha.3
 -   1 -rc3
 -   2 -alpha.6
 -   2 -beta.0
 -   1 -beta5
 -   1 -csp.2
 -   1 -beta.6
 -   2 -rc.1
 -   2 -alpha.4
 -   1 -beta3
 -   1 -rc.2-migration
 -   1 -migration
 -   2 -alpha.7
 -   1 -rc.3
 -   1 -rc.7
 -   3 -beta.3
 -  32 -csp
 -   5 -beta.1
 -   2 -alpha.8
 -   1 -beta4
 -   2 -alpha.2
 -   3 -rc
 -   1 -rc.4
 -   1 -beta2
 -   2 -rc.2
 -   1 -rc.6
 -   2 -alpha.5
 -   1 -beta.5
 -   1 -beta.8
 -   1 -rc.8
 -   2 -alpha.1
 -   2 -beta.4
 -   2 -rc2
 -   4 -beta.2
 - 0.10.0-rc
 - 0.11.0-rc
 - 0.11.0-rc2
 - 0.11.0-rc3
 - 0.12.0-beta2
 - 0.12.0-beta3
 - 0.12.0-beta4
 - 0.12.0-beta5
 - 0.12.0-rc
 - 0.12.0-rc2
 - 0.12.0-csp
 - 0.12.1-csp
 - 0.12.1-csp.1
 - 0.12.1-csp.2
 - 0.12.6-csp
 - 0.12.7-csp
 - 0.12.8-csp
 - 0.12.9-csp
 - 0.12.10-csp
 - 0.12.11-csp
 - 0.12.12-csp
 - 1.0.0-alpha.1
 - 1.0.0-alpha.2
 - 1.0.0-alpha.3
 - 0.12.14-csp
 - 1.0.0-alpha.4
 - 0.12.15-csp
 - 1.0.0-alpha.5
 

In [34]:
# Projects skipped by the report below, with the reason each was set aside.
exception = [
    "nodejs/node",       # one release mislabeled
    "mrdoob/three.js",   # uses "r" prefix
    "aria2/aria2",       # uses "release" prefix
    "square/okhttp",     # uses "parent" prefix
    "apache/dubbo",      # uses "dubbo" prefix
    "dotnet/efcore",     # uses multiple release prefixes (release, rel, v)
    "aspnetboilerplate/aspnetboilerplate",  # one release with dot
    "taosdata/TDengine", # uses "ver" prefix
    "nginx/nginx",       # uses "release" prefix
]
#exception = []

# List every remaining project that has at least one release prefix other
# than None or "v", together with the frequency of each of its prefixes.
i = 0
for project in projects.itertuples():
    path = os.path.abspath(os.path.join('..', '..', '..', 'repos2', project.Index))

    try:
        if project.Index in exception:
            continue

        vcs = GitVcs(path)
        release_matcher = VersionWoPreReleaseMatcher()
        time_release_sorter = TimeReleaseSorter()
        release_miner = TagReleaseMiner(vcs, release_matcher, time_release_sorter)
        release_set = release_miner.mine_releases()

        # Flag name kept from the original cell: true as soon as one prefix
        # is neither None nor "v".
        monorepo = any(prefix not in (None, "v") for prefix in release_set.prefixes)
        if not monorepo:
            continue

        print(f"{i+1:<2}: {project.Index} - {project.lang}")
        for prefix in release_set.prefixes:
            freq = release_set.prefixes.count(prefix)
            print(f" - {freq:3} {prefix}")
        i += 1

    except Exception as e:
        print(f" - error: {e}")


In [35]:
# Per-project suffix passed to the pre-release-excluding matcher as
# `suffix_exception`.
suffix_exception_catalog = {
    "spring-projects/spring-boot": ".RELEASE",
    "spring-projects/spring-framework": ".RELEASE",
    "netty/netty": ".Final",
    "godotengine/godot": "-stable",
}

# Mine each project's tags with both matchers and report the projects for
# which the VersionWoPreReleaseMatcher set has fewer than 10 releases:
# both set sizes are printed, followed by the suffix frequencies of the
# VersionReleaseMatcher set.
for project in projects.itertuples():
    path = os.path.abspath(os.path.join('..', '..', '..', 'repos2', project.Index))

    try:
        vcs = GitVcs(path)
        # None for projects that are not in the catalog.
        suffix_exception = suffix_exception_catalog.get(project.Index)

        release_matcher_wopre = VersionWoPreReleaseMatcher(suffix_exception=suffix_exception)
        release_matcher = VersionReleaseMatcher()
        time_release_sorter = TimeReleaseSorter()
        release_miner_wopre = TagReleaseMiner(vcs, release_matcher_wopre, time_release_sorter)
        release_miner = TagReleaseMiner(vcs, release_matcher, time_release_sorter)

        release_set_wopre = release_miner_wopre.mine_releases()
        release_set = release_miner.mine_releases()

        if len(release_set_wopre) >= 10:
            continue

        print(f"{len(release_set_wopre):4}:{len(release_set):4} {project.Index} - {project.lang}")
        for suffix in release_set.suffixes:
            freq = release_set.suffixes.count(suffix)
            print(f" - {freq:3} {suffix}")
    except Exception as e:
        print(f" - error: {e}")

        