# Mine releases from selected projects

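This notebook mines the releases of the selected projects and computes per-release metrics (cycle, duration, start delay, and commits made in advance).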

In [1]:
import os
import datetime
import json
import re
import traceback
import math

from multiprocessing import Pool
import pandas as pd

from ipywidgets import IntProgress
from IPython.display import display

import releasy

In [2]:
from util import (
    DATA_PATH,
    REPO_PATH,
    TMP_PATH, 
    CPU,
    CycleType,
    RAPID_RELEASE_LIM,
    TRAD_RELEASE_LIM,
    delta2days,
    # is_rapid_release,
    # is_trad_release,
)

In [3]:
selected_projects = pd.read_csv(DATA_PATH / '10_projects_selected.csv')

## Mine Releases


In [4]:
def get_release_time(srelease):
    """Return the moment the release was ready for delivery.

    A release with commits is dated by the committer time of the commit its
    tag points to; a release without commits falls back to ``srelease.time``.
    """
    if not srelease.commits:
        return srelease.time

    tagged_commit = srelease.release.tag.commit
    return tagged_commit.committer_time
    
def get_cycle(srelease):
    """Return the time elapsed since the previous main release.

    The result is the difference between the release times (as given by
    ``get_release_time``) of this release and its previous main release.
    A release with no previous main release has a zero-length cycle.
    """
    previous = srelease.prev_main_release
    if not previous:
        return datetime.timedelta(0)

    previous_ready = get_release_time(previous)
    current_ready = get_release_time(srelease)
    return current_ready - previous_ready

def get_start(srelease):
    """Return the time at which development of the release started.

    For a release with commits this is the author time of the commit picked
    by ``srelease.commits.first`` when keyed on author time; a release
    without commits starts at its own release time.
    """
    commits = srelease.commits
    if not commits:
        return get_release_time(srelease)

    earliest = commits.first(lambda commit: commit.author_time)
    return earliest.author_time
        
def get_duration(srelease):
    """Return how long the release was under development.

    This is the span between the development start (``get_start``) and the
    release time (``get_release_time``).
    """
    finished_at = get_release_time(srelease)
    started_at = get_start(srelease)
    return finished_at - started_at
        
def get_start_delay(srelease):
    """Return the gap between the previous main release and this release's start.

    The value is ``get_start(srelease)`` minus the release time of the
    previous main release, so it is negative when development began before
    the previous release was out. Without a previous main release the delay
    is zero.
    """
    previous = srelease.prev_main_release
    if not previous:
        return datetime.timedelta(0)

    return get_start(srelease) - get_release_time(previous)

def get_commits_in_advance(srelease):
    """Return the commits authored before the previous main release was ready.

    A commit counts as "in advance" when its author time is earlier than the
    release time of the previous main release.

    Returns:
        A list of commits. The list is empty when the release has no commits
        or no previous main release, so callers can always apply ``len()``
        or iterate over the result.
    """
    if not srelease.commits or not srelease.prev_main_release:
        # Always return a list: the caller takes len() of the result.
        return []

    # The previous release time is the same for every commit; compute it once.
    prev_release_time = get_release_time(srelease.prev_main_release)
    return [
        commit
        for commit in srelease.commits
        if commit.author_time < prev_release_time
    ]

In [5]:
def is_rapid_release(release):
    """Tell whether the release followed a rapid release cycle.

    A release qualifies only if it has commits, has a previous main release,
    and its cycle (converted with ``delta2days``) is greater than zero and
    no larger than ``RAPID_RELEASE_LIM``.
    """
    if not (release.commits and release.prev_main_release):
        return False

    cycle_in_days = delta2days(get_cycle(release))
    return not (cycle_in_days <= 0 or cycle_in_days > RAPID_RELEASE_LIM)

def is_trad_release(release):
    """Tell whether the release followed a traditional release cycle.

    A release qualifies only if it has commits, has a previous main release,
    and its cycle (converted with ``delta2days``) is greater than zero and
    at least ``TRAD_RELEASE_LIM``.
    """
    if not (release.commits and release.prev_main_release):
        return False

    cycle_in_days = delta2days(get_cycle(release))
    return not (cycle_in_days <= 0 or cycle_in_days < TRAD_RELEASE_LIM)

In [6]:
def mine(name: str) -> tuple:
    """Mine one project and summarise it together with its main releases.

    Args:
        name: Project name; it is also the repository directory looked up
            under ``REPO_PATH``.

    Returns:
        A ``(project_data, release_data)`` pair.

        ``project_data`` is a dict with project-level counts (prefixes, main,
        rapid and traditional releases, patches, all releases) plus the time
        spent mining under ``'time'``.

        ``release_data`` is a list with one dict per main release that has
        both commits and a previous main release; other main releases are
        skipped.

        Mining is best-effort: if anything raises, the error and traceback
        are printed and ``({}, [])`` is returned, so a single failing
        project does not abort the whole run.
    """
    started_at = datetime.datetime.now()

    try:
        repo_path = str(REPO_PATH / name)
        project = releasy.Miner(repo_path, name).apply(
            releasy.FinalReleaseMiner(),
            releasy.HistoryCommitMiner(),
            releasy.BaseReleaseMiner(),
            releasy.ContributorMiner(),
            releasy.SemanticReleaseMiner()
        ).mine()

        rapid_releases = [release for release in project.main_releases if is_rapid_release(release)]
        trad_releases = [release for release in project.main_releases if is_trad_release(release)]

        project_data = {
            'project': project.name,
            'prefixes': len(project.releases.prefixes()),
            'prefixes_names': str(" ".join(project.releases.prefixes())),
            'main_releases': len(project.main_releases),
            'rapid_releases': len(rapid_releases),
            'trad_releases': len(trad_releases),
            'patches': len(project.patches),
            'releases': len(project.releases)
        }

        release_data = []
        for srelease in project.main_releases:
            # Every metric below needs both commits and a previous main
            # release to be meaningful, so other releases are left out.
            if not srelease.commits or not srelease.prev_main_release:
                continue

            cycle_days = delta2days(get_cycle(srelease))
            duration = delta2days(get_duration(srelease))
            start_delay = delta2days(get_start_delay(srelease))
            commits_in_advance = get_commits_in_advance(srelease)

            release_data.append({
                'project': project.name,
                'release': srelease.name,
                # The guard above guarantees a previous main release exists.
                'previous_release': srelease.prev_main_release.name,
                # Cycle as reported by releasy, kept next to the locally
                # computed one so the two can be compared.
                'cycle_old': delta2days(srelease.cycle),
                'cycle_days': cycle_days,
                'rapid_release': is_rapid_release(srelease),
                'trad_release': is_trad_release(srelease),

                'commits': len(srelease.commits),
                'commits_in_advance': len(commits_in_advance),
                'start_delay': start_delay,
                'duration': duration,
                'patches': len(srelease.patches)
            })

        project_data['time'] = datetime.datetime.now() - started_at
    except Exception as err:
        # Deliberate catch-all: report the failure and return empty results
        # so the remaining projects can still be mined.
        print(f"{name:40} {err=}")
        # print_exc() reports the exception being handled and, unlike the
        # single-argument print_exception(err), also works before Python 3.10.
        traceback.print_exc()
        project_data = {}
        release_data = []

    return project_data, release_data

In [7]:
project_names = list(selected_projects['project'])
# project_names = project_names[:1]

In [8]:
# Accumulators for the per-project and per-release rows returned by `mine`.
# They and the progress bar do not depend on the pool, so they are created
# before the workers are started.
project_results = []
release_results = []
progress = IntProgress(min=0, max=len(project_names))
display(progress)

# Mine the projects in parallel worker processes. Results are consumed as
# they become available, in arbitrary order, which is fine here because
# every row carries its own project name.
with Pool(processes=CPU) as pool:
    for project_result, release_result in pool.imap_unordered(mine, project_names):
        project_results.append(project_result)
        release_results.extend(release_result)
        progress.value += 1


IntProgress(value=0, max=1665)

In [9]:
projects = pd.DataFrame(project_results)
projects.sample(1)

Unnamed: 0,project,prefixes,prefixes_names,main_releases,rapid_releases,trad_releases,patches,releases,time
417,chromedp/chromedp,1,v,8,1,5,33,41,0 days 00:00:00.047259


In [10]:
releases = pd.DataFrame(release_results)
releases.sample(10)

Unnamed: 0,project,release,previous_release,cycle_old,cycle_days,rapid_release,trad_release,commits,commits_in_advance,start_delay,duration,patches
32283,localForage/localForage,1.2.0,1.1.0,27.908576,27.908576,True,False,21,0,1.151146,26.757431,9
4320,aws/aws-sdk-php,3.63.0,3.62.0,21.024236,21.024236,True,False,2,0,21.024236,0.0,7
1911,go-acme/lego,v1.1.0,v1.0.0,139.855359,139.855359,False,True,72,0,3.763808,136.091551,0
18717,deepmind/sonnet,v1.14,v1.13,45.068414,45.068414,False,False,35,0,0.133935,44.934479,0
45156,nativefier/nativefier,v7.7.0,v7.6.0,510.627488,510.627488,False,True,7,0,385.520451,125.107037,1
23320,Pikaday/Pikaday,1.2.0,1.1.0,304.788762,304.789294,False,True,62,6,-149.224664,454.013958,0
10243,dropwizard/metrics,v4.1.0,v4.0.0,497.08265,497.082639,False,True,180,0,8.1e-05,497.082558,23
25764,josdejong/mathjs,v5.3.0,v5.2.0,54.993275,54.993275,False,False,19,0,28.571806,26.42147,1
26598,segmentio/evergreen,v4.8.0,v4.7.0,14.28765,14.287639,True,False,2,0,14.28662,0.001019,0
2674,graphql-dotnet/graphql-dotnet,v0.6.0,v0.5.0,19.679688,19.679688,True,False,8,1,-0.706725,20.386412,6


In [11]:
len(projects)

1665

In [12]:
len(releases)

48067

In [13]:
len(releases.query("rapid_release == True"))

29203

In [14]:
len(releases.query("trad_release == True"))

11342

In [15]:
len(releases.query("rapid_release == False and trad_release == False"))

7522

In [16]:
releases['commits'].sum()

4952463

In [17]:
projects.to_csv(DATA_PATH / '20_projects.csv', index=False)

In [18]:
releases.to_csv(DATA_PATH / '20_releases.csv', index=False)

In [19]:
# releases.query('rapid_release == True or trad_release == True').sort_values(['cycle_days']).head(50)

In [22]:
releases.query('abs(cycle_old - cycle_days) > 10')

Unnamed: 0,project,release,previous_release,cycle_old,cycle_days,rapid_release,trad_release,commits,commits_in_advance,start_delay,duration,patches
355,go-yaml/yaml,v2.1.0,v2.0.0,0.006157,45.303993,False,False,27,24,-881.579271,926.883264,1
595,kickstarter/android-oss,v2.0.0,v1.18.0,68.127164,49.180405,False,False,54,0,1.076007,48.104398,2
596,kickstarter/android-oss,v2.2.0,v2.0.0,65.072778,84.014687,False,True,36,0,44.143310,39.871377,0
657,tijsverkoyen/CssToInlineStyles,1.1.0,1.0.0,678.173530,718.893322,False,True,2,0,718.874965,0.018356,0
763,lebab/lebab,v1.1.0,v1.0.0,1.026620,25.988310,True,False,105,0,0.045775,25.942535,1
...,...,...,...,...,...,...,...,...,...,...,...,...
47025,GoogleChrome/lighthouse,v2.6.0,v2.5.0,27.371019,44.139676,False,False,77,1,-6.044201,50.183877,0
47027,GoogleChrome/lighthouse,v2.8.0,v2.7.0,55.086968,28.938646,True,False,53,0,3.788542,25.150104,0
47028,GoogleChrome/lighthouse,v2.9.0,v2.8.0,0.009583,26.157454,True,False,28,0,0.002280,26.155174,4
47138,tmux/tmux,2.0,1.9,438.949676,374.009294,False,True,291,2,-6.316701,380.325995,0
