# Mine releases from selected projects

This notebook mines the releases of the selected projects and exports project-level and release-level data to CSV files.

In [1]:
import os
import datetime
import json
import re
import traceback
import math

from multiprocessing import Pool
import pandas as pd

from ipywidgets import IntProgress
from IPython.display import display

import releasy

In [2]:
from util import (
    DATA_PATH,
    REPO_PATH,
    TMP_PATH, 
    CPU,
    CycleType,
    RAPID_RELEASE_LIM,
    TRAD_RELEASE_LIM,
    delta2days,
    is_rapid_release,
    is_trad_release,
)

In [3]:
# Load the selected projects; the 'project' column supplies the names that
# are mined below.
selected_projects = pd.read_csv(DATA_PATH / '10_projects_selected.csv')

## Mine Releases


In [4]:
def mine(name: str) -> releasy.Project:
    elapsed_time = datetime.datetime.now()
    
    try:
        repo_path = str(REPO_PATH / name)
        project = releasy.Miner(repo_path, name).apply(
            releasy.FinalReleaseMiner(),
            releasy.HistoryCommitMiner(),
            releasy.BaseReleaseMiner(),
            releasy.ContributorMiner(),
            releasy.SemanticReleaseMiner()
        ).mine()

        rapid_releases = [release for release in project.main_releases if is_rapid_release(release)]
        trad_releases = [release for release in project.main_releases if is_trad_release(release)]

        project_data = {
            'project': project.name,
            'prefixes': len(project.releases.prefixes()),
            'prefixes_names': str(" ".join(project.releases.prefixes())),
            'main_releases': len(project.main_releases),
            'rapid_releases': len(rapid_releases),
            'trad_releases': len(trad_releases),
            'patches': len(project.patches),
            'releases': len(project.releases)
        }
        
        release_data = []
        for srelease in project.main_releases:
            # release = srelease.release
            cycle_days = delta2days(srelease.cycle)
            delay = delta2days(srelease.delay)

            if srelease.commits:
                duration = delta2days(srelease.time - srelease.commits.first(lambda c: c.author_time).author_time)
            else:
                duration = 0 
            
            release_data.append({
                'project': project.name,
                'release': srelease.name,
                'previous_release': srelease.prev_main_release.name if srelease.prev_main_release else '',
                'cycle': srelease.cycle,
                'cycle_days': cycle_days,
                'cycle_rdays': math.ceil(cycle_days),
                'rapid_release': is_rapid_release(srelease),
                'trad_release': is_trad_release(srelease),
                
                'commits': len(srelease.commits),
                'start_delay': delay,
                'start_delay_rdays': math.ceil(delay),
                'duration': duration,
                'duration_rdays': math.ceil(duration),
                'patches': len(srelease.patches)     
            })
        
        elapsed_time = datetime.datetime.now() - elapsed_time
        project_data['time'] = elapsed_time
    except Exception as err:
        print(f"{name:40} {err=}")
        traceback.print_exception(err)
        project_data = {}
        release_data = []
    
    return project_data, release_data

In [5]:
# Names of the projects to mine, taken from the 'project' column.
project_names = selected_projects['project'].tolist()
# Uncomment to restrict the run to the first project:
# project_names = project_names[:1]

In [6]:
# Accumulators: one summary dict per project, and the release rows of all
# projects flattened into a single list.
project_results = []
release_results = []

progress = IntProgress(min=0, max=len(project_names))
display(progress)

# Mine the projects in parallel. imap_unordered yields each result as soon as
# it is ready, so the progress bar advances once per finished project and the
# result order does not follow project_names. A project whose mining failed
# contributes an empty dict and no release rows (mine returns ({}, [])).
with Pool(processes=CPU) as pool:
    for project_result, release_result in pool.imap_unordered(mine, project_names):
        project_results.append(project_result)
        release_results.extend(release_result)
        progress.value += 1


IntProgress(value=0, max=1665)

In [7]:
# Build the project-level table (one row per mined project) and preview a
# random sample of 10 rows.
projects = pd.DataFrame(project_results)
projects.sample(10)

Unnamed: 0,project,prefixes,prefixes_names,main_releases,rapid_releases,trad_releases,patches,releases,time
561,reactjs/react-modal,1,v,19,7,6,91,110,0 days 00:00:00.078163
677,oblador/react-native-animatable,1,v,10,4,4,13,23,0 days 00:00:00.023313
1439,Kr328/ClashForAndroid,2,v,18,10,1,94,112,0 days 00:00:00.089820
1466,thomaspark/bootswatch,1,v,15,1,11,34,49,0 days 00:00:00.136185
116,PyGithub/PyGithub,2,v,62,40,9,23,85,0 days 00:00:00.123948
269,doccano/doccano,1,v,8,2,3,19,27,0 days 00:00:00.179169
852,symfony/finder,1,v,26,4,20,440,466,0 days 00:00:00.336742
1237,segmentio/nightmare,1,,21,9,4,67,88,0 days 00:00:00.093118
703,didi/cube-ui,2,v,13,10,1,69,82,0 days 00:00:00.126630
1040,tomnomnom/gron,2,v,7,2,2,27,34,0 days 00:00:00.023040


In [8]:
# Build the release-level table (one row per main release) and preview a
# random sample of 10 rows.
releases = pd.DataFrame(release_results)
releases.sample(10)

Unnamed: 0,project,release,previous_release,cycle,cycle_days,cycle_rdays,rapid_release,trad_release,commits,start_delay,start_delay_rdays,duration,duration_rdays,patches
51636,harness/drone,v2.6.0,v2.5.0,12 days 20:53:33,12.870521,13,True,False,2,8.020984,9,4.849537,5,0
10072,jsx-eslint/eslint-plugin-react,v6.10.0,v6.9.0,38 days 04:03:20,38.168981,39,True,False,49,-96.630556,-96,134.799537,135,3
6778,Radarr/Radarr,v2.0.0.4472,v2.0.0.4427,19 days 13:28:27,19.561424,20,True,False,23,0.28544,1,19.275984,20,0
6361,isomorphic-git/isomorphic-git,v0.43.0,v0.42.0,0 days 04:40:46,0.194977,1,True,False,1,0.194977,1,0.0,0,2
12063,QuantConnect/Lean,11024,11023,0 days 00:00:00,0.0,0,False,False,0,0.0,0,0.0,0,0
45635,puppeteer/puppeteer,v0.12.0,v0.11.0,23 days 10:57:45,23.456771,24,True,False,68,0.017569,1,23.439201,24,0
16873,raineorshine/npm-check-updates,1.2.0,1.1.0,52 days 03:33:59,52.1486,53,False,False,5,0.0039,1,52.144699,53,0
41327,gchq/CyberChef,v6.4.0,v6.3.0,6 days 02:35:22,6.107894,7,True,False,5,5.849884,6,0.258009,1,6
15475,i18next/react-i18next,v1.3.0,v1.2.0,22 days 17:34:07,22.732025,23,True,False,17,-4.707419,-4,27.439444,28,1
28349,xtaci/kcptun,v20181226,v20181224,0 days 00:00:00,0.0,0,False,False,0,0.0,0,0.0,0,0


In [9]:
# Number of rows in the project table.
len(projects)

1665

In [10]:
# Number of rows in the release table.
len(releases)

52836

In [11]:
# Number of releases flagged as rapid releases.
len(releases.query("rapid_release == True"))

29306

In [12]:
# Number of releases flagged as traditional releases.
len(releases.query("trad_release == True"))

11296

In [13]:
# Number of releases flagged as neither rapid nor traditional.
len(releases.query("rapid_release == False and trad_release == False"))

12234

In [14]:
# Sum of the per-release commit counts over all rows of the release table.
releases['commits'].sum()

5520217

In [15]:
# Persist the project-level table; index=False keeps the DataFrame index out
# of the CSV.
projects.to_csv(DATA_PATH / '20_projects.csv', index=False)

In [16]:
# Persist the release-level table; index=False keeps the DataFrame index out
# of the CSV.
releases.to_csv(DATA_PATH / '20_releases.csv', index=False)