#

This notebook evaluates the releases.

# Imports

In [1]:
import datetime
import json

## Data Analysis

In [2]:
# %matplotlib inline
%matplotlib notebook

import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
pd.options.display.max_rows = 1000

from scipy.stats import wilcoxon
from scipy.stats import ranksums

##  Releasy development version

In [3]:
import os
import sys

# Locate the local Releasy checkout (three directory levels up, under dev/)
# and, unless it is already listed, put it at the front of the import path.
relative_checkout = os.path.join('..', '..', '..', 'dev', 'releasy')
releasy_module = os.path.abspath(relative_checkout)
already_listed = releasy_module in sys.path
if not already_listed:
    sys.path.insert(0, releasy_module)

In [4]:
# Drop every already-imported module whose name starts with "releasy" from the
# module cache, so the imports in the following cells load them afresh.
stale_modules = [loaded for loaded in sorted(sys.modules) if loaded.startswith("releasy")]
for mod in stale_modules:
    del sys.modules[mod]

In [5]:
from releasy.miner.vcs.miner import Miner
from releasy.miner.vcs.git import GitVcs

In [6]:
from releasy.miner.vcs import miner as releasy_miner
from releasy.miner.vcs import git as releasy_git

In [7]:
import releasy

In [8]:
import importlib
# Re-execute the two miner modules in place (presumably so that edits made to
# the development checkout are picked up without restarting the kernel).
importlib.reload(releasy_miner)
importlib.reload(releasy_git)

<module 'releasy.miner.vcs.git' from '/home/felipecrp/dev/releasy/releasy/miner/vcs/git.py'>

# Dataset setup

## Variables

In [34]:
# Root directory of the local git clones, relative to the notebook's working
# directory; repositories are looked up below as <group>/<name>.git.
repo_path = os.path.join('..','..','..','repos')

In [35]:
# Load the project table (indexed by "<group>/<name>", as the mining loop assumes).
# NOTE(review): read_pickle unpickles the file, which can execute arbitrary
# code - only load "projects.zip" when it comes from a trusted source.
projects = pd.read_pickle("projects.zip")
# Placeholder column; the mining cell stores each mined project object here.
projects["data"] = None

### Mining projects using Releasy

  - The **track_base_release** parameter currently has performance issues, so it is recommended to disable it to keep mining fast.
  
```{python}
params = {
    "track_base_release": False
}
```

Only the release prefixes listed below are considered for each project; tags with any other prefix are ignored.

In [151]:
# Accepted release-tag prefixes per project, keyed by "<group>/<name>".
# The lists are handed to the miner unchanged as its "release_prefixes" option.
release_prefixes = {
    "angular/angular": [None],
    "nodejs/node": ["v"],
    "git/git": ["v"],
    "microsoft/vscode": [None, "v"],
    "ansible/ansible": [None, "v"],
    "antirez/redis": [None, "v"],
    "aspnet/AspNetCore": [None, "v"],
    "bcit-ci/CodeIgniter": [None, "v"],
    "prometheus/prometheus": [None, "v"],
    "psf/requests": [None, "v"],
    "facebook/react": [None, "v"],
    "guzzle/guzzle": [None, "v"],
    "ionic-team/ionic": [None, "v"],
    "Wox-launcher/Wox": [None, "v"],
    "FortAwesome/Font-Awesome": [None, "v"],
    "microsoft/terminal": [None, "v"],
    "blueimp/jQuery-File-Upload": [None, "v"],
    "ReactiveX/RxJava": [None, "v"],
    "PHPMailer/PHPMailer": ["v"],
    "scrapy/scrapy": ["scrapy-", None],
    "square/okhttp": ["parent-", None],
    "square/retrofit": ["parent-", None],
    "x64dbg/x64dbg": [None, "V"],  # only pre releases ALPHA
    "FFmpeg/FFmpeg": ["v", "ffmpeg-", ":"],
    "angular/angular.js": ["v"],
    "bilibili/ijkplayer": ["n", "wk", "k"],
    "dotnet-architecture/eShopOnContainers": ["netcore", None, "v"],
    "dotnet/aspnetcore": ["v", None],
    "zeit/hyper": ["v", None],
    "grafana/grafana": ["v"],  # bug -> v5.,
    "golang/go": ["release.r", "go"],
    "dotnet/roslyn": ["version-"],
    "facebook/create-react-app": ["v"],
    "fastlane/fastlane": [None],
    "google/guava": ["v"],
    "jekyll/jekyll": ["v"],
    "moby/moby": [None, "v"],
    "php/php-src": ["php-", "PHP-"],
    "scikit-learn/scikit-learn": [None],
    "tensorflow/tensorflow": [None, "v"],
}

# Per-project keyword arguments for the miner, built from the table above.
release_mine_params = {
    project_key: {"release_prefixes": prefixes}
    for project_key, prefixes in release_prefixes.items()
}


In [152]:
# Mine every project with Releasy and store the result in projects["data"].
# The column is cleared first, so re-running this cell re-mines all projects.
projects["data"] = None
pending = projects[projects["data"].isnull()]
for count, name in enumerate(pending.index, start=1):
    print(f"{datetime.datetime.now()} - {count:3} - Processing {name}")

    # Project names look like "<group>/<name>"; the clone lives at <group>/<name>.git
    project_group, project_name = name.split("/")
    path = os.path.join(repo_path, project_group, f"{project_name}.git")
    projects.loc[name, "path"] = path

    # Base-release tracking stays off; project-specific options are layered on top.
    mine_params = {"track_base_release": False, **release_mine_params.get(name, {})}

    miner = releasy_miner.Miner(name=name, vcs=releasy_git.GitVcs(path), **mine_params)
    projects.loc[name, "data"] = miner.mine_releases()
print(f"{datetime.datetime.now()} - Ended")

2020-01-11 12:52:25.908042 -   1 - Processing freeCodeCamp/freeCodeCamp
2020-01-11 12:52:25.909442 -   2 - Processing facebook/react
2020-01-11 12:52:25.917664 -   3 - Processing twbs/bootstrap
2020-01-11 12:52:25.921858 -   4 - Processing facebook/create-react-app
2020-01-11 12:52:25.931672 -   5 - Processing axios/axios
2020-01-11 12:52:25.935570 -   6 - Processing FortAwesome/Font-Awesome
2020-01-11 12:52:25.939467 -   7 - Processing angular/angular.js
2020-01-11 12:52:25.947793 -   8 - Processing webpack/webpack
2020-01-11 12:52:25.961751 -   9 - Processing hakimel/reveal.js
2020-01-11 12:52:25.963602 -  10 - Processing socketio/socket.io
2020-01-11 12:52:25.968577 -  11 - Processing microsoft/vscode
2020-01-11 12:52:25.973252 -  12 - Processing microsoft/TypeScript
2020-01-11 12:52:25.978049 -  13 - Processing angular/angular
2020-01-11 12:52:25.985735 -  14 - Processing ant-design/ant-design
2020-01-11 12:52:25.995296 -  15 - Processing reduxjs/redux
2020-01-11 12:52:25.998094 - 

## Projects dataset

In [153]:
# Per-project counts taken from the mined project objects.
mined_projects = projects["data"]
projects["num_releases"] = mined_projects.apply(lambda mined: len(mined.releases))
projects["num_tags"] = mined_projects.apply(lambda mined: len(mined.tags))

# Share of tags that were recognised as releases; projects without any tag
# get 0 instead of the result of dividing by zero.
has_no_tags = projects["num_tags"] == 0
projects["per_releases"] = projects["num_releases"] / projects["num_tags"]
projects.loc[has_no_tags, "per_releases"] = 0

In [154]:
# Number of projects (rows) and of project-level columns.
projects.shape

(100, 14)

In [155]:
# Total number of tags over all projects.
projects.num_tags.sum()


17800

In [156]:
# Total number of releases over all projects.
projects.num_releases.sum()

13878

## Tags dataset

In [61]:
# Flatten the tags of all projects into one frame indexed by (project, tag name);
# the "data" column keeps the tag object itself.
tag_records = [
    {"project": project.name, "name": tag.name, "data": tag}
    for project in projects["data"]
    for tag in project.tags
]
tags = pd.DataFrame(tag_records).set_index(["project", "name"])

In [62]:
# Copy each tag object's is_annotated attribute into its own column.
tags["annotated"] = tags["data"].apply(lambda t: t.is_annotated)

## Releases dataset

In [63]:
# Flatten the releases of all projects into one frame indexed by
# (project, release name); the "data" column keeps the release object itself.
release_records = [
    {"project": project.name, "name": release.name, "data": release}
    for project in projects["data"]
    for release in project.releases
]
releases = pd.DataFrame(release_records).set_index(["project", "name"])

In [64]:
# Derive plain columns from the mined release objects stored in "data".
releases["prefix"] = releases["data"].apply(lambda r: r.prefix)
releases["head_commit"] = releases["data"].apply(lambda r: r.head_commit.hashcode)

# Release time, start-of-development time and release length as reported by releasy
# (presumably length = time - start_development; confirm in the releasy sources).
releases["time"] = releases["data"].apply(lambda r: r.get_time())
releases["start_development"] = releases["data"].apply(lambda r: r.get_time(of=releasy.START_DEVELOPMENT_TIME))
releases["length"] = releases["data"].apply(lambda r: r.get_length())
# Disabled: would turn zero-length releases into missing values.
# releases.loc[releases["length"] == pd.to_timedelta(0), "length"] = np.NaN


In [65]:
# One row per release over all projects.
# NOTE(review): the saved output of this cell (In [65], 16807 rows) is older than
# the per-project release total (In [156], 13878); re-run the notebook so they agree.
releases.shape

(16807, 6)

In [66]:
def get_prefixes(project):
    """Count the releases of ``project`` per release prefix.

    Releases with a falsy prefix (``None`` or an empty string) are counted
    under the literal key ``"None"``.

    Returns a dict mapping each prefix to its number of releases, with keys
    in the order the prefixes are first seen.
    """
    counts = {}
    for rel in project.releases:
        key = rel.prefix or "None"
        counts[key] = counts.get(key, 0) + 1
    return counts

# Number of distinct release prefixes per project, and the prefixes themselves
# as a comma-separated string (a missing prefix shows up as "None").
projects["num_prefixes"] = projects["data"].apply(lambda mined: len(get_prefixes(mined)))
projects["prefixes"] = projects["data"].apply(lambda mined: ",".join(get_prefixes(mined)))

### Show release prefixes

In [149]:
# Projects that use more than one release prefix but have no entry in
# release_prefixes yet - these still need a manual look.
uses_several_prefixes = projects["num_prefixes"] > 1
already_configured = projects.index.isin(release_prefixes.keys())
projects_to_screen = projects[uses_several_prefixes & ~already_configured]
print(projects_to_screen.shape[0])
projects_to_screen.sort_values(["name"])["prefixes"]

0


Series([], Name: prefixes, dtype: object)

### Manual inspection of releases

In [157]:
# Spot-check the releases mined for a single project.
releases.loc["angular/angular"]

Unnamed: 0_level_0,data,prefix,head_commit,time,start_development,length
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2.0.0-alpha.13,2.0.0-alpha.13,,da4862a0c82866e117ee3727f80b205c00da0710,2015-03-13 20:49:51-07:00,2015-03-13 20:49:51-07:00,0 days 00:00:00
2.0.0-alpha.14,2.0.0-alpha.14,,1c9938ed9868259955b2d12c8ae541bf6dcbf09a,2015-03-23 17:14:55-07:00,2015-03-23 17:14:55-07:00,0 days 00:00:00
2.0.0-alpha.15,2.0.0-alpha.15,,81e6d13241d143b6e72a09530314db5a9e24aeaa,2015-03-24 07:50:39-07:00,2015-03-24 07:50:39-07:00,0 days 00:00:00
2.0.0-alpha.16,2.0.0-alpha.16,,acce69da754bfdd6eaf4a7fe0ff75e4e6c11a849,2015-03-26 13:29:47-07:00,2015-03-26 13:29:47-07:00,0 days 00:00:00
2.0.0-alpha.17,2.0.0-alpha.17,,50f8892c6bb8e33ec888fc4fd987d8b1bab19c54,2015-03-27 16:21:41-07:00,2015-03-27 16:21:41-07:00,0 days 00:00:00
2.0.0-alpha.18,2.0.0-alpha.18,,dbffa88dc2ba6ab49c706c6fe1e5dcb0f3b19ed9,2015-03-27 17:16:22-07:00,2015-03-27 17:16:22-07:00,0 days 00:00:00
2.0.0-alpha.19,2.0.0-alpha.19,,7d29636087dbb9964f6c5a2bfe61032bed73db39,2015-04-13 16:15:12-07:00,2015-04-13 16:15:12-07:00,0 days 00:00:00
2.0.0-alpha.20,2.0.0-alpha.20,,fe70c2647a2c30b49d17a5a53b85672afcf55aef,2015-04-20 17:53:29-07:00,2015-04-20 17:53:29-07:00,0 days 00:00:00
2.0.0-alpha.21,2.0.0-alpha.21,,b72eb0783b24a785c34d4f74e667c224adb36b12,2015-04-27 22:15:48-07:00,2015-04-27 22:15:48-07:00,0 days 00:00:00
2.0.0-alpha.22,2.0.0-alpha.22,,f0ef72d6cc95802ea62b233323e239f5dbbe8d9a,2015-05-07 16:04:06-07:00,2015-05-07 16:04:06-07:00,0 days 00:00:00



Other resources

In [52]:
n_tags = 0
for name, project in projects.iterrows():
    project_group, project_name = name.split("/") 
    path = os.path.join(repo_path, project_group, f"{project_name}.git")
    %cd $path
    n_tag = !(git tag | wc -l)
    n_tag = int(n_tag[0])
    n_tags += n_tag
print(n_tags)

/home/felipecrp/repos/freeCodeCamp/freeCodeCamp.git
/home/felipecrp/repos/facebook/react.git
/home/felipecrp/repos/twbs/bootstrap.git
/home/felipecrp/repos/facebook/create-react-app.git
/home/felipecrp/repos/axios/axios.git
/home/felipecrp/repos/FortAwesome/Font-Awesome.git
/home/felipecrp/repos/angular/angular.js.git
/home/felipecrp/repos/webpack/webpack.git
/home/felipecrp/repos/hakimel/reveal.js.git
/home/felipecrp/repos/socketio/socket.io.git
/home/felipecrp/repos/microsoft/vscode.git
/home/felipecrp/repos/microsoft/TypeScript.git
/home/felipecrp/repos/angular/angular.git
/home/felipecrp/repos/ant-design/ant-design.git
/home/felipecrp/repos/reduxjs/redux.git
/home/felipecrp/repos/storybookjs/storybook.git
/home/felipecrp/repos/denoland/deno.git
/home/felipecrp/repos/ionic-team/ionic.git
/home/felipecrp/repos/grafana/grafana.git
/home/felipecrp/repos/zeit/hyper.git
/home/felipecrp/repos/elastic/elasticsearch.git
/home/felipecrp/repos/spring-projects/spring-boot.git
/home/felipecrp/r