# Release evaluation

This notebook evaluates the releases.

# Imports

In [1]:
import datetime
import json

## Data Analysis

In [2]:
# %matplotlib inline
%matplotlib notebook

import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
pd.options.display.max_rows = 1000

from scipy.stats import wilcoxon
from scipy.stats import ranksums

##  Releasy development version

In [3]:
import os
import sys

# Absolute path of the local releasy checkout: three directories up, then dev/releasy.
up_three = [os.pardir] * 3
releasy_module = os.path.abspath(os.path.join(*up_three, 'dev', 'releasy'))
# Insert at position 0 so this checkout is searched before other sys.path entries;
# the membership test keeps re-runs of the cell from adding duplicates.
if releasy_module not in sys.path:
    sys.path.insert(0, releasy_module)

In [4]:
# Drop every cached module whose name starts with "releasy" so the next import
# loads it again. The names are collected up front because sys.modules must not
# change size while it is being iterated.
stale_modules = sorted(name for name in sys.modules if name.startswith("releasy"))
for stale in stale_modules:
    del sys.modules[stale]

In [5]:
from releasy.miner.vcs.miner import Miner
from releasy.miner.vcs.git import GitVcs

In [6]:
from releasy.miner.vcs import miner as releasy_miner
from releasy.miner.vcs import git as releasy_git

In [7]:
import releasy

In [8]:
import importlib
# Re-execute the two releasy submodules used by the mining loop in the running kernel.
# The value of the last call (the reloaded git module) is what the cell displays.
importlib.reload(releasy_miner)
importlib.reload(releasy_git)

<module 'releasy.miner.vcs.git' from '/home/felipecrp/dev/releasy/releasy/miner/vcs/git.py'>

# Dataset setup

## Variables

In [9]:
# Root directory of the local repositories; the mining loop looks for
# <group>/<name>.git below it.
repo_path = os.path.join('..','..','..','repos')

In [10]:
# Project metadata; the index holds "group/name" identifiers (the mining loop splits it on "/").
# NOTE(review): unpickling can execute arbitrary code — only load a trusted projects.zip.
projects = pd.read_pickle("projects.zip")
# Placeholder column; the mining loop stores one mined project object per row here.
projects["data"] = None

### Mining projects using Releasy

  - The **track_base_release** parameter currently has performance issues, so it is recommended to disable it to speed up mining:
  
```python
params = {
    "track_base_release": False
}
```

In [27]:
# Per-project keyword overrides for the Miner, keyed by "group/name"; empty here.
# NOTE(review): a later cell rebuilds this dict from `release_prefixes`. The mining
# loop only sees those overrides if that cell has been run first, so a fresh
# top-to-bottom run mines every project with the defaults only.
release_mine_params = {}

In [83]:
# Mine every project with Releasy and store the result in projects["data"].

# Clearing the column makes the isnull() filter below select every row,
# so each run of this cell re-mines the whole dataset.
projects["data"] = None

# Defaults shared by all projects; per-project entries in release_mine_params win.
default_mine_params = {
    "track_base_release": False
}

# Only the index is needed, so iterate it directly instead of iterrows();
# this also avoids rebinding the row variable to the mining result.
pending = projects[projects["data"].isnull()]
for count, name in enumerate(pending.index, start=1):
    print(f"{datetime.datetime.now()} - {count:3} - Processing {name}")
    project_group, project_name = name.split("/")
    path = os.path.join(repo_path, project_group, f"{project_name}.git")
    projects.loc[name, "path"] = path

    mine_params = {**default_mine_params, **release_mine_params.get(name, {})}

    miner = releasy_miner.Miner(name=name, vcs=releasy_git.GitVcs(path), **mine_params)
    projects.loc[name, "data"] = miner.mine_releases()
print(f"{datetime.datetime.now()} - Ended")

2019-12-19 09:54:42.315395 -   1 - Processing freeCodeCamp/freeCodeCamp
2019-12-19 09:54:42.316414 -   2 - Processing facebook/react
2019-12-19 09:54:42.321756 -   3 - Processing twbs/bootstrap
2019-12-19 09:54:42.324496 -   4 - Processing facebook/create-react-app
2019-12-19 09:54:42.332695 -   5 - Processing axios/axios
2019-12-19 09:54:42.335022 -   6 - Processing nodejs/node
2019-12-19 09:54:42.742366 -   7 - Processing FortAwesome/Font-Awesome
2019-12-19 09:54:42.745293 -   8 - Processing angular/angular.js
2019-12-19 09:54:42.754028 -   9 - Processing mrdoob/three.js
2019-12-19 09:54:42.757679 -  10 - Processing puppeteer/puppeteer
2019-12-19 09:54:42.759708 -  11 - Processing microsoft/vscode
2019-12-19 09:54:42.765755 -  12 - Processing microsoft/TypeScript
2019-12-19 09:54:42.770826 -  13 - Processing angular/angular
2019-12-19 09:54:42.780672 -  14 - Processing ant-design/ant-design
2019-12-19 09:54:42.789865 -  15 - Processing reduxjs/redux
2019-12-19 09:54:42.792487 -  16 -

## Projects dataset

In [85]:
# Per-project counts taken from the mined project objects.
mined_projects = projects["data"]
projects["num_releases"] = mined_projects.map(lambda mined: len(mined.releases))
projects["num_tags"] = mined_projects.map(lambda mined: len(mined.tags))

# Ratio of releases to tags; rows without any tag get 0 instead of NaN/inf.
release_ratio = projects["num_releases"] / projects["num_tags"]
projects["per_releases"] = release_ratio.where(projects["num_tags"] != 0, 0)

In [86]:
# (rows, columns) of the projects frame after adding the count columns.
projects.shape

(100, 13)

## Tags dataset

In [87]:
# Flatten the tags of every mined project into one frame indexed by (project, name);
# the tag object itself is kept in the "data" column.
tag_records = [
    {"project": project.name, "name": tag.name, "data": tag}
    for project in projects["data"]
    for tag in project.tags
]
# Explicit columns keep set_index() working even when no tag was mined at all.
tags = pd.DataFrame(tag_records, columns=["project", "name", "data"]).set_index(["project", "name"])

In [88]:
# Copy each tag object's is_annotated attribute into its own column.
tags["annotated"] = tags["data"].map(lambda tag_obj: tag_obj.is_annotated)

## Releases dataset

In [89]:
# Flatten the releases of every mined project into one frame indexed by (project, name);
# the release object itself is kept in the "data" column.
release_records = [
    {"project": project.name, "name": release.name, "data": release}
    for project in projects["data"]
    for release in project.releases
]
# Explicit columns keep set_index() working even when no release was mined at all.
releases = pd.DataFrame(release_records, columns=["project", "name", "data"]).set_index(["project", "name"])

In [90]:
# Column name -> function extracting that value from a release object.
# The dict order is the order in which the columns are added to the frame.
release_features = {
    "prefix": lambda rel: rel.prefix,
    "head_commit": lambda rel: rel.head_commit.hashcode,
    "time": lambda rel: rel.get_time(),
    "start_development": lambda rel: rel.get_time(of=releasy.START_DEVELOPMENT_TIME),
    "length": lambda rel: rel.get_length(),
}
for feature_name, feature_fn in release_features.items():
    releases[feature_name] = releases["data"].apply(feature_fn)
# Disabled: blank out zero-length releases.
# releases.loc[releases["length"] == pd.to_timedelta(0), "length"] = np.NaN


In [91]:
# (rows, columns) of the releases frame after adding the feature columns.
releases.shape

(16822, 6)

In [92]:
def get_prefixes(project):
    """Count how many releases of ``project`` carry each prefix.

    Iterates ``project.releases`` and tallies ``release.prefix``. Falsy
    prefixes (``None`` or the empty string) are grouped under the string
    key ``"None"``.

    Returns:
        dict: prefix -> number of releases, in order of first occurrence.
    """
    counts = {}
    for release in project.releases:
        key = release.prefix or "None"
        counts[key] = counts.get(key, 0) + 1
    return counts

# Build the prefix histogram once per project and derive both columns from it,
# so get_prefixes runs a single time for every project.
prefix_counts = projects["data"].map(get_prefixes)
# Number of distinct prefixes used by the project's releases.
projects["num_prefixes"] = prefix_counts.map(len)
# Comma-separated list of those prefixes, in order of first occurrence.
projects["prefixes"] = prefix_counts.map(lambda counts: ",".join(counts.keys()))

### show release prefixes

In [97]:
# Inspect one project's row, including the derived prefix columns.
projects.loc["nodejs/node"]

description     Node.js JavaScript runtime :sparkles::turtle::...
stars                                                       66329
url                      https://api.github.com/repos/nodejs/node
git_url                          git://github.com/nodejs/node.git
language                                               JavaScript
created_at                                    2014-11-26 19:57:11
data                                                  nodejs/node
path                               ../../../repos/nodejs/node.git
num_releases                                                  539
num_tags                                                      543
per_releases                                             0.992634
num_prefixes                                                    1
prefixes                                                        v
Name: nodejs/node, dtype: object

In [95]:
# Inspect the per-project mining overrides currently held in the kernel.
release_mine_params

{'angular/angular': {'release_prefixes': ['']},
 'nodejs/node': {'release_prefixes': ['v']},
 'microsoft/vscode': {'release_prefixes': ['', 'v']}}

In [112]:
# NOTE(review): in the visible notebook `release_prefixes` is first assigned in a cell
# further down, so this raises NameError on Restart & Run All unless that cell is
# moved above this one.
release_prefixes.keys()

dict_keys(['angular/angular', 'nodejs/node', 'microsoft/vscode'])

In [151]:
# Projects that use more than one release prefix and have no manual prefix entry yet.
# NOTE(review): relies on `release_prefixes`, which the visible notebook only assigns
# in a later cell — run that cell first.
has_many_prefixes = projects["num_prefixes"] > 1
already_screened = projects.index.isin(release_prefixes.keys())
projects_to_screen = projects[has_many_prefixes & ~already_screened]
print(projects_to_screen.shape[0])
projects_to_screen.sort_values(["name"])["prefixes"]

18


name
FFmpeg/FFmpeg                                                                  v,ffmpeg-,n
PHPMailer/PHPMailer                                                           phpmailer-,v
airbnb/lottie-android                                                              v,v.,vv
angular/angular.js                                                                  v,g3-v
bilibili/ijkplayer                                                                  n,wk,k
dotnet-architecture/eShopOnContainers                                       netcore,None,v
dotnet/roslyn                                        Oss.Scan.,VS.Toolset.Roslyn.,version-
facebook/create-react-app                v,babel-preset-react-app@,create-react-app@,es...
fastlane/fastlane                        None,produce/,watchbuild/,match/,gym/,credenti...
golang/go                                                                     release.r,go
google/guava                                             v,jdk5-backport-v,failureacc

In [149]:
# Inspect the releases mined for a single project.
releases.loc["blueimp/jQuery-File-Upload"]

Unnamed: 0_level_0,data,prefix,head_commit,time,start_development,length
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
8.1.0,8.1.0,,442027a0fe8d483687596ee72da83c58ce293f1b,2013-05-09 16:44:06+02:00,2013-05-09 16:44:06+02:00,00:00:00
8.2.1,8.2.1,,86dc08cd16e6753d469c049844a09702733cc3ac,2013-05-16 13:28:32-05:00,2013-05-16 13:28:32-05:00,00:00:00
8.3.0,8.3.0,,377652a472340b12b5404bf78e6aea3630081e57,2013-06-11 20:31:01-05:00,2013-06-11 20:31:01-05:00,00:00:00
8.3.1,8.3.1,,05ed2ef504124aaacf3c7ac601911b7e7b93bbd4,2013-06-12 16:41:36-05:00,2013-06-12 16:41:36-05:00,00:00:00
8.3.2,8.3.2,,6afe968058cd4e3168a9d1f3871f28efff86f03c,2013-06-14 10:05:31-05:00,2013-06-14 10:05:31-05:00,00:00:00
8.4.0,8.4.0,,c3cbd35d5d1e1307fe4614eb2fb01e7d9c32c438,2013-06-26 03:10:14-05:00,2013-06-26 03:10:14-05:00,00:00:00
8.4.1,8.4.1,,1528f15be3e1135b7fcf5c037c576e046688a3d4,2013-06-26 15:18:50-05:00,2013-06-26 15:18:50-05:00,00:00:00
8.4.2,8.4.2,,2b7f48ebee1b1ffd20f4badfacba5d14a75ee380,2013-06-26 19:12:39-05:00,2013-06-26 19:12:39-05:00,00:00:00
8.4.3,8.4.3,,20554c06043259816b397bc0f7eed4632c339930,2013-07-04 10:00:57-05:00,2013-07-04 10:00:57-05:00,00:00:00
8.5.0,8.5.0,,470d73d1bf307afa07bee6b5c25e3719e48f27ef,2013-07-05 11:47:05-05:00,2013-07-05 11:47:05-05:00,00:00:00


In [150]:
# Release prefixes to accept for each project, keyed by "group/name".
# None presumably stands for tags without any prefix — TODO confirm against releasy.
release_prefixes = {
    "angular/angular": [None],
    "nodejs/node": ["v"],
    "git/git": ["v"],
    "microsoft/vscode": [None, "v"],
    "ansible/ansible": [None, "v"],
    "antirez/redis": [None, "v"],
    "aspnet/AspNetCore": [None, "v"],
    "bcit-ci/CodeIgniter": [None, "v"],
    "prometheus/prometheus": [None, "v"],
    "psf/requests": [None, "v"],
    "facebook/react": [None, "v"],
    "guzzle/guzzle": [None, "v"],
    "ionic-team/ionic": [None, "v"],
    "Wox-launcher/Wox": [None, "v"],
    "FortAwesome/Font-Awesome": [None, "v"],
    "microsoft/terminal": [None, "v"],
    "blueimp/jQuery-File-Upload": [None, "v"],
    "scrapy/scrapy": ["scrapy-", None],
    "square/okhttp": ["parent-", None],
    "square/retrofit": ["parent-", None],
    "x64dbg/x64dbg": [None, "V"],  # only pre releases ALPHA
}

# Wrap each prefix list in the keyword-argument dict that the mining loop
# unpacks into Miner(...).
release_mine_params = {
    project_key: {"release_prefixes": prefixes}
    for project_key, prefixes in release_prefixes.items()
}
