## 1- Mine releases from projects

For each selected project:
* fetch its releases from the GitHub API and save them into the database.

Project selection criteria:
 - qtd_stars >= 100
 - forked is false
 - size > 10000
 - ci_service is null or (ci_service like 'Travis CI' and analysis_point is not null)
 - releases_mined is false (projects that were already mined are skipped)

In [None]:
# Mine the releases of every project returned by getProjects and flag each
# project as mined once its releases have been requested and stored.
repos = getProjects()
n_repos = len(repos)
i = 0
for i, repo in enumerate(repos, start=1):
    project = repo[0]
    print('\n\n\n{}/{} Project - {}\n\n\n'.format(i, n_repos, project))

    # mineReleases appends the page number to the end of this URL.
    query_releases = 'https://api.github.com/repos/{}/releases?sort=created&direction=asc&per_page=100&page='.format(project)
    mineReleases(project, query_releases)
    setProjectReleaseMined(project)

## 2- Classify Releases as major

In [None]:
# For every mined project, flag the stored releases whose tag name is
# classified as major by isMajor.
repos = getProjectsMined()
n_repos = len(repos)
i = 0
for i, repo in enumerate(repos, start=1):
    project = repo[0]
    print('\n\n\n{}/{} Project - {}\n\n\n'.format(i, n_repos, project))

    # Rows are (id, tag_name, created_at, node_id), as selected by getReleases.
    releases = getReleases(project)
    for release in releases:
        if not isMajor(release[1]):
            continue
        updateMajorRelease(project, release[0])

## 3- Bind issues and PRs to releases

In [None]:
# For every mined project, bind issues and pull requests to its releases.
repos = getProjectsMined()
n_repos = len(repos)
i = 0
for i, repo in enumerate(repos, start=1):
    project = repo[0]
    print('\n\n\n{}/{} Project - {}\n\n\n'.format(i, n_repos, project))

    releases = getReleases(project)

    # getReleases orders rows by created_at ascending, so iterating in reverse
    # processes the most recent release first.
    for release in reversed(releases):
        release_creation = release[2]
        release_id = release[3]  # node_id column of project_releases

        updateIssues(project, release_creation, release_id)
        updatePRs(project, release_creation, release_id)

In [None]:
# Ad-hoc check: fetch the stored releases of a single project for inspection.
i = getReleases('lerna/lerna')

In [None]:
# Display the value currently bound to `i`.
i

### Aux Functions

In [None]:
import psycopg2
import os
import requests 
import time
from datetime import datetime, timedelta
import pandas as pd
from scipy import stats
import numpy as np
np.seterr(divide='ignore', invalid='ignore')

In [None]:
def connectDB():
    """Open a new PostgreSQL connection to the ``Causal_CI_Quality_v4`` database.

    The password is read from the first line of ``/home/psql_pwd.txt``.

    Returns:
        A new ``psycopg2`` connection. This function does not close it; the
        caller must do so.
    """
    # The context manager closes the password file even if reading fails.
    with open('/home/psql_pwd.txt', "r") as f:
        pwd = f.readline().replace('\n', '')

    return psycopg2.connect(user="ci_quality",
                            password=pwd,
                            host="127.0.0.1",
                            port="5432",
                            database="Causal_CI_Quality_v4")

In [None]:
def loadTokens(path='/home/gh_tokens.txt'):
    """Read API tokens from a text file, one token per line.

    Reading stops at the first empty line or at the end of the file, so any
    tokens placed after a blank line are ignored.

    Args:
        path: Location of the token file. Defaults to the path this notebook
            has always used.

    Returns:
        The tokens as a list of strings, in file order, without the trailing
        newline characters.
    """
    tokens = []
    # The context manager closes the file even if reading raises.
    with open(path, "r") as f:
        for line in f:
            tk = line.replace('\n', '')
            if tk == '':
                break
            tokens.append(tk)
    return tokens

In [None]:
# Tokens used for the Authorization header and the index of the token used by
# the most recent request.
tokens = loadTokens()
i_token = 0


def requestAPI(URL):
    """Send a GET request to ``URL`` and return the decoded JSON payload.

    Every call advances to the next token in ``tokens`` (wrapping around to
    the first one) and sends it in the ``Authorization`` header.

    Args:
        URL: Full URL to request.

    Returns:
        The decoded JSON payload, or ``None`` when the payload is empty, when
        the API answers with an error ``message`` other than a rate-limit
        notice, or when handling the payload raises. When the rate limit is
        exceeded, the function sleeps for 600 seconds and returns the result
        of retrying the same URL.
    """
    global i_token

    # Advance to the next token, wrapping around after the last one.
    if (len(tokens) - 1) > i_token:
        i_token += 1
    else:
        i_token = 0

    r = requests.get(url=URL, headers={'Authorization': tokens[i_token],'Accept':'application/vnd.github.cloak-preview'}).json()
    if len(r) == 0:
        return None

    try:
        # Error responses are dicts carrying a 'message' key.
        if isinstance(r, dict) and 'message' in r:
            print('{} -- {}'.format(datetime.now().strftime("%H:%M:%S"), r['message']))
            if 'API rate limit exceeded' in r['message']:
                time.sleep(600)
                # Return the retried result; otherwise the rate-limit error
                # dict itself would be handed back to the caller.
                return requestAPI(URL)
            return None

        return r
    except Exception as e:
        print('\n Erro no request get: {}'.format(e))
        print(r)
        return None

In [None]:
def getProjects():
    """Return the projects that still need their releases mined.

    A project is selected when it has at least 100 stars, is not a fork, has
    size above 10000, either has no CI service or uses 'Travis CI' with a
    non-null analysis point, and has ``releases_mined`` set to false.

    Returns:
        The rows fetched by the query, ordered by ``repo_name``; each row
        holds the repository name at index 0.
    """
    # The CI condition is fully parenthesised so it parses and so that
    # analysis_point is only required for Travis CI projects.
    query = """select repo_name from projects
            where qtd_stars >= 100 and forked is false and size  > 10000
            and (ci_service is null or (ci_service like 'Travis CI' and analysis_point is not null))
            and releases_mined is False
            ORDER BY repo_name """

    connection = connectDB()
    try:
        cursor = connection.cursor()
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query fails.
        connection.close()

In [None]:
def setProjectReleaseMined(repo_name):
    """Set ``releases_mined`` to true for the project(s) matching ``repo_name``.

    Args:
        repo_name: Repository name, matched against ``projects.repo_name``
            with SQL ``like``.
    """
    conn = connectDB()
    cur = conn.cursor()
    cur.execute(
        """UPDATE  projects 
        set releases_mined = True
        WHERE repo_name like %s""",
        [repo_name],
    )
    conn.commit()
    cur.close()
    conn.close()

In [None]:
def mineReleases(project,url):
    """Fetch every page of releases for ``project`` and store each release.

    Pages are requested starting at 1 until ``requestAPI`` returns ``None`` or
    an empty payload. An exception raised while handling a page is printed and
    the remaining releases of that page are skipped; mining then continues
    with the next page.

    Args:
        project: Repository name the releases belong to.
        url: Releases endpoint URL ending in ``page=``; the page number is
            appended to it.
    """
    page = 1
    while True:
        page_url = url + str(page)
        releases = requestAPI(page_url)
        if releases is None or len(releases) == 0:
            break

        try:
            for r in releases:
                print('+ Project: {}   Release - {}'.format(project, r['id']))
                storeRelease(project, r)
        except Exception as e:
            print('\n\n==============================================================')
            print('Error while processing storeRelease >>> Exception: {}'.format(e))
            print('Project: {}    Releases - {}\n\n'.format(project, page_url))

        page += 1

In [None]:
def storeRelease(repo_name, release):
    """Insert one release into the ``project_releases`` table.

    Args:
        repo_name: Repository name the release belongs to.
        release: Mapping with the keys ``node_id``, ``url``, ``html_url``,
            ``created_at``, ``body``, ``id``, ``tag_name`` and ``name``.

    Raises:
        KeyError: If ``release`` lacks one of the keys listed above.
        Exception: Any error raised by the database driver is propagated
            after the connection has been closed.
    """
    query = """INSERT INTO project_releases(
    repo_name,node_id,url,html_url,created_at,body,id,tag_name,name)
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s);"""

    connection = connectDB()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query, [repo_name, release['node_id'], release['url'],
                                   release['html_url'], release['created_at'],
                                   release['body'], release['id'],
                                   release['tag_name'], release['name']])
            connection.commit()
        finally:
            cursor.close()
    finally:
        # mineReleases catches failures and keeps going, so the connection
        # must be released here even when the insert raises.
        connection.close()

In [None]:
def getReleases(repo_name):
    """Fetch the stored releases of a project, oldest first.

    Args:
        repo_name: Repository name, matched against
            ``project_releases.repo_name`` with SQL ``like``.

    Returns:
        The fetched rows, each ``(id, tag_name, created_at, node_id)``,
        ordered by ``created_at`` ascending.
    """
    conn = connectDB()
    cur = conn.cursor()
    cur.execute(
        """select id,tag_name, created_at, node_id from project_releases
            where repo_name like %s
            ORDER BY created_at asc;""",
        [repo_name],
    )
    rows = cur.fetchall()
    conn.close()
    return rows

In [None]:
def updateMajorRelease(repo_name,id):
    """Set ``major`` to true on one release of a project.

    Args:
        repo_name: Repository name, matched against
            ``project_releases.repo_name`` with SQL ``like``.
        id: Value of the ``id`` column of the release to flag.
    """
    conn = connectDB()
    cur = conn.cursor()
    cur.execute(
        """UPDATE  project_releases 
        set major = True
        WHERE repo_name like %s AND id = %s""",
        [repo_name, id],
    )
    conn.commit()
    cur.close()
    conn.close()

In [None]:
def getProjectsMined():
    """Return the projects whose ``releases_mined`` flag is true.

    Returns:
        The fetched rows, ordered by ``repo_name``; each row holds the
        repository name at index 0.
    """
    conn = connectDB()
    cur = conn.cursor()
    cur.execute("""select repo_name from projects
            where releases_mined is True
            ORDER BY repo_name """)
    rows = cur.fetchall()
    conn.close()
    return rows

In [None]:
def isMajor(tag):
    """Tell whether a release tag looks like a major version.

    All digit characters of ``tag`` are concatenated; a leading ``0`` is
    replaced by ``1``. The tag is major when every digit after the first one
    is zero, e.g. ``'v1.0.0'``, ``'2.0'`` or ``'v10.0.0'``.

    NOTE(review): multi-digit majors that do not end in zero (``'12.0.0'``
    gives ``'1200'``) are not classified as major by this rule.

    Args:
        tag: Release tag name; ``None`` or an empty string yields ``False``.

    Returns:
        ``True`` if the tag is classified as major, ``False`` otherwise
        (including tags that contain no digits).
    """
    if not tag:
        return False

    # '.' is not a digit, so filtering the whole tag gives the same digits as
    # filtering each dot-separated part and concatenating them.
    number = ''.join(filter(str.isdigit, tag))
    if number == '':
        return False
    if number[0] == '0':
        number = '1' + number[1:]

    # Compare as exact integers: a float division would round long digit
    # strings to 1.0 and misclassify them as major.
    return int(number) == int(number[0] + '0' * (len(number) - 1))

In [None]:
def getIssues(repo_name):
    """Fetch the stored issues of a project ordered by creation date.

    Args:
        repo_name: Repository name, matched against ``issue.repo_name`` with
            SQL ``like``.

    Returns:
        The fetched rows, each ``(id, created_at)``, ordered by
        ``created_at``.
    """
    conn = connectDB()
    cur = conn.cursor()
    cur.execute(
        """select id,created_at from issue
            where repo_name like %s
            ORDER BY created_at """,
        [repo_name],
    )
    rows = cur.fetchall()
    conn.close()
    return rows

In [None]:
def updateIssues(repo_name,dt,node_id):
    """Run the ``bindissuesreleases`` database function for one release.

    The statement is printed before it is executed, and the transaction is
    committed afterwards.

    Args:
        repo_name: Repository name passed as the first argument.
        dt: Release creation timestamp passed as the second argument.
        node_id: Release node id passed as the third argument.
    """
    print('select * from bindissuesreleases({},{},{});'.format(repo_name, dt,node_id))

    conn = connectDB()
    cur = conn.cursor()
    cur.execute("""select * from bindissuesreleases(%s,%s,%s);""",
                [repo_name, dt, node_id])
    conn.commit()
    cur.close()
    conn.close()

In [None]:
def updatePRs(repo_name,dt,node_id):
    """Run the ``bindprsreleases`` database function for one release.

    The equivalent statement is printed first; the function is then invoked
    through ``cursor.callproc`` and the transaction is committed.

    Args:
        repo_name: Repository name passed as the first argument.
        dt: Release creation timestamp passed as the second argument.
        node_id: Release node id passed as the third argument.
    """
    print('select * from bindprsreleases({},{},{});'.format(repo_name, dt,node_id))

    conn = connectDB()
    cur = conn.cursor()
    params = (repo_name, dt, node_id)
    cur.callproc('bindprsreleases', params)
    conn.commit()
    cur.close()
    conn.close()