# Generate a log of commits from a GitHub repository using Python 3 and Jupyter Notebook

## User variables

One commit link per line

Add comments to the commits by adding `|comment` after the link

In [1]:
# Name of the log; used in the page title/heading and in the output file name.
logName = "Bosque-UD 2.5"
# Version label; shown on the page and appended to the output file name.
logVersion = "2"
# Repository the commits belong to; also the prefix for bare commit hashes and issue links.
repoLink = "https://github.com/alvelvis/UD_Portuguese-Bosque"
# Branch passed as `sha` to the GitHub commits API.
branch = "workbench"
# Bounds of the commit range fetched from the API; a full commit link or a bare hash is accepted.
initialCommit = "02bf943fd9cd28aae35aa481b09d675b5974b196"
finalCommit = "https://github.com/alvelvis/UD_Portuguese-Bosque/commit/25fb8f7323c04a13147290ff133d95b9ed893134"
# Base address printed together with the generated file name at the end of the notebook.
xpLink = "https://comcorhd.ga/xp/commits-log"
# Commits listed by hand, one per line, optionally followed by `|comment`
# (the comment is inserted as-is in the "Comments" column, so it may contain HTML).
commitsLinks = '''
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/4ef856b77a79071ba9c4c38afc056a5474f29423|<a target="_blank" href="https://docs.google.com/document/d/1Vu90BuIBNYUDkQmeicpoPl9xHMzJaIP0a0Z29U6kjdE/edit">Relatório de correções de features desde 2.3</a>
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/92fc86a54342f27e39a55aba40f49ac8eba3ee9b|<a target="_blank" href="https://docs.google.com/document/d/1GTzaYaDli4xjxshkZ6_NZbvPOD1u5zcGwRmJqyR25qk/edit#heading=h.mp566x6a7wfr">Relatório de pontuação pós-Bosque-UD 2.4</a>
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/0ba5a48c029266a99d30dfcdaf22526c90c42bed|<a target="_blank" href="https://www.overleaf.com/project/5d1dee877ca0ca2fddf340f1">Artigo aceito no JDP 2019 sobre as locuções verbais</a>
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/24b2d0ebbcaf4a6403df974f7df0974d21f23900|<a target="_blank" href="/xp/dep_Elvis/dep_Elvis.html">Relatório de correções de dep desde 2.3</a>
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/5e326dc545a987e90637f8368899242bcf403f4b|Relatório da Tatiana disponível em breve
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/b4538d8926d852c9c65ca98eda19384060ad0f8e|Relatório da Wogue disponível em breve
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/6d78b24815aca182a846cf1839fddfc94a90b2c7|Relatório da Aline disponível em breve
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/69016af720c13248abc486004e6f128100fa5997|Tatiana - revisar
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/e79c69c158349b7bd0c325b4a81017f89734fa3a|Esse commit dependia das revisões de dependência do acl:relcl
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/61b12bf92b4bad6ca50d46b99e3c5b836973f3ec|Relatório da Wogue disponível em breve
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/94902f0f71c6c2a6a82a0ffdb99b6d034a5eefb0|Aline - revisar
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/32e13f3a2c843a3c2bbc57c5b595cbee2d68a47d|Aline - revisar
https://github.com/UniversalDependencies/UD_Portuguese-Bosque/commit/184d4f74a5f6e1d2728fd6a6237f545dd1754545
'''.strip().split("\n")
# Exclusion patterns, one per line. Each pattern is matched as a substring against
# commit hashes/links and against commit titles (e.g. "Merge branch").
exceptCommits = '''
Merge branch
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/aabcdfbc0918c123f96ab4523e39c1b0b81b22e7
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/02bf943fd9cd28aae35aa481b09d675b5974b196
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/41aefb40bb5b2460fa4b863a0d6cc84ad957a0fa
https://github.com/alvelvis/UD_Portuguese-Bosque/commit/5471753b4ca3a7b633bc60e952e850acd1b89284
'''.strip().split("\n")

## Libraries

In [2]:
!pip3 install pyquery
from pyquery import PyQuery
from datetime import datetime
import requests
dicionarioIssues = {}



## Function to get the data structure of a commit

In [3]:
def parseUrl(html, comment="", tokens=""):
    """Collect the fields of one commit page into a plain dictionary.

    Parameters:
        html: queryable document of the commit page; it is called with CSS
            selectors and its ``base_url`` attribute is read.
        comment: free text stored under ``'comment'``. The sentinel
            ``"notCount"`` blanks the comment and forces ``tokens`` to 0.
        tokens: when it is a string (the default), the number of changes is
            read from the page's diff statistics; any non-string value is
            stored unchanged.

    Returns:
        dict with the keys ``commit``, ``issue`` (``name``/``number``/``link``),
        ``tokens``, ``author`` (``name``/``avatar``/``link``), ``link``,
        ``comment``, ``date`` and ``datetime``.

    When the page contains an ``.issue-link`` element, the linked issue page
    is downloaded to read the issue title and number.
    """
    if comment == "notCount":
        comment, tokens = "", 0

    issueAnchor = html('.issue-link')
    if issueAnchor:
        htmlIssue = PyQuery(url=issueAnchor.attr('href'))
    else:
        htmlIssue = ""

    # The title markup may carry trailing tags; keep only the text before the first one.
    commitTitle = html('.commit-title').html().split("<")[0].strip()

    if issueAnchor:
        issue = {
            'name': htmlIssue('.js-issue-title').html().strip(),
            'number': htmlIssue('.gh-header-number').html().split("#")[1].strip(),
            'link': issueAnchor.attr('href'),
        }
    else:
        # Placeholder used for commits that reference no issue.
        issue = {'name': "No issue", 'number': "0", 'link': "#"}

    if isinstance(tokens, str):
        # Second <strong> of the diff summary; drop the " addition…"/" deletion…"
        # suffix and the thousands separators before converting to int.
        diffSummary = html('.toc-diff-stats')('strong')[1].text
        diffSummary = diffSummary.split(" addition")[0].split(" deletion")[0]
        tokens = int(diffSummary.replace(',', ''))

    authorName = html('.commit-author').html()
    avatarStack = html('.AvatarStack-body')
    author = {
        'name': authorName,
        'avatar': avatarStack('img').attr('src'),
        'link': f"https://github.com{avatarStack('a').attr('href')}",
    }

    timestamp = html('relative-time')
    return {
        'commit': commitTitle,
        'issue': issue,
        'tokens': tokens,
        'author': author,
        'link': html.base_url,
        'comment': comment,
        'date': timestamp.html(),
        'datetime': timestamp.attr('datetime'),
    }

## Function to get the commits of the repo from the GitHub API

In [4]:
def getApiCommits(repoLink, initialCommit, finalCommit):
    """Return the commit hashes between two commits, as listed by the GitHub API.

    The listing of the branch named by the global ``branch`` is walked page by
    page (100 commits per page, at most 10 pages). Collection starts at
    ``finalCommit`` and stops at ``initialCommit``, both included, so the code
    expects ``finalCommit`` to appear first in the listing.

    Parameters:
        repoLink: repository URL; everything after ``.com/`` is used as the
            ``owner/name`` path of the API request.
        initialCommit: full hash of the commit that ends the collection.
        finalCommit: full hash of the commit that starts the collection.

    Returns:
        list of hashes in API order; empty when ``finalCommit`` is not found
        before ``initialCommit`` or within the pages searched.

    Raises:
        RuntimeError: when the API answers with something other than a list
            of commits (for instance a rate-limit or "not found" error object).
    """
    repoPath = repoLink.split('.com/')[1]
    commits = []
    insideRange = False
    for page in range(1, 11):
        payload = requests.get(
            f"https://api.github.com/repos/{repoPath}/commits?per_page=100&page={page}&sha={branch}",
            timeout=30,
        ).json()

        # Error responses are JSON objects, not lists; iterating over one would
        # yield its keys and fail later with an unrelated AttributeError.
        if not isinstance(payload, list):
            message = payload.get('message') if isinstance(payload, dict) else payload
            raise RuntimeError(f"GitHub API request failed: {message}")

        # An empty page means the history is exhausted: no point in asking for more.
        if not payload:
            break

        for commit in payload:
            sha = commit.get('sha')
            if sha == finalCommit:
                insideRange = True
            if insideRange:
                commits.append(sha)
            if sha == initialCommit:
                return commits

    return commits

## Normalizing variables

In [5]:
def _commitRef(entry):
    """Reduce the link part of a ``link|comment`` entry to its last path segment.

    Only the text before the first ``|`` is touched, so comments holding ``/``
    (HTML, URLs) or further ``|`` characters are preserved untouched, and
    entries that are already bare hashes or plain text are returned unchanged.
    """
    link, separator, note = entry.partition("|")
    return link.rstrip("/").rsplit("/", 1)[-1] + separator + note


if '.com/' not in repoLink:
    # Raising stops "Run All" with a readable error; exit() would act on the
    # kernel/interpreter itself instead of just reporting the problem.
    raise ValueError('Repository link invalid')

if repoLink.endswith("/"):
    repoLink = repoLink[:-1]

# Accept both full commit links and bare hashes for the range bounds.
initialCommit = _commitRef(initialCommit)
finalCommit = _commitRef(finalCommit)

# Blank lines are dropped: an empty exclusion pattern is a substring of every
# hash and title, so it would silently exclude all commits.
commitsLinks[:] = [_commitRef(entry) for entry in commitsLinks if entry.strip()]
exceptCommits[:] = [_commitRef(entry) for entry in exceptCommits if entry.strip()]

## Get the list of commits you want to count and normalize the links

In [6]:
def _isExcepted(reference):
    """Tell whether any non-empty pattern of ``exceptCommits`` occurs in ``reference``."""
    return any(pattern and pattern in reference for pattern in exceptCommits)


# 1) Commits listed by hand come first and keep their "|comment" suffix.
listaDeCommits = []
for entry in commitsLinks:
    if not entry.strip():
        continue
    alreadyListed = any(known in entry for known in listaDeCommits)
    if not alreadyListed and not _isExcepted(entry):
        listaDeCommits.append(entry)

# 2) Commits of the requested range, fetched only when the range is defined.
apiCommits = []
if initialCommit and finalCommit:
    apiCommits = getApiCommits(repoLink, initialCommit, finalCommit)
    listaDeCommits.extend([
        sha for sha in apiCommits
        if not any(sha in known for known in listaDeCommits) and not _isExcepted(sha)
    ])

# 3) Turn every entry into a full commit link. Only the part before the first
#    "|" is a link, so the comment is split off before it is inspected.
for i, entry in enumerate(listaDeCommits):
    link, separator, note = entry.partition("|")
    link = link.rstrip("/")
    if "/" not in link:
        link = repoLink + "/commit/" + link
    listaDeCommits[i] = link + separator + note

# 4) Without an explicit range, use the first/last listed commit as bounds
#    (link part only: a trailing comment must not end up inside the URL).
if not initialCommit:
    initialCommit = listaDeCommits[0].split("|")[0]
if not finalCommit:
    finalCommit = listaDeCommits[-1].split("|")[0]
if "/" not in initialCommit.split("|")[0]:
    initialCommit = repoLink + "/commit/" + initialCommit
if "/" not in finalCommit.split("|")[0]:
    finalCommit = repoLink + "/commit/" + finalCommit

## Get the HTMLs of the commits links

In [7]:
# Download the pages of both range bounds and keep their dates for the report header.
# Note: from here on initialCommit/finalCommit hold downloaded documents, not strings.
initialCommit = PyQuery(initialCommit)
finalCommit = PyQuery(finalCommit)
initialCommitDate = parseUrl(initialCommit)['date']
finalCommitDate = parseUrl(finalCommit)['date']

novaListaDeCommits = []
totalDeCommits = len(listaDeCommits)
for i, entry in enumerate(listaDeCommits):
    # "link|comment" -> link and comment (empty comment when there is no "|").
    url, separator, comment = entry.partition("|")
    html = PyQuery(url=url)
    commitTitle = html('.commit-title').html().split("<")[0].strip()
    # Exclusion patterns are also matched against the commit title.
    if any(y in commitTitle for y in exceptCommits):
        continue
    novaListaDeCommits.append({'html': html, 'comment': comment})
    print(f"[{i+1}/{totalDeCommits}] {commitTitle} {url}")

[1/77] Correções de feats https://github.com/alvelvis/UD_Portuguese-Bosque/commit/4ef856b77a79071ba9c4c38afc056a5474f29423
[2/77] Correções de pontos finais que não apontavam para root https://github.com/alvelvis/UD_Portuguese-Bosque/commit/92fc86a54342f27e39a55aba40f49ac8eba3ee9b
[3/77] Encarando os verbos auxiliares em locuções verbais como MWE auxiliare… https://github.com/alvelvis/UD_Portuguese-Bosque/commit/0ba5a48c029266a99d30dfcdaf22526c90c42bed
[4/77] 400 eliminações de dep https://github.com/alvelvis/UD_Portuguese-Bosque/commit/24b2d0ebbcaf4a6403df974f7df0974d21f23900
[5/77] Correções de MWE é_que https://github.com/alvelvis/UD_Portuguese-Bosque/commit/5e326dc545a987e90637f8368899242bcf403f4b
[6/77] Correções de "tal como" e "como" https://github.com/alvelvis/UD_Portuguese-Bosque/commit/b4538d8926d852c9c65ca98eda19384060ad0f8e
[7/77] Corrigindo estruturas comparativas https://github.com/alvelvis/UD_Portuguese-Bosque/commit/6d78b24815aca182a846cf1839fddfc94a90b2c7
[8/77] Correç

## Parsing Github commit HTMLs

In [8]:
# Start from an empty mapping so that re-running this cell does not append the
# same commits a second time (which would double every total in the report).
dicionarioIssues = {}
for item in novaListaDeCommits:
    commit = parseUrl(item['html'], item['comment'])
    # Commits are grouped under "<issue name> #<issue number>".
    issueKey = f"{commit['issue']['name']} #{commit['issue']['number']}"
    dicionarioIssues.setdefault(issueKey, []).append(commit)

## Building and saving the HTML page

In [9]:
def _issueNumber(issueKey):
    """Return the issue number stored after the last '#' of a "name #number" key.

    Splitting on the last '#' keeps this working when the issue title itself
    contains a '#'.
    """
    return int(issueKey.rsplit("#", 1)[1])


def _commitHref(page):
    """Return the link of a downloaded commit page, prefixing repoLink to bare hashes."""
    link = page.base_url
    return link if "/" in link else repoLink + "/commit/" + link


# Per-issue totals are needed in the summary list and in each table header.
changesPerIssue = {
    issueKey: sum(commit['tokens'] for commit in issueCommits)
    for issueKey, issueCommits in dicionarioIssues.items()
}
sortedIssues = sorted(dicionarioIssues, key=_issueNumber)

# The page holds non-ASCII text, so its encoding is declared explicitly.
html = '<meta charset="utf-8">'
html += "<title>Commits: " + logName + " v" + logVersion + "</title><style>td,th{ padding:8px; } body{ width:45vw; margin:auto; margin-top:10px; }</style>"
html += f"<h1>Commits: <a target='_blank' href='{repoLink}'>{logName}</a></h1><hr>"
html += f"Version {logVersion}"
if initialCommit and finalCommit:
    # The link is read straight from the downloaded pages; going through
    # parseUrl() again would re-download the issue page on every call.
    html += f" - commits from <a target='_blank' href='{_commitHref(initialCommit)}'>{initialCommitDate}</a> to <a href='{_commitHref(finalCommit)}' target='_blank'>{finalCommitDate}</a>"
html += f"<br><br>Total issues: {len(dicionarioIssues)}"
html += f"<br>Total commits: {sum(len(issueCommits) for issueCommits in dicionarioIssues.values())}"
html += f"<br>Total changes: {sum(changesPerIssue.values())}"
html += "<br>"
html += "<h3>Issues:</h3>"
html += "<ul>"
html += "".join(
    '<li><a href="#' + issueKey + '">' + issueKey + '</a> (' + str(changesPerIssue[issueKey]) + ' changes)</li>'
    for issueKey in sortedIssues
)
html += "</ul><br><br>"

# One table per issue, ordered by issue number; commits ordered by timestamp.
for item in sortedIssues:
    issueName, issueNumber = item.rsplit("#", 1)
    html += f'<table border="1" style="margin:auto; border-collapse: collapse;"><tr><th id="{item}" colspan="42" style="font-size:24px"><center>{issueName} <a target="_blank" href="{repoLink}/issues/{issueNumber}">#{issueNumber}</a></center></th></tr><tr><td colspan="42" style="text-align:center">{changesPerIssue[item]} changes</td></tr>'
    html += '<tr><th>Author</th><th>Commit</th><th>Changes</th><th>Comments</th></tr>'
    for commit in sorted(dicionarioIssues[item], key=lambda x: x['datetime']):
        html += '<tr><td><center><a href="{authorLink}" target="_blank"><img title="{authorName}" src="{authorImg}" style="height:40px; width:auto;"></a></center></td><td><a target="_blank" href="{commitLink}">{commitName}</a><br><small>{commitDate}</small></td><td><center>{total}</center></td><td>{comment}</td></tr>'.format(
            authorLink=commit['author']['link'],
            authorImg=commit['author']['avatar'],
            authorName=commit['author']['name'],
            commitName=commit['commit'],
            commitLink=commit['link'],
            total=commit['tokens'],
            comment=commit['comment'],
            commitDate=commit['date'],
        )

    html += '</table><br><a href="#">Back to top</a><br><br><br>'

html += f"<hr>Log updated in {datetime.now()}<br><a target='_blank' href='https://github.com/alvelvis/commits-log/'>Fork me on Github</a><br><br>"
# Explicit UTF-8: the platform default encoding may be unable to represent
# the accented characters of commit titles and comments.
with open(logName + "-v" + logVersion + ".html", "w", encoding="utf-8") as f:
    f.write(html)

print(f'{xpLink}/{logName}-v{logVersion}.html')

https://comcorhd.ga/xp/commits-log/Bosque-UD 2.5-v2.html
