# Commits and collaborators

In [1]:
import pandas as pd
# Load the commit records; the first CSV column is used as the row index.
data = pd.read_csv(
    "data/commits-trends.csv",
    index_col=0,
)

In [2]:
# Preview the loaded frame (the rendered output elides the middle rows).
data

Unnamed: 0,commit_date,project,qt_commit,qt_author,relative_interest
112921,2010-11-09,Catch2,1,1,
112925,2010-11-10,Catch2,1,1,
112926,2010-11-10,Catch2,1,1,
112927,2010-11-10,Catch2,1,1,
112960,2010-11-10,Catch2,1,1,
...,...,...,...,...,...
843980,2023-08-23,zaproxy,1,1,
844403,2023-08-25,zaproxy,1,1,
844432,2023-08-25,zaproxy,1,1,
844692,2023-08-28,zaproxy,1,1,


In [3]:
# Smooth the commit and author counts with a rolling mean computed separately
# for each project: up to `window_size` consecutive rows per window, and NaN
# until a project has contributed at least two rows.
# NOTE(review): the window follows the current row order of `data`; confirm the
# CSV is already chronological within each project before relying on this.
window_size = 12
for source_column in ('qt_commit', 'qt_author'):
    data[f'smoothed_{source_column}'] = (
        data
        .groupby('project')[source_column]
        .transform(lambda counts: counts.rolling(window=window_size, min_periods=2).mean())
    )

In [4]:
# Inspect the frame again, now carrying the two smoothed_* columns.
data

Unnamed: 0,commit_date,project,qt_commit,qt_author,relative_interest,smoothed_qt_commit,smoothed_qt_author
112921,2010-11-09,Catch2,1,1,,,
112925,2010-11-10,Catch2,1,1,,1.0,1.0
112926,2010-11-10,Catch2,1,1,,1.0,1.0
112927,2010-11-10,Catch2,1,1,,1.0,1.0
112960,2010-11-10,Catch2,1,1,,1.0,1.0
...,...,...,...,...,...,...,...
843980,2023-08-23,zaproxy,1,1,,1.0,1.0
844403,2023-08-25,zaproxy,1,1,,1.0,1.0
844432,2023-08-25,zaproxy,1,1,,1.0,1.0
844692,2023-08-28,zaproxy,1,1,,1.0,1.0


# Visualização de commits/authors ao longo do tempo

In [4]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from ipywidgets import interact, widgets

# Parse the ISO-formatted date strings into datetimes, then order the rows
# chronologically inside each project.
data = (
    data
    .assign(commit_date=pd.to_datetime(data["commit_date"], format="%Y-%m-%d"))
    .sort_values(by=['project', 'commit_date'])
)

# Redraws the chart for the currently selected projects.
def update_plot(project_list):
    """Draw the smoothed commit and author curves for the chosen projects.

    Reads the notebook-level ``data`` frame and, for every name in
    ``project_list``, plots ``smoothed_qt_commit`` (solid line) and
    ``smoothed_qt_author`` (dashed line) against ``commit_date`` on a single
    10x5 inch figure, then shows it.

    Args:
        project_list: Iterable of project names, matched against the
            ``project`` column of ``data``.
    """
    # (column, line style, legend suffix) for the two curves drawn per project.
    curves = (
        ('smoothed_qt_commit', '-', 'commits'),
        ('smoothed_qt_author', '--', 'authors'),
    )

    figure = plt.figure(figsize=(10, 5))
    axes = figure.gca()

    for project in project_list:
        rows = data[data["project"] == project]
        for column, style, suffix in curves:
            axes.plot(rows['commit_date'], rows[column], linestyle=style, label=f"{project} {suffix}")

    axes.set_xlabel('Data')
    axes.set_ylabel('Quantidade')
    # Major ticks every four months, labelled as abbreviated month + year.
    axes.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    axes.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    axes.set_title('Quantidade de Commits/Authors ao longo do Tempo')
    plt.xticks(rotation=90)
    axes.legend()
    figure.tight_layout()
    plt.show()

# All distinct project names found in the data.
project_options = data['project'].unique()

# Convert the numpy.ndarray into a tuple so every project starts out selected.
initial_value = tuple(project_options)

# Multi-select control used to choose which projects are drawn.
project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

# Interactive binding: update_plot is re-run with the widget's current selection.
interact(update_plot, project_list=project_selector)


interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

In [None]:
def plot_author_trends(project_list, start_date, end_date):
    """Plot author counts and their short/long EMAs for the selected projects.

    For every project, the rows of the notebook-level ``commit_project_by_month``
    frame whose ``commit_date`` index level lies in ``[start_date, end_date]``
    are plotted three times: the raw ``qt_author`` values and two exponential
    moving averages with spans ``ema_short`` and ``ema_long``. A one-line trend
    verdict (short EMA above the long EMA means "high") and the last value of
    each EMA are printed per project. Projects with no rows in the range are
    reported and skipped.

    Relies on notebook globals that this cell does not define:
    ``commit_project_by_month`` (indexed by ``project`` and ``commit_date``
    levels, with a ``qt_author`` column), ``ema_short``, ``ema_long``,
    ``chart_width``, ``chart_height``, ``chart_fontsize``,
    ``linestyle_absolute`` and ``linewidth_ema``.

    Args:
        project_list: Iterable of project names to plot.
        start_date: Inclusive lower bound; anything ``pd.to_datetime`` accepts.
        end_date: Inclusive upper bound; anything ``pd.to_datetime`` accepts.
    """
    plt.figure(figsize=(chart_width, chart_height))

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    plotted_any = False
    for project in project_list:
        project_rows = commit_project_by_month[
            commit_project_by_month.index.get_level_values('project') == project
        ]
        row_dates = project_rows.index.get_level_values('commit_date')
        # Take an explicit copy so that adding the EMA columns below does not
        # trigger pandas' SettingWithCopyWarning on a filtered slice.
        contributors = project_rows[(row_dates >= start_date) & (row_dates <= end_date)].copy()

        # The .iloc[-1] lookups below would raise IndexError on an empty frame.
        if contributors.empty:
            print(f'{project}: no author data in the selected date range.')
            continue

        contributors['ema_short'] = contributors['qt_author'].ewm(span=ema_short).mean()
        contributors['ema_long'] = contributors['qt_author'].ewm(span=ema_long).mean()

        months = contributors.index.get_level_values('commit_date')
        plt.plot(months, contributors['qt_author'], label=f'{project}: # contributors',linestyle=linestyle_absolute)
        plt.plot(months, contributors['ema_short'], label=f'{project}: EMA-{ema_short}', linewidth=linewidth_ema)
        plt.plot(months, contributors['ema_long'], label=f'{project}: EMA-{ema_long}', linewidth=linewidth_ema)
        plotted_any = True

        # Compare the most recent value of each average to classify the trend.
        last_short = contributors['ema_short'].iloc[-1]
        last_long = contributors['ema_long'].iloc[-1]
        if last_short > last_long:
            print(f'{project}\'s lifecycle is on a high trend for authors.')
        else:
            print(f'{project}\'s lifecycle is on a low trend for authors.')

        print(f'EMA_short-term = {last_short}')
        print(f'EMA_long-term = {last_long}')

    #plt.xlabel('Date')
    plt.ylabel('Authors',fontsize=chart_fontsize)
    #plt.title('Author trends')
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90,fontsize=chart_fontsize)
    plt.yticks(fontsize=chart_fontsize)
    # A legend without any labelled line only produces a matplotlib warning.
    if plotted_any:
        plt.legend(fontsize=chart_fontsize)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.tight_layout()
    plt.show()

# Controls for the author-trend chart: a project multi-select plus two date
# pickers preset to the earliest and latest commit dates found in `data`.
project_options = data['project'].unique()
project_selector = widgets.SelectMultiple(
    options=project_options,
    description='Projects',
)
start_date_picker = widgets.DatePicker(
    value=data['commit_date'].min(),
    description='Start date',
)
end_date_picker = widgets.DatePicker(
    value=data['commit_date'].max(),
    description='End date',
)

# Re-run plot_author_trends whenever any of the three controls changes.
interact(
    plot_author_trends,
    project_list=project_selector,
    start_date=start_date_picker,
    end_date=end_date_picker,
)