# Modificando Dados

In [1]:
import pandas as pd
# Load the relations table; the CSV's first column is used as the row index.
data = pd.read_csv(
    "CSV/all_relations.csv",
    index_col=0,
)

In [2]:
# Display the loaded frame as the cell's output to check its columns and rows.
data

Unnamed: 0,commit_date,project,qt_commit,qt_author,relative_interest
8359,2010-11-01,Catch2,27,1,0.0
9168,2010-12-01,Catch2,32,2,0.0
111,2011-01-01,Catch2,41,1,0.0
928,2011-02-01,Catch2,25,2,0.0
1744,2011-03-01,Catch2,41,2,0.0
...,...,...,...,...,...
3287,2023-04-01,zaproxy,26,8,6.0
4138,2023-05-01,zaproxy,48,8,6.0
4979,2023-06-01,zaproxy,40,5,3.0
5815,2023-07-01,zaproxy,60,5,5.0


In [3]:
# Per-project rolling-mean smoothing of the commit and author counts.
#
# rolling() averages rows in whatever order they currently sit in the frame,
# so the frame is put in chronological order within each project first.
# Without this, an unsorted CSV would silently average non-consecutive rows.
# commit_date may still be text at this point; ISO "YYYY-MM-DD" strings sort
# chronologically, so the ordering is correct either way.
window_size = 12  # rows per window; data appears to be monthly (~12 months) - TODO confirm
data = data.sort_values(by=['project', 'commit_date'])

# min_periods=2 means the first row of every project has no smoothed value (NaN).
for column in ('qt_commit', 'qt_author'):
    data[f'smoothed_{column}'] = data.groupby('project')[column].transform(
        lambda x: x.rolling(window=window_size, min_periods=2).mean()
    )

In [4]:
# Display the frame again to check the two added smoothed_* columns.
data

Unnamed: 0,commit_date,project,qt_commit,qt_author,relative_interest,smoothed_qt_commit,smoothed_qt_author
8359,2010-11-01,Catch2,27,1,0.0,,
9168,2010-12-01,Catch2,32,2,0.0,29.500000,1.500000
111,2011-01-01,Catch2,41,1,0.0,33.333333,1.333333
928,2011-02-01,Catch2,25,2,0.0,31.250000,1.500000
1744,2011-03-01,Catch2,41,2,0.0,33.200000,1.600000
...,...,...,...,...,...,...,...
3287,2023-04-01,zaproxy,26,8,6.0,43.583333,8.750000
4138,2023-05-01,zaproxy,48,8,6.0,42.416667,8.666667
4979,2023-06-01,zaproxy,40,5,3.0,41.583333,8.333333
5815,2023-07-01,zaproxy,60,5,5.0,45.166667,8.166667


# Visualização de commits e autores ao longo do tempo

In [5]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from ipywidgets import interact, widgets

# Parse commit_date into real datetimes, then order rows by project and date
# so that each project's line is drawn left-to-right in time.
data = (
    data
    .assign(commit_date=pd.to_datetime(data["commit_date"], format="%Y-%m-%d"))
    .sort_values(by=['project', 'commit_date'])
)

# Callback that redraws the chart for the currently selected projects.
def update_plot(project_list):
    """Plot smoothed commit and author counts over time for the given projects.

    Reads the module-level ``data`` frame. For every project in
    ``project_list`` it draws ``smoothed_qt_commit`` as a solid line and
    ``smoothed_qt_author`` as a dashed line against ``commit_date``.

    Parameters
    ----------
    project_list : iterable of str
        Project names to draw, matched against ``data["project"]``.
        An empty selection yields empty, labelled axes with no legend.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    # Explicit Figure/Axes instead of the implicit pyplot "current axes" state.
    fig, ax = plt.subplots(figsize=(10, 5))

    for project in project_list:
        view = data[data["project"] == project]
        ax.plot(view['commit_date'], view['smoothed_qt_commit'], linestyle='-', label=f"{project} commits")
        ax.plot(view['commit_date'], view['smoothed_qt_author'], linestyle='--', label=f"{project} authors")

    ax.set_xlabel('Data')
    ax.set_ylabel('Quantidade')
    # One major tick every four months, labelled like "Jan 2020".
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.set_title('Quantidade de Commits/Authors ao longo do Tempo')
    ax.tick_params(axis='x', labelrotation=90)

    # legend() emits a warning when nothing labelled was drawn, which happens
    # when the user deselects every project in the widget.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend()

    fig.tight_layout()
    plt.show()

# All distinct project names found in the data.
project_options = data['project'].unique()

# unique() returns a numpy.ndarray; turn it into a tuple for the widget's
# initial selection, so every project starts out selected.
initial_value = tuple(project_options)

# Multi-select control used to choose which projects are plotted.
project_selector = widgets.SelectMultiple(
    description='Projetos',
    value=initial_value,
    options=project_options,
)

# Wire the selector to update_plot: the chart is redrawn whenever the
# selection changes.
interact(update_plot, project_list=project_selector)


interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>