# Ajustando Dados

In [1]:
import pandas as pd
# Turn off pandas' chained-assignment warning for the rest of the notebook
pd.set_option("mode.chained_assignment", None)

# The CSV read below is produced by the "Juntando Commits e Trends" notebook
data = pd.read_csv("CSV/all_relations.csv", index_col=0).assign(
    # Parse the ISO-formatted date strings into datetime values
    commit_date=lambda frame: pd.to_datetime(frame["commit_date"], format="%Y-%m-%d")
)

In [2]:
# Preview the loaded frame; the rendered output elides the middle rows
data

Unnamed: 0,commit_date,project,qt_commit,qt_author,relative_interest
12499,2014-07-29,CNTK,1,1,
12505,2014-08-29,CNTK,1,1,
12658,2014-08-31,CNTK,1,1,
6998,2014-09-01,CNTK,1,1,
10009,2014-09-02,CNTK,1,1,
...,...,...,...,...,...
3324,2023-04-01,zaproxy,26,8,7.0
4185,2023-05-01,zaproxy,48,8,7.0
5031,2023-06-01,zaproxy,40,5,5.0
5878,2023-07-01,zaproxy,60,5,4.0


# Visualização da Tendência

## Tendência Geral

In [3]:
# Sort chronologically within each project: the EMAs computed below depend on row
# order, and the "last" row kept per project must be its most recent observation.
# sort_values returns a new frame, so `data` itself is left untouched.
view = data.sort_values(by=['project', 'commit_date'])

project_list = view['project'].unique()

# Source column for each metric, and the EMA span used for each horizon
metric_columns = {'commit': 'qt_commit', 'author': 'qt_author', 'interest': 'relative_interest'}
ema_spans = {'curto': 12, 'longo': 24}

project_frames = []
for project in project_list:
    # Explicit copy: the EMA columns are added to an independent frame, not to a slice of `view`
    filter_view = view[view["project"] == project].copy()
    for metric, column in metric_columns.items():
        for horizon, span in ema_spans.items():
            # Produces EMA_curto_commit, EMA_longo_commit, EMA_curto_author, ...
            filter_view[f'EMA_{horizon}_{metric}'] = filter_view[column].ewm(span=span).mean()
    project_frames.append(filter_view)

# Concatenate once instead of growing a frame inside the loop, then keep only the
# final row of every project, i.e. the row holding its latest EMA values.
data_last_date = pd.concat(project_frames).drop_duplicates(subset='project', keep='last')

In [4]:
import numpy as np
# Label each metric "alta" when its short EMA is above its long EMA, "baixa" otherwise.
# NOTE(review): comparisons involving NaN evaluate to False, so a project whose EMA
# is NaN for a metric ends up labelled "baixa" — confirm that this is intended.
for metric in ("commit", "author", "interest"):
    short_ema = data_last_date[f"EMA_curto_{metric}"]
    long_ema = data_last_date[f"EMA_longo_{metric}"]
    data_last_date[metric] = np.where(short_ema > long_ema, "alta", "baixa")

# Keep only the project name and its three labels, ordered by project
data_last_date = data_last_date[["project", "commit", "author", "interest"]].sort_values(by=['project'])

# Overall trend: "alta" when at least two of the three metrics are "alta"
rising_votes = (data_last_date[["commit", "author", "interest"]] == "alta").sum(axis=1)
data_last_date["geral"] = np.where(rising_votes >= 2, "alta", "baixa")

In [5]:
# Projects where only the relative-interest trend is rising
only_interest_rising = (
    data_last_date["commit"].eq("baixa")
    & data_last_date["author"].eq("baixa")
    & data_last_date["interest"].eq("alta")
)
data_last_date[only_interest_rising]

Unnamed: 0,project,commit,author,interest,geral
6672,Chart.js,baixa,baixa,alta,baixa
6673,ITK,baixa,baixa,alta,baixa
3279,brain.js,baixa,baixa,alta,baixa
6679,brakeman,baixa,baixa,alta,baixa
9106,cpprestsdk,baixa,baixa,alta,baixa
6681,cypress,baixa,baixa,alta,baixa
6683,dask,baixa,baixa,alta,baixa
5842,deeplearning4j,baixa,baixa,alta,baixa
6684,django,baixa,baixa,alta,baixa
6689,grape,baixa,baixa,alta,baixa


## Tendência de Commits

In [6]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Order rows by project and, within each project, by date
data = data.sort_values(['project', 'commit_date'])

# Spans of the short and long EMAs
periodo_curto, periodo_longo = 12, 24

def update_plot(project_list, start_date, end_date):
    """Plot commit counts and their short/long EMAs for the selected projects.

    For every project in ``project_list``, the rows of the module-level ``data``
    frame whose ``commit_date`` lies inside the requested interval are plotted
    together with two exponential moving averages of ``qt_commit`` (spans
    ``periodo_curto`` and ``periodo_longo``). The trend is printed as "alta"
    when the last short EMA is above the last long EMA, and "baixa" otherwise.

    Parameters
    ----------
    project_list : iterable of str
        Project names to plot. An empty selection yields an empty chart.
    start_date, end_date : date-like or None
        Inclusive bounds on ``commit_date``. ``None`` leaves that side of the
        interval open.
    """
    fig, ax = plt.subplots(figsize=(7, 5))

    # pd.to_datetime passes None through unchanged
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    for project in project_list:
        in_window = data["project"] == project
        if start_date is not None:
            in_window &= data["commit_date"] >= start_date
        if end_date is not None:
            in_window &= data["commit_date"] <= end_date
        # Copy so the EMA columns are added to a private frame, not to a slice of `data`
        views = data.loc[in_window].copy()

        if views.empty:
            # No rows means nothing to plot and no last EMA value to compare
            print(f'Sem dados para o projeto {project} no período selecionado.')
            continue

        views['EMA_curto'] = views['qt_commit'].ewm(span=periodo_curto).mean()
        views['EMA_longo'] = views['qt_commit'].ewm(span=periodo_longo).mean()
        ax.plot(views['commit_date'], views['qt_commit'], label=f'Commits - {project}')
        ax.plot(views['commit_date'], views['EMA_curto'], label=f'MME-{periodo_curto} - {project}')
        ax.plot(views['commit_date'], views['EMA_longo'], label=f'MME-{periodo_longo} - {project}')

        # Compare the most recent value of each EMA
        last_short = views['EMA_curto'].iloc[-1]
        last_long = views['EMA_longo'].iloc[-1]
        if last_short > last_long:
            print(f'A tendência para o projeto {project} está em alta.')
        else:
            print(f'A tendência para o projeto {project} está em baixa.')

        print(f'EMA_curto = {last_short}')
        print(f'EMA_longo = {last_long}')

    ax.set_xlabel('Data')
    ax.set_ylabel('Tendência')
    ax.set_title('Tendência EMA por Commits')
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90)
    # Only draw a legend when at least one labelled line was plotted
    if ax.get_legend_handles_labels()[0]:
        ax.legend()
    fig.tight_layout()
    plt.show()

# Every project present in the data is selectable
project_options = data['project'].unique()
initial_value = tuple(project_options)

# Multi-select project list; pre-selecting all projects (value=initial_value) is left disabled
project_selector = widgets.SelectMultiple(options=project_options, description='Projetos')

# Date pickers for the plotted interval, defaulting to the first and last date in the data
start_date_picker = widgets.DatePicker(value=data['commit_date'].min(), description='Data Inicial')
end_date_picker = widgets.DatePicker(value=data['commit_date'].max(), description='Data Final')

# Wire the three widgets to update_plot
interact(
    update_plot,
    project_list=project_selector,
    start_date=start_date_picker,
    end_date=end_date_picker,
)

interactive(children=(SelectMultiple(description='Projetos', options=('CNTK', 'Catch2', 'Chart.js', 'ITK', 'Py…

<function __main__.update_plot(project_list, start_date, end_date)>

## Tendência de Autores

In [7]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Ensure commit_date is datetime, then order rows by project and date
data = data.assign(
    commit_date=pd.to_datetime(data["commit_date"], format="%Y-%m-%d")
).sort_values(['project', 'commit_date'])

# Spans of the short and long EMAs
periodo_curto, periodo_longo = 12, 24

def update_plot(project_list, start_date, end_date):
    """Plot author counts and their short/long EMAs for the selected projects.

    For every project in ``project_list``, the rows of the module-level ``data``
    frame whose ``commit_date`` lies inside the requested interval are plotted
    together with two exponential moving averages of ``qt_author`` (spans
    ``periodo_curto`` and ``periodo_longo``). The trend is printed as "alta"
    when the last short EMA is above the last long EMA, and "baixa" otherwise.

    Parameters
    ----------
    project_list : iterable of str
        Project names to plot. An empty selection yields an empty chart.
    start_date, end_date : date-like or None
        Inclusive bounds on ``commit_date``. ``None`` leaves that side of the
        interval open.
    """
    fig, ax = plt.subplots(figsize=(7, 5))

    # pd.to_datetime passes None through unchanged
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    for project in project_list:
        in_window = data["project"] == project
        if start_date is not None:
            in_window &= data["commit_date"] >= start_date
        if end_date is not None:
            in_window &= data["commit_date"] <= end_date
        # Copy so the EMA columns are added to a private frame, not to a slice of `data`
        views = data.loc[in_window].copy()

        if views.empty:
            # No rows means nothing to plot and no last EMA value to compare
            print(f'Sem dados para o projeto {project} no período selecionado.')
            continue

        views['EMA_curto'] = views['qt_author'].ewm(span=periodo_curto).mean()
        views['EMA_longo'] = views['qt_author'].ewm(span=periodo_longo).mean()
        ax.plot(views['commit_date'], views['qt_author'], label=f'Authors - {project}')
        ax.plot(views['commit_date'], views['EMA_curto'], label=f'MME-{periodo_curto} - {project}')
        ax.plot(views['commit_date'], views['EMA_longo'], label=f'MME-{periodo_longo} - {project}')

        # Compare the most recent value of each EMA
        last_short = views['EMA_curto'].iloc[-1]
        last_long = views['EMA_longo'].iloc[-1]
        if last_short > last_long:
            print(f'A tendência para o projeto {project} está em alta.')
        else:
            print(f'A tendência para o projeto {project} está em baixa.')

        print(f'EMA_curto = {last_short}')
        print(f'EMA_longo = {last_long}')

    ax.set_xlabel('Data')
    ax.set_ylabel('Tendência')
    ax.set_title('Tendência EMA por Authors')
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90)
    # Only draw a legend when at least one labelled line was plotted
    if ax.get_legend_handles_labels()[0]:
        ax.legend()
    fig.tight_layout()
    plt.show()

# Every project present in the data is selectable
project_options = data['project'].unique()
initial_value = tuple(project_options)

# Multi-select project list; pre-selecting all projects (value=initial_value) is left disabled
project_selector = widgets.SelectMultiple(options=project_options, description='Projetos')

# Date pickers for the plotted interval, defaulting to the first and last date in the data
start_date_picker = widgets.DatePicker(value=data['commit_date'].min(), description='Data Inicial')
end_date_picker = widgets.DatePicker(value=data['commit_date'].max(), description='Data Final')

# Wire the three widgets to update_plot
interact(
    update_plot,
    project_list=project_selector,
    start_date=start_date_picker,
    end_date=end_date_picker,
)

interactive(children=(SelectMultiple(description='Projetos', options=('CNTK', 'Catch2', 'Chart.js', 'ITK', 'Py…

<function __main__.update_plot(project_list, start_date, end_date)>

## Tendência de Interesse Relativo

In [8]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Ensure commit_date is datetime, then order rows by project and date
data = data.assign(
    commit_date=pd.to_datetime(data["commit_date"], format="%Y-%m-%d")
).sort_values(['project', 'commit_date'])

# Spans of the short and long EMAs
periodo_curto, periodo_longo = 12, 24

def update_plot(project_list, start_date, end_date):
    """Plot relative interest and its short/long EMAs for the selected projects.

    For every project in ``project_list``, the rows of the module-level ``data``
    frame whose ``commit_date`` lies inside the requested interval are plotted
    together with two exponential moving averages of ``relative_interest``
    (spans ``periodo_curto`` and ``periodo_longo``). The trend is printed as
    "alta" when the last short EMA is above the last long EMA, and "baixa"
    otherwise.

    Parameters
    ----------
    project_list : iterable of str
        Project names to plot. An empty selection yields an empty chart.
    start_date, end_date : date-like or None
        Inclusive bounds on ``commit_date``. ``None`` leaves that side of the
        interval open.
    """
    fig, ax = plt.subplots(figsize=(7, 5))

    # pd.to_datetime passes None through unchanged
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    for project in project_list:
        in_window = data["project"] == project
        if start_date is not None:
            in_window &= data["commit_date"] >= start_date
        if end_date is not None:
            in_window &= data["commit_date"] <= end_date
        # Copy so the EMA columns are added to a private frame, not to a slice of `data`
        views = data.loc[in_window].copy()

        if views.empty:
            # No rows means nothing to plot and no last EMA value to compare
            print(f'Sem dados para o projeto {project} no período selecionado.')
            continue

        views['EMA_curto'] = views['relative_interest'].ewm(span=periodo_curto).mean()
        views['EMA_longo'] = views['relative_interest'].ewm(span=periodo_longo).mean()
        ax.plot(views['commit_date'], views['relative_interest'], label=f'Interesse - {project}')
        ax.plot(views['commit_date'], views['EMA_curto'], label=f'MME-{periodo_curto} - {project}')
        ax.plot(views['commit_date'], views['EMA_longo'], label=f'MME-{periodo_longo} - {project}')

        # Compare the most recent value of each EMA.
        # NOTE(review): if either value is NaN the comparison is False and the
        # project is reported as "baixa" — confirm that this is intended.
        last_short = views['EMA_curto'].iloc[-1]
        last_long = views['EMA_longo'].iloc[-1]
        if last_short > last_long:
            print(f'A tendência para o projeto {project} está em alta.')
        else:
            print(f'A tendência para o projeto {project} está em baixa.')

        print(f'EMA_curto = {last_short}')
        print(f'EMA_longo = {last_long}')

    ax.set_xlabel('Data')
    ax.set_ylabel('Tendência')
    ax.set_title('Tendência EMA por Interesse Relativo')
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90)
    # Only draw a legend when at least one labelled line was plotted
    if ax.get_legend_handles_labels()[0]:
        ax.legend()
    fig.tight_layout()
    plt.show()

# Every project present in the data is selectable
project_options = data['project'].unique()
initial_value = tuple(project_options)

# Multi-select project list; pre-selecting all projects (value=initial_value) is left disabled
project_selector = widgets.SelectMultiple(options=project_options, description='Projetos')

# Date pickers for the plotted interval, defaulting to the first and last date in the data
start_date_picker = widgets.DatePicker(value=data['commit_date'].min(), description='Data Inicial')
end_date_picker = widgets.DatePicker(value=data['commit_date'].max(), description='Data Final')

# Wire the three widgets to update_plot
interact(
    update_plot,
    project_list=project_selector,
    start_date=start_date_picker,
    end_date=end_date_picker,
)

interactive(children=(SelectMultiple(description='Projetos', options=('CNTK', 'Catch2', 'Chart.js', 'ITK', 'Py…

<function __main__.update_plot(project_list, start_date, end_date)>