# Pegando dados

In [24]:
import pandas as pd
# This CSV is produced by the "Juntando Commits e Trends" notebook; its first
# column is used as the row index.
data = pd.read_csv(filepath_or_buffer="CSV/all_relations.csv", index_col=0)

In [25]:
# Preview the loaded frame; pandas abbreviates long frames to their first and last rows.
data

Unnamed: 0,commit_date,project,qt_commit,qt_author,relative_interest
12499,2014-07-29,CNTK,1,1,
12505,2014-08-29,CNTK,1,1,
12658,2014-08-31,CNTK,1,1,
6998,2014-09-01,CNTK,1,1,
10009,2014-09-02,CNTK,1,1,
...,...,...,...,...,...
3324,2023-04-01,zaproxy,26,8,7.0
4185,2023-05-01,zaproxy,48,8,7.0
5031,2023-06-01,zaproxy,40,5,5.0
5878,2023-07-01,zaproxy,60,5,4.0


### Suavizando as séries com média móvel (janela de 12 linhas por projeto)

In [26]:
# Trailing moving average of each metric, computed separately per project.
# NOTE(review): the window counts rows, not calendar time, and relies on the rows
# already being in chronological order within each project — confirm upstream.
window_size = 12


def _rolling_mean(series):
    """Mean over the last ``window_size`` rows; NaN when fewer than two observations are in the window."""
    return series.rolling(window=window_size, min_periods=2).mean()


for metric in ('qt_commit', 'qt_author', 'relative_interest'):
    data[f'smoothed_{metric}'] = data.groupby('project')[metric].transform(_rolling_mean)

# Visualização

## Commit / Interesse Relativo

### Spearman

In [4]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Per-project Spearman correlation between the smoothed commit count and the
# smoothed relative interest, as a two-column frame: project, coefficient.
correlation_spearman = (
    data.groupby('project')[['smoothed_qt_commit', 'smoothed_relative_interest']]
    .corr(method="spearman")
    # Each project contributes a 2x2 matrix; keep the commit row and read its
    # relative-interest entry, i.e. the off-diagonal coefficient.
    .xs('smoothed_qt_commit', level=1)['smoothed_relative_interest']
    .rename("spearman (commit/trends)")
    .reset_index()
)

def _make_update_plot(correlations, value_column, title, y_label):
    """Build the widget callback for one correlation table.

    The table is captured in a closure, so the callback keeps plotting the
    frame it was created with even after a later cell rebinds the global
    ``correlation_spearman`` to a table with a different value column.

    Parameters:
        correlations: frame with a 'project' column and ``value_column``.
        value_column: name of the column holding the coefficient to plot.
        title: figure title.
        y_label: label for the y axis.

    Returns:
        A function taking ``project_list`` that draws the figure.
    """

    def update_plot(project_list):
        """Draw one marker per selected project, with a dashed guide line at each."""
        plt.figure(figsize=(10, 5))

        for project in project_list:
            # Local name chosen so the notebook-level ``data`` frame is not shadowed.
            project_rows = correlations[correlations["project"] == project]
            sns.scatterplot(x=project_rows['project'], y=project_rows[value_column])
            plt.axvline(project, color='gray', linestyle='--', linewidth=0.5)

        plt.xlabel('Projeto')
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

    return update_plot


update_plot = _make_update_plot(
    correlation_spearman,
    value_column='spearman (commit/trends)',
    title='Correlação de Spearman por Commit/Interesse Relativo',
    y_label='Correlação de Spearman',
)

project_options = correlation_spearman['project'].unique()

# Every project starts selected.
initial_value = tuple(project_options)

project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

# Trailing semicolon keeps the returned function's repr out of the cell output.
interact(update_plot, project_list=project_selector);

interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

### Pearson

In [6]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Per-project Pearson correlation between the smoothed commit count and the
# smoothed relative interest, as a two-column frame: project, coefficient.
correlation_pearson = (
    data.groupby('project')[['smoothed_qt_commit', 'smoothed_relative_interest']]
    .corr(method="pearson")
    # Each project contributes a 2x2 matrix; keep the commit row and read its
    # relative-interest entry, i.e. the off-diagonal coefficient.
    .xs('smoothed_qt_commit', level=1)['smoothed_relative_interest']
    .rename("pearson (commit/trends)")
    .reset_index()
)

def _make_update_plot(correlations, value_column, title, y_label):
    """Build the widget callback for one correlation table.

    The table is captured in a closure, so the callback keeps plotting the
    frame it was created with even after a later cell rebinds the global
    ``correlation_pearson`` to a table with a different value column.

    Parameters:
        correlations: frame with a 'project' column and ``value_column``.
        value_column: name of the column holding the coefficient to plot.
        title: figure title.
        y_label: label for the y axis.

    Returns:
        A function taking ``project_list`` that draws the figure.
    """

    def update_plot(project_list):
        """Draw one marker per selected project, with a dashed guide line at each."""
        plt.figure(figsize=(10, 5))

        for project in project_list:
            # Local name chosen so the notebook-level ``data`` frame is not shadowed.
            project_rows = correlations[correlations["project"] == project]
            sns.scatterplot(x=project_rows['project'], y=project_rows[value_column])
            plt.axvline(project, color='gray', linestyle='--', linewidth=0.5)

        plt.xlabel('Projeto')
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

    return update_plot


update_plot = _make_update_plot(
    correlation_pearson,
    value_column='pearson (commit/trends)',
    # The title previously read "Projeto/Interesse Relativo"; this section plots
    # commits against relative interest, matching the Spearman cell's title.
    title='Correlação de Pearson por Commit/Interesse Relativo',
    y_label='Correlação de Pearson',
)

project_options = correlation_pearson['project'].unique()

# Every project starts selected.
initial_value = tuple(project_options)

project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

# Trailing semicolon keeps the returned function's repr out of the cell output.
interact(update_plot, project_list=project_selector);

interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

## Author / Interesse Relativo

### Spearman

In [9]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Per-project Spearman correlation between the smoothed author count and the
# smoothed relative interest, as a two-column frame: project, coefficient.
correlation_spearman = (
    data.groupby('project')[['smoothed_qt_author', 'smoothed_relative_interest']]
    .corr(method="spearman")
    # Each project contributes a 2x2 matrix; keep the author row and read its
    # relative-interest entry, i.e. the off-diagonal coefficient.
    .xs('smoothed_qt_author', level=1)['smoothed_relative_interest']
    .rename("spearman (author/trends)")
    .reset_index()
)

def _make_update_plot(correlations, value_column, title, y_label):
    """Build the widget callback for one correlation table.

    The table is captured in a closure, so the callback keeps plotting the
    frame it was created with even after a later cell rebinds the global
    ``correlation_spearman`` to a table with a different value column.

    Parameters:
        correlations: frame with a 'project' column and ``value_column``.
        value_column: name of the column holding the coefficient to plot.
        title: figure title.
        y_label: label for the y axis.

    Returns:
        A function taking ``project_list`` that draws the figure.
    """

    def update_plot(project_list):
        """Draw one marker per selected project, with a dashed guide line at each."""
        plt.figure(figsize=(10, 5))

        for project in project_list:
            # Local name chosen so the notebook-level ``data`` frame is not shadowed.
            project_rows = correlations[correlations["project"] == project]
            sns.scatterplot(x=project_rows['project'], y=project_rows[value_column])
            plt.axvline(project, color='gray', linestyle='--', linewidth=0.5)

        plt.xlabel('Projeto')
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

    return update_plot


update_plot = _make_update_plot(
    correlation_spearman,
    value_column='spearman (author/trends)',
    title='Correlação de Spearman por Author/Interesse Relativo',
    y_label='Correlação de Spearman',
)

project_options = correlation_spearman['project'].unique()

# Every project starts selected.
initial_value = tuple(project_options)

project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

# Trailing semicolon keeps the returned function's repr out of the cell output.
interact(update_plot, project_list=project_selector);

interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

### Pearson

In [10]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Per-project Pearson correlation between the smoothed author count and the
# smoothed relative interest, as a two-column frame: project, coefficient.
correlation_pearson = (
    data.groupby('project')[['smoothed_qt_author', 'smoothed_relative_interest']]
    .corr(method="pearson")
    # Each project contributes a 2x2 matrix; keep the author row and read its
    # relative-interest entry, i.e. the off-diagonal coefficient.
    .xs('smoothed_qt_author', level=1)['smoothed_relative_interest']
    .rename("pearson (author/trends)")
    .reset_index()
)

def _make_update_plot(correlations, value_column, title, y_label):
    """Build the widget callback for one correlation table.

    The table is captured in a closure, so the callback keeps plotting the
    frame it was created with even after a later cell rebinds the global
    ``correlation_pearson`` to a table with a different value column.

    Parameters:
        correlations: frame with a 'project' column and ``value_column``.
        value_column: name of the column holding the coefficient to plot.
        title: figure title.
        y_label: label for the y axis.

    Returns:
        A function taking ``project_list`` that draws the figure.
    """

    def update_plot(project_list):
        """Draw one marker per selected project, with a dashed guide line at each."""
        plt.figure(figsize=(10, 5))

        for project in project_list:
            # Local name chosen so the notebook-level ``data`` frame is not shadowed.
            project_rows = correlations[correlations["project"] == project]
            sns.scatterplot(x=project_rows['project'], y=project_rows[value_column])
            plt.axvline(project, color='gray', linestyle='--', linewidth=0.5)

        plt.xlabel('Projeto')
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

    return update_plot


update_plot = _make_update_plot(
    correlation_pearson,
    value_column='pearson (author/trends)',
    title='Correlação de Pearson por Author/Interesse Relativo',
    y_label='Correlação de Pearson',
)

project_options = correlation_pearson['project'].unique()

# Every project starts selected.
initial_value = tuple(project_options)

project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

# Trailing semicolon keeps the returned function's repr out of the cell output.
interact(update_plot, project_list=project_selector);

interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

## Commit / Author

### Spearman

In [11]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Per-project Spearman correlation between the smoothed commit count and the
# smoothed author count, as a two-column frame: project, coefficient.
correlation_spearman = (
    data.groupby('project')[['smoothed_qt_commit', 'smoothed_qt_author']]
    .corr(method="spearman")
    # Each project contributes a 2x2 matrix; keep the commit row and read its
    # author entry, i.e. the off-diagonal coefficient.
    .xs('smoothed_qt_commit', level=1)['smoothed_qt_author']
    .rename("spearman (commit/author)")
    .reset_index()
)

def _make_update_plot(correlations, value_column, title, y_label):
    """Build the widget callback for one correlation table.

    The table is captured in a closure, so the callback keeps plotting the
    frame it was created with even after a later cell rebinds the global
    ``correlation_spearman`` to a table with a different value column.

    Parameters:
        correlations: frame with a 'project' column and ``value_column``.
        value_column: name of the column holding the coefficient to plot.
        title: figure title.
        y_label: label for the y axis.

    Returns:
        A function taking ``project_list`` that draws the figure.
    """

    def update_plot(project_list):
        """Draw one marker per selected project, with a dashed guide line at each."""
        plt.figure(figsize=(10, 5))

        for project in project_list:
            # Local name chosen so the notebook-level ``data`` frame is not shadowed.
            project_rows = correlations[correlations["project"] == project]
            sns.scatterplot(x=project_rows['project'], y=project_rows[value_column])
            plt.axvline(project, color='gray', linestyle='--', linewidth=0.5)

        plt.xlabel('Projeto')
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

    return update_plot


update_plot = _make_update_plot(
    correlation_spearman,
    value_column='spearman (commit/author)',
    title='Correlação de Spearman por Commit/Author',
    y_label='Correlação de Spearman',
)

project_options = correlation_spearman['project'].unique()

# Every project starts selected.
initial_value = tuple(project_options)

project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

# Trailing semicolon keeps the returned function's repr out of the cell output.
interact(update_plot, project_list=project_selector);

interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

### Pearson

In [12]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from ipywidgets import interact, widgets

# Per-project Pearson correlation between the smoothed commit count and the
# smoothed author count, as a two-column frame: project, coefficient.
correlation_pearson = (
    data.groupby('project')[['smoothed_qt_commit', 'smoothed_qt_author']]
    .corr(method="pearson")
    # Each project contributes a 2x2 matrix; keep the commit row and read its
    # author entry, i.e. the off-diagonal coefficient.
    .xs('smoothed_qt_commit', level=1)['smoothed_qt_author']
    .rename("pearson (commit/author)")
    .reset_index()
)

def _make_update_plot(correlations, value_column, title, y_label):
    """Build the widget callback for one correlation table.

    The table is captured in a closure, so the callback keeps plotting the
    frame it was created with even after a later cell rebinds the global
    ``correlation_pearson`` to a table with a different value column.

    Parameters:
        correlations: frame with a 'project' column and ``value_column``.
        value_column: name of the column holding the coefficient to plot.
        title: figure title.
        y_label: label for the y axis.

    Returns:
        A function taking ``project_list`` that draws the figure.
    """

    def update_plot(project_list):
        """Draw one marker per selected project, with a dashed guide line at each."""
        plt.figure(figsize=(10, 5))

        for project in project_list:
            # Local name chosen so the notebook-level ``data`` frame is not shadowed.
            project_rows = correlations[correlations["project"] == project]
            sns.scatterplot(x=project_rows['project'], y=project_rows[value_column])
            plt.axvline(project, color='gray', linestyle='--', linewidth=0.5)

        plt.xlabel('Projeto')
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

    return update_plot


update_plot = _make_update_plot(
    correlation_pearson,
    value_column='pearson (commit/author)',
    title='Correlação de Pearson por Commit/Author',
    y_label='Correlação de Pearson',
)

project_options = correlation_pearson['project'].unique()

# Every project starts selected.
initial_value = tuple(project_options)

project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

# Trailing semicolon keeps the returned function's repr out of the cell output.
interact(update_plot, project_list=project_selector);

interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

In [1]:
import pandas as pd
# NOTE(review): no visible cell writes "correlacao_geral.csv" — confirm which
# notebook produces it. The first CSV column is used as the row index.
correlacoes = pd.read_csv(filepath_or_buffer="correlacao_geral.csv", index_col=0)

In [15]:
# Preview the per-project correlation table; pandas abbreviates it to the first and last rows.
correlacoes

Unnamed: 0,project,spearman (commit/interest),spearman (author/interest),spearman (commit/author)
0,CNTK,,,0.936
1,Catch2,0.233,0.592,0.493
2,Chart.js,-0.127,0.142,0.670
3,ITK,0.816,0.177,0.468
4,PyTables,0.232,-0.307,0.310
...,...,...,...,...
80,vue,-0.428,-0.113,0.784
81,web2py,0.979,0.482,0.532
82,weka-3.8,,,-0.309
83,wt,-0.130,-0.481,-0.051


In [21]:
# Number of projects whose commit/author Spearman coefficient is below -0.5.
# Rows with a missing coefficient compare as False and are not counted.
int((correlacoes["spearman (commit/author)"] < -0.5).sum())

0