# Lifecycle trend analysis

In [24]:
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from ipywidgets import interact, widgets

# --- Notebook-wide settings -------------------------------------------------
# Size and font size of the wide trend charts (fed to plt.figure(figsize=...)
# and to the axis/legend font sizes in the plotting cells).
chart_width, chart_height = 14, 7
chart_fontsize = 16

# Size and font size reserved for square charts.
chart_width_square = chart_height_square = 7
chart_fontsize_square = 12

# Spans of the short and long exponential moving averages (ewm(span=...)).
ema_short, ema_long = 12, 24

# Raw series are drawn dotted; the EMA curves are drawn with a thicker line.
linestyle_absolute = 'dotted'
linewidth_ema = 3.5

In [2]:
# Load the commit trend table (first CSV column becomes the index) and parse
# the ISO-formatted commit_date strings into datetimes in the same expression.
data = (
    pd.read_csv('data/commits-trends.csv', index_col=0)
    .assign(commit_date=lambda frame: pd.to_datetime(frame['commit_date'], format='%Y-%m-%d'))
)

In [3]:
# Monthly totals per project: index by (commit_date, project), then group on
# the project level and on month-end bins of the commit_date level.
# NOTE(review): .sum() is applied to every remaining column, including
# qt_author and relative_interest - confirm a monthly sum is the intended
# aggregate for those two.
commit_project_by_month = (
    data
    .set_index(['commit_date', 'project'])
    .groupby([
        pd.Grouper(level='project'),
        pd.Grouper(level='commit_date', freq='M'),
    ])
    .sum()
)
commit_project_by_month.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,qt_commit,qt_author,relative_interest
project,commit_date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Catch2,2010-11-30,27,27,0.0
Catch2,2010-12-31,32,32,0.0
Catch2,2011-01-31,41,41,0.0
Catch2,2011-02-28,25,25,0.0
Catch2,2011-03-31,41,41,0.0


## Lifecycle trend visualization

### Quantitative analysis - list of lifecycle trends

In [4]:
# Build `last_date`: one row per project holding the most recent values of the
# short- and long-span EMAs of commits, authors and relative interest.
#
# Assumes the rows of each project appear in chronological order in `data`
# (ewm and "last row" both depend on row order) - TODO confirm.
# NOTE(review): these EMAs run over the rows of `data` as loaded, while the
# chart cells smooth the monthly totals in commit_project_by_month - confirm
# that both are meant to use the same granularity.
view = data.copy()

project_list = view['project'].unique()

# Only the final row of each project is needed, so collect those rows and
# concatenate once instead of growing a DataFrame inside the loop.
latest_rows = []

for project in project_list:
    # Explicit copy: the EMA columns are added to this frame, not to `view`.
    filter_view = view[view['project'] == project].copy()

    for metric, column in (('commit', 'qt_commit'),
                           ('author', 'qt_author'),
                           ('interest', 'relative_interest')):
        filter_view[f'EMA_short_{metric}'] = filter_view[column].ewm(span=ema_short).mean()
        filter_view[f'EMA_long_{metric}'] = filter_view[column].ewm(span=ema_long).mean()

    # Last row of the project == the row the original kept with
    # drop_duplicates(subset='project', keep='last').
    latest_rows.append(filter_view.tail(1))

last_date = pd.concat(latest_rows)

In [5]:
# Flag each metric as 'high' when its short-span EMA is above its long-span
# EMA on the project's latest row, otherwise 'low'.
for metric in ('commit', 'author', 'interest'):
    last_date[metric] = np.where(
        last_date[f'EMA_short_{metric}'] > last_date[f'EMA_long_{metric}'],
        'high',
        'low',
    )

# Keep only the labels, ordered by project name.
last_date = last_date.loc[:, ['project', 'commit', 'author', 'interest']].sort_values(by='project')

# Overall trend is a two-out-of-three vote across the metric labels.
last_date['trend'] = np.where(
    last_date[['commit', 'author', 'interest']].eq('high').sum(axis=1) >= 2,
    'high',
    'low',
)

In [6]:
# Projects where only relative interest is on a high trend.
last_date[last_date['commit'].eq('low') & last_date['author'].eq('low') & last_date['interest'].eq('high')]

Unnamed: 0,project,commit,author,interest,trend
824246,brain.js,low,low,high,low
828146,cppcms,low,low,high,low
796264,cpprestsdk,low,low,high,low
845004,cxf,low,low,high,low
844977,cypress,low,low,high,low
844156,d3,low,low,high,low
836618,eve,low,low,high,low
827861,express,low,low,high,low
844919,faces,low,low,high,low
844634,fastapi,low,low,high,low


### Commit trends chart

In [23]:
def plot_commit_trends(project_list, start_date, end_date):
    """Plot monthly commit totals and their short/long EMAs per project.

    For each project in ``project_list`` the ``qt_commit`` column of
    ``commit_project_by_month`` is restricted to the inclusive window
    ``[start_date, end_date]``, smoothed with ``ema_short``- and
    ``ema_long``-span exponential moving averages, and drawn on one shared
    chart. The latest EMA pair is printed with a verdict: "high" when the
    short EMA is above the long EMA, "low" otherwise.

    Parameters
    ----------
    project_list : iterable of str
        Project names to draw. An empty selection yields an empty chart.
    start_date, end_date : date-like or None
        Window bounds, converted with ``pd.to_datetime``. ``None`` (e.g. a
        cleared date picker) leaves that side of the window unbounded.

    Projects with no rows inside the window are reported and skipped instead
    of raising ``IndexError``.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    # A missing bound means "no limit" on that side.
    lower = pd.Timestamp.min if pd.isna(start_date) else start_date
    upper = pd.Timestamp.max if pd.isna(end_date) else end_date

    plt.figure(figsize=(chart_width, chart_height))
    ax = plt.gca()

    all_projects = commit_project_by_month.index.get_level_values('project')

    for project in project_list:
        monthly = commit_project_by_month[all_projects == project]
        months = monthly.index.get_level_values('commit_date')
        in_window = (months >= lower) & (months <= upper)
        # Work on the selected column only; nothing is written back to the
        # shared monthly frame.
        commits = monthly.loc[in_window, 'qt_commit']

        if commits.empty:
            print(f'{project}: no data in the selected date range.')
            continue

        dates = commits.index.get_level_values('commit_date')
        short_ema = commits.ewm(span=ema_short).mean()
        long_ema = commits.ewm(span=ema_long).mean()

        ax.plot(dates, commits, label=f'{project}: # commits', linestyle=linestyle_absolute)
        ax.plot(dates, short_ema, label=f'{project}: EMA-{ema_short}', linewidth=linewidth_ema)
        ax.plot(dates, long_ema, label=f'{project}: EMA-{ema_long}', linewidth=linewidth_ema)

        if short_ema.iloc[-1] > long_ema.iloc[-1]:
            print(f"{project}'s lifecycle is on a high trend for commits.")
        else:
            print(f"{project}'s lifecycle is on a low trend for commits.")

        print(f'EMA_short-term = {short_ema.iloc[-1]}')
        print(f'EMA_long-term = {long_ema.iloc[-1]}')

    plt.ylabel('Commits', fontsize=chart_fontsize)
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90, fontsize=chart_fontsize)
    plt.yticks(fontsize=chart_fontsize)
    # Only build a legend when something was drawn; an empty legend just
    # triggers a matplotlib warning.
    if ax.get_legend_handles_labels()[0]:
        plt.legend(fontsize=chart_fontsize)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.tight_layout()
    plt.show()

# Controls for the commit chart: a multi-select over every project name and
# two date pickers preset to the first and last commit_date in `data`.
project_options = data['project'].unique()
project_selector = widgets.SelectMultiple(
    description='Projects',
    options=project_options,
)
start_date_picker = widgets.DatePicker(
    description='Start date',
    value=data['commit_date'].min(),
)
end_date_picker = widgets.DatePicker(
    description='End date',
    value=data['commit_date'].max(),
)

# Redraw the chart whenever one of the controls changes.
interact(
    plot_commit_trends,
    project_list=project_selector,
    start_date=start_date_picker,
    end_date=end_date_picker,
)

interactive(children=(SelectMultiple(description='Projects', options=('Catch2', 'Chart.js', 'ITK', 'PyTables',…

<function __main__.plot_commit_trends(project_list, start_date, end_date)>

### Contributor trends chart

In [26]:
def plot_author_trends(project_list, start_date, end_date):
    """Plot monthly author totals and their short/long EMAs per project.

    For each project in ``project_list`` the ``qt_author`` column of
    ``commit_project_by_month`` is restricted to the inclusive window
    ``[start_date, end_date]``, smoothed with ``ema_short``- and
    ``ema_long``-span exponential moving averages, and drawn on one shared
    chart. The latest EMA pair is printed with a verdict: "high" when the
    short EMA is above the long EMA, "low" otherwise.

    Parameters
    ----------
    project_list : iterable of str
        Project names to draw. An empty selection yields an empty chart.
    start_date, end_date : date-like or None
        Window bounds, converted with ``pd.to_datetime``. ``None`` (e.g. a
        cleared date picker) leaves that side of the window unbounded.

    Projects with no rows inside the window are reported and skipped instead
    of raising ``IndexError``.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    # A missing bound means "no limit" on that side.
    lower = pd.Timestamp.min if pd.isna(start_date) else start_date
    upper = pd.Timestamp.max if pd.isna(end_date) else end_date

    plt.figure(figsize=(chart_width, chart_height))
    ax = plt.gca()

    all_projects = commit_project_by_month.index.get_level_values('project')

    for project in project_list:
        monthly = commit_project_by_month[all_projects == project]
        months = monthly.index.get_level_values('commit_date')
        in_window = (months >= lower) & (months <= upper)
        # Work on the selected column only; nothing is written back to the
        # shared monthly frame.
        authors = monthly.loc[in_window, 'qt_author']

        if authors.empty:
            print(f'{project}: no data in the selected date range.')
            continue

        dates = authors.index.get_level_values('commit_date')
        short_ema = authors.ewm(span=ema_short).mean()
        long_ema = authors.ewm(span=ema_long).mean()

        ax.plot(dates, authors, label=f'{project}: # contributors', linestyle=linestyle_absolute)
        ax.plot(dates, short_ema, label=f'{project}: EMA-{ema_short}', linewidth=linewidth_ema)
        ax.plot(dates, long_ema, label=f'{project}: EMA-{ema_long}', linewidth=linewidth_ema)

        if short_ema.iloc[-1] > long_ema.iloc[-1]:
            print(f"{project}'s lifecycle is on a high trend for authors.")
        else:
            print(f"{project}'s lifecycle is on a low trend for authors.")

        print(f'EMA_short-term = {short_ema.iloc[-1]}')
        print(f'EMA_long-term = {long_ema.iloc[-1]}')

    plt.ylabel('Authors', fontsize=chart_fontsize)
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90, fontsize=chart_fontsize)
    plt.yticks(fontsize=chart_fontsize)
    # Only build a legend when something was drawn; an empty legend just
    # triggers a matplotlib warning.
    if ax.get_legend_handles_labels()[0]:
        plt.legend(fontsize=chart_fontsize)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.tight_layout()
    plt.show()

# Controls for the contributor chart: a multi-select over every project name
# and two date pickers preset to the first and last commit_date in `data`.
project_options = data['project'].unique()
project_selector = widgets.SelectMultiple(
    description='Projects',
    options=project_options,
)
start_date_picker = widgets.DatePicker(
    description='Start date',
    value=data['commit_date'].min(),
)
end_date_picker = widgets.DatePicker(
    description='End date',
    value=data['commit_date'].max(),
)

# Redraw the chart whenever one of the controls changes.
interact(
    plot_author_trends,
    project_list=project_selector,
    start_date=start_date_picker,
    end_date=end_date_picker,
)

interactive(children=(SelectMultiple(description='Projects', options=('Catch2', 'Chart.js', 'ITK', 'PyTables',…

<function __main__.plot_author_trends(project_list, start_date, end_date)>

### Relative interest trends

In [100]:
def plot_interest_trends(project_list, start_date, end_date):
    """Plot monthly relative-interest totals and their short/long EMAs.

    For each project in ``project_list`` the ``relative_interest`` column of
    ``commit_project_by_month`` is restricted to the inclusive window
    ``[start_date, end_date]``, smoothed with ``ema_short``- and
    ``ema_long``-span exponential moving averages, and drawn on one shared
    chart. The latest EMA pair is printed with a verdict: "high" when the
    short EMA is above the long EMA, "low" otherwise.

    Parameters
    ----------
    project_list : iterable of str
        Project names to draw. An empty selection yields an empty chart.
    start_date, end_date : date-like or None
        Window bounds, converted with ``pd.to_datetime``. ``None`` (e.g. a
        cleared date picker) leaves that side of the window unbounded.

    Projects with no rows inside the window are reported and skipped instead
    of raising ``IndexError``.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    # A missing bound means "no limit" on that side.
    lower = pd.Timestamp.min if pd.isna(start_date) else start_date
    upper = pd.Timestamp.max if pd.isna(end_date) else end_date

    plt.figure(figsize=(chart_width, chart_height))
    ax = plt.gca()

    all_projects = commit_project_by_month.index.get_level_values('project')

    for project in project_list:
        monthly = commit_project_by_month[all_projects == project]
        months = monthly.index.get_level_values('commit_date')
        in_window = (months >= lower) & (months <= upper)
        # Work on the selected column only; nothing is written back to the
        # shared monthly frame.
        interest = monthly.loc[in_window, 'relative_interest']

        if interest.empty:
            print(f'{project}: no data in the selected date range.')
            continue

        dates = interest.index.get_level_values('commit_date')
        short_ema = interest.ewm(span=ema_short).mean()
        long_ema = interest.ewm(span=ema_long).mean()

        ax.plot(dates, interest, label=f'{project}: # interest', linestyle=linestyle_absolute)
        ax.plot(dates, short_ema, label=f'{project}: EMA-{ema_short}', linewidth=linewidth_ema)
        ax.plot(dates, long_ema, label=f'{project}: EMA-{ema_long}', linewidth=linewidth_ema)

        if short_ema.iloc[-1] > long_ema.iloc[-1]:
            print(f"{project}'s lifecycle is on a high trend for relative interest.")
        else:
            print(f"{project}'s lifecycle is on a low trend for relative interest.")

        print(f'EMA_short-term = {short_ema.iloc[-1]}')
        print(f'EMA_long-term = {long_ema.iloc[-1]}')

    plt.ylabel('Relative interest', fontsize=chart_fontsize)
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90, fontsize=chart_fontsize)
    plt.yticks(fontsize=chart_fontsize)
    # Only build a legend when something was drawn; an empty legend just
    # triggers a matplotlib warning.
    if ax.get_legend_handles_labels()[0]:
        plt.legend(fontsize=chart_fontsize)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.tight_layout()
    plt.show()

# Controls for the relative-interest chart: a multi-select over every project
# name and two date pickers preset to the first and last commit_date in `data`.
project_options = data['project'].unique()
project_selector = widgets.SelectMultiple(
    description='Projects',
    options=project_options,
)
start_date_picker = widgets.DatePicker(
    description='Start date',
    value=data['commit_date'].min(),
)
end_date_picker = widgets.DatePicker(
    description='End date',
    value=data['commit_date'].max(),
)

# Redraw the chart whenever one of the controls changes.
interact(
    plot_interest_trends,
    project_list=project_selector,
    start_date=start_date_picker,
    end_date=end_date_picker,
)

interactive(children=(SelectMultiple(description='Projects', options=('Catch2', 'Chart.js', 'ITK', 'PyTables',…

<function __main__.plot_interest_trends(project_list, start_date, end_date)>