# Google Trends visualization

In [1]:
import pandas as pd
window_size = 12

trends = pd.read_csv("data/trends.csv")
trends['smoothed_relative_interest'] = trends.groupby('project')['relative_interest'].transform(lambda x: x.rolling(window=window_size, min_periods = 2).mean())

In [2]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from ipywidgets import interact, widgets

trends["date"] = pd.to_datetime(trends["date"])

def update_plot(project_list):
    plt.figure(figsize=(10, 5))

    for project in project_list:
        data = trends[trends["project"] == project]
        plt.plot(data['date'], data['smoothed_relative_interest'], linestyle='-', label=project)

    plt.xlabel('Data')
    plt.ylabel('Interesse Relativo')
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.title('Interesse Relativo ao longo do Tempo')
    plt.xticks(rotation=90)
    plt.legend()
    plt.tight_layout()
    plt.show()

project_options = trends['project'].unique()

initial_value = tuple(project_options)

project_selector = widgets.SelectMultiple(
    options=project_options,
    value=initial_value,
    description='Projetos'
)

interact(update_plot, project_list=project_selector)


interactive(children=(SelectMultiple(description='Projetos', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.update_plot(project_list)>

In [6]:
trends[trends['project']=='hanami'].head(20)

Unnamed: 0.1,Unnamed: 0,date,project,relative_interest,smoothed_relative_interest
63,63,2004-01-01,hanami,51.0,
143,143,2004-02-01,hanami,0.0,25.5
223,223,2004-03-01,hanami,0.0,17.0
303,303,2004-04-01,hanami,0.0,12.75
383,383,2004-05-01,hanami,0.0,10.2
463,463,2004-06-01,hanami,53.0,17.333333
543,543,2004-07-01,hanami,0.0,14.857143
623,623,2004-08-01,hanami,0.0,13.0
703,703,2004-09-01,hanami,0.0,11.555556
783,783,2004-10-01,hanami,0.0,10.4


In [36]:
new_trends = trends[trends['project'].isin(['pytorch','tensorflow'])]
new_trends.head()

Unnamed: 0.1,Unnamed: 0,date,project,relative_interest,smoothed_relative_interest
48,48,2004-01-01,pytorch,0.0,
70,70,2004-01-01,tensorflow,0.0,
128,128,2004-02-01,pytorch,4.0,2.0
150,150,2004-02-01,tensorflow,4.0,2.0
208,208,2004-03-01,pytorch,0.0,1.333333


In [38]:
new_trends = new_trends[new_trends['date']>'2015-10-01']
new_trends.head(10)

Unnamed: 0.1,Unnamed: 0,date,project,relative_interest,smoothed_relative_interest
11408,11408,2015-11-01,pytorch,0.0,0.0
11430,11430,2015-11-01,tensorflow,12.0,1.0
11488,11488,2015-12-01,pytorch,0.0,0.0
11510,11510,2015-12-01,tensorflow,6.0,1.5
11568,11568,2016-01-01,pytorch,0.0,0.0
11590,11590,2016-01-01,tensorflow,8.0,2.166667
11648,11648,2016-02-01,pytorch,0.0,0.0
11670,11670,2016-02-01,tensorflow,9.0,2.916667
11728,11728,2016-03-01,pytorch,0.0,0.0
11750,11750,2016-03-01,tensorflow,11.0,3.833333


In [42]:
#settings
chart_width=14
chart_height=14
chart_fontsize =20
ema_short=12
ema_long=24
linestyle_absolute='dotted'
linewidth_ema=3.5

def plot_interest_trends(project_list, start_date, end_date):
    plt.figure(figsize=(chart_width, chart_height))
    
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    for project in project_list:
        interests = new_trends[new_trends["project"] == project]
        #interests = commit_project_by_month[commit_project_by_month.index.get_level_values('project') == project]
        interests = interests[(interests['date'] >= start_date) 
                        & (interests['date'] <= end_date)]
        interests['ema_short'] = interests['relative_interest'].ewm(span=ema_short).mean()
        interests['ema_long'] = interests['relative_interest'].ewm(span=ema_long).mean()
        plt.plot(interests['date'], interests['relative_interest'], label=f'{project}: # interest',linestyle=linestyle_absolute)
        plt.plot(interests['date'], interests['ema_short'], label=f'{project}: EMA-{ema_short}', linewidth=linewidth_ema)
        plt.plot(interests['date'], interests['ema_long'], label=f'{project}: EMA-{ema_long}', linewidth=linewidth_ema)
        
        if interests['ema_short'].iloc[-1] > interests['ema_long'].iloc[-1]:
            print(f'{project}\'s lifecycle is on a high trend for relative interest.')
        else:
            print(f'{project}\'s lifecycle is on a low trend for relative interest.')
                        
        print(f'EMA_short-term = {interests["ema_short"].iloc[-1]}')
        print(f'EMA_long-term = {interests["ema_long"].iloc[-1]}')
        
    #plt.xlabel('Date')
    plt.ylabel('Relative interest',fontsize=chart_fontsize)
    #plt.title('Relative interest trends')
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=4))
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.xticks(rotation=90,fontsize=chart_fontsize)
    plt.yticks(fontsize=chart_fontsize)
    plt.legend(fontsize=chart_fontsize)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.tight_layout()
    plt.show()

project_options = trends['project'].unique()
project_selector = widgets.SelectMultiple(options=project_options,description='Projects')
start_date_picker = widgets.DatePicker(value=trends['date'].min(),description='Start date')
end_date_picker = widgets.DatePicker(value=trends['date'].max(),description='End date')

interact(plot_interest_trends, project_list=project_selector, start_date=start_date_picker, end_date=end_date_picker)

interactive(children=(SelectMultiple(description='Projects', options=('mlpack', 'robotframework', 'openssl', '…

<function __main__.plot_interest_trends(project_list, start_date, end_date)>