In [1]:
import time
import pandas as pd
import plotly.graph_objs as go

from ipywidgets import widgets
from plotly.widgets import GraphWidget
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import display, clear_output, Image

# Switch plotly into offline notebook mode so the iplot() call at the end of
# this notebook can render its figure inline.
init_notebook_mode(connected=True)

<IPython.core.display.Javascript object>

In [2]:
# Load the semicolon-delimited dataset; its first row supplies the column names.
df = pd.read_csv(
    'diversity_data.csv',
    sep=';',
    header=0,
)
# Last expression of the cell: show the dtype pandas inferred for each column.
df.dtypes

row_id                int64
project_id            int64
owner_login          object
name                 object
language             object
created_at           object
domain               object
forks                 int64
watchers              int64
total_committers      int64
total_commits         int64
project_age           int64
windows               int64
window_idx            int64
num_commits           int64
num_pull_req          int64
num_comments          int64
num_issues            int64
num_committers        int64
num_team              int64
committers           object
commits              object
team                 object
project_tenures      object
github_tenures       object
commit_tenures       object
genders              object
countries            object
left                 object
stayed               object
joined               object
male                  int64
female                int64
unknown               int64
has_woman              bool
f_known_gender      

In [3]:
# Keep only the columns the plots below need. They are selected by label
# rather than by position so a change in the CSV's column order raises a
# KeyError instead of silently picking up different columns.
selected_columns = [
    'name',
    'language',
    'total_committers',
    'window_idx',
    'blau_gender',
    'blau_country',
]
selected_df = df.loc[:, selected_columns]
selected_df.dtypes

name                 object
language             object
total_committers      int64
window_idx            int64
blau_gender         float64
blau_country        float64
dtype: object

In [4]:
# Group rows by project, language and quarter. Despite its name this is a
# DataFrameGroupBy, not a Series. group_keys=False stops pandas from
# prepending the group keys to the index when an index-preserving function is
# applied to the groups, so 'name', 'language' and 'window_idx' remain plain
# columns (and not also index levels) in the applied result.
temp_series = selected_df.groupby(['name','language','window_idx'], group_keys=False)

In [5]:
# The original applied an identity function to every (name, language,
# window_idx) group, which just reassembles the input rows while paying a
# Python call per group, and whose result layout depends on how the installed
# pandas treats group keys and grouping columns in apply(). Build the same
# flat frame directly instead. Rows with a missing grouping key are dropped,
# as groupby does by default.
groupedby_name_language = selected_df.dropna(subset=['name', 'language', 'window_idx'])

In [6]:
# Per-quarter averages across every project and language. numeric_only=True
# restricts the mean to the numeric columns; 'name' and 'language' are text
# and cannot be averaged (recent pandas raises instead of skipping them).
diversity_overtime_df = selected_df.groupby('window_idx').mean(numeric_only=True)

In [7]:
# Distinct languages in the order they first appear once the rows are ranked
# by total_committers, largest first.
popular_language_list = list(
    groupedby_name_language
    .sort_values(by='total_committers', ascending=False)['language']
    .unique()
)

In [8]:
# Start on Python when the dataset contains it; otherwise fall back to the
# first (top-ranked) language so the initial value is always one of the
# dropdown's options.
default_language = 'Python' if 'Python' in popular_language_list else popular_language_list[0]

language = widgets.Dropdown(
    options = popular_language_list,
    value = default_language,
    description = 'Language:'
)

# Projects of the initially selected language, ranked by total_committers
# (largest first), preceded by the 'All' pseudo-project.
ranked_rows = groupedby_name_language.sort_values(by='total_committers', ascending=False)
project_options = ['All']
project_options.extend(
    list(ranked_rows.loc[ranked_rows['language'] == language.value, 'name'].unique())
)

project = widgets.Dropdown(
    options = project_options,
    value = 'All',
    description = 'Project:'
)

# Handle to the hosted Plotly figure that the dropdown callback restyles.
g = GraphWidget('https://plot.ly/~cesarsl/3/')
# Lay both dropdowns out side by side.
container = widgets.HBox([language,project])

In [9]:
def filter_df(language, project, data=None):
    """Return the diversity rows for one language, optionally for one project.

    Parameters
    ----------
    language : str
        Value to match against the 'language' column.
    project : str
        Value to match against the 'name' column, or 'All' to average over
        every project of the language.
    data : pandas.DataFrame, optional
        Frame to filter. Defaults to the notebook-level
        ``groupedby_name_language`` frame.

    Returns
    -------
    pandas.DataFrame
        For a specific project: the matching rows, unchanged. For 'All': one
        row per (language, window_idx) holding the mean of the numeric
        columns, with the grouping keys restored as ordinary columns.
    """
    frame = groupedby_name_language if data is None else data
    in_language = frame['language'] == language

    if project != 'All':
        return frame.loc[in_language & (frame['name'] == project)]

    # numeric_only=True keeps the text column 'name' out of the mean; recent
    # pandas raises on non-numeric columns instead of silently skipping them.
    return (
        frame.loc[in_language]
        .groupby(['language', 'window_idx'])
        .mean(numeric_only=True)
        .reset_index()
    )

# Frame for the default view: Python, averaged over all of its projects.
temp_df = filter_df('Python','All')

In [10]:
# Series for the default view: the quarter index on x and both Blau indices on y.
x_values = temp_df.window_idx
y1 = temp_df.blau_gender
y2 = temp_df.blau_country

# Description of the two-trace chart: 'data' holds per-trace attributes (list
# entries line up with the trace indices in 'traces'), 'layout' holds the
# figure-level settings.
graph_object = {
    'data': {
        'x': [x_values, x_values],
        'y': [y1, y2],
        'name': ['Blau Gender', 'Blau Country'],
        'line.color': ['rgb(178,18,18)','rgb(9,113,178)'],
        'line.width': 3,
        # Scatter mode is built from 'lines', 'markers' and 'text'; the
        # previous value 'line' is not one of them.
        'mode': 'lines'
    },
    'traces': [0,1],
    'layout': {
        'title': 'Diversity by Language and Project',
        'showlegend':  True,
        'xaxis': { 'range': [0,24], 'title': 'Quarter' },
        'yaxis': { 'range': [0,1], 'title': 'Blau Index' }
    }
}

In [11]:
# Every row ranked by total_committers, largest first.
sorted_df = groupedby_name_language.sort_values(
    by='total_committers',
    ascending=False,
)
# One row per distinct (name, language, total_committers) combination together
# with its row count, again ranked by total_committers, largest first.
projects_df = (
    sorted_df
    .groupby(['name','language','total_committers'])
    .size()
    .reset_index()
    .sort_values(by='total_committers', ascending=False)
)

In [12]:
def validate():
    """Return True when the language dropdown holds one of the known languages."""
    return language.value in popular_language_list
    
def _diversity_update(frame):
    """Build the restyle/relayout payload for the two Blau-index traces of `frame`."""
    quarters = frame.window_idx
    return {
        'data': {
            'x': [quarters, quarters],
            'y': [frame.blau_gender, frame.blau_country],
            'name': ['Blau Gender', 'Blau Country'],
            'line.color': ['rgb(178,18,18)','rgb(9,113,178)'],
            'line.width': 3,
            # Scatter mode is built from 'lines', 'markers' and 'text'; the
            # previous value 'line' is not one of them.
            'mode': 'lines'
        },
        'traces': [0,1],
        'layout': {
            'title': 'Diversity by Language and Project',
            'showlegend':  True,
            'xaxis': { 'range': [0,24], 'title': 'Quarter' },
            'yaxis': { 'range': [0,1], 'title': 'Blau Index' }
        }
    }


def response(change):
    """Redraw the graph after either dropdown changes.

    Parameters
    ----------
    change : traitlets change object
        Must expose ``owner``, the widget whose value changed.

    When the language dropdown is the source, the project dropdown is first
    repopulated with that language's projects and reset to 'All'. The frame
    is filtered only after that, so the plot never uses a project left over
    from the previously selected language. Does nothing when validate()
    reports an unknown language.
    """
    if not validate():
        return

    # Identify the source by widget identity rather than by value, so a
    # project that happens to share its name with a language is not mistaken
    # for a language change.
    if change.owner is language:
        projects_list = list(projects_df.loc[projects_df['language'] == language.value]['name'])
        project.options = ['All'] + projects_list
        project.value = 'All'

    update = _diversity_update(filter_df(language.value, project.value))
    g.restyle(update['data'], indices=update['traces'])
    g.relayout(update['layout'])
            
# Re-run `response` whenever the selected value of either dropdown changes.
for _dropdown in (language, project):
    _dropdown.observe(response, names='value')

In [13]:
# Show the dropdown row first, then the graph widget underneath it.
display(container, g)

HBox(children=(Dropdown(description='Language:', options=('Python', 'C', 'CoffeeScript', 'Ruby', 'JavaScript',…

GraphWidget()

In [15]:
# Static overview: both Blau indices averaged per quarter over the whole dataset.
trace0 = go.Scatter(
    x = diversity_overtime_df.index,
    y = diversity_overtime_df.blau_gender,
    mode = 'lines',
    name = 'Blau Gender',
    line = {
        'color':  'rgb(178,18,18)',
        'width': 3
    }
)

trace1 = go.Scatter(
    x = diversity_overtime_df.index,
    y = diversity_overtime_df.blau_country,
    mode = 'lines',
    name = 'Blau Country',
    line = {
        'color': 'rgb(9,113,178)',
        'width': 3
    }
)

layout = go.Layout(
    title = 'Diversity Over Time',
    xaxis = {
        'title': 'Quarter'
    },
    yaxis = {
        'title':  'Blau Index'
    }
)

# A plain list of traces replaces the deprecated go.Data wrapper; go.Figure
# accepts it directly.
data = [trace0, trace1]
figure = go.Figure(data = data, layout = layout)

iplot(figure, filename = 'diversity-over-time')