# Visualization of EventBrite data: bar charts

In [None]:
# import packages

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the EventBrite export into a DataFrame. The path is relative, so it is
# resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="../data/eventbrite.csv")

In [None]:
# Preview the first five rows of the loaded data.
df.head(n=5)

In [None]:
# Print a summary of the columns: names, dtypes and non-null counts.
df.info()

## Bar charts

#### Bar chart function

In [None]:
# Define function for bar plot

def barplot(df, x_column, y_column):
    """Show a bar chart with ``x_column`` on the x-axis and ``y_column`` on the y-axis.

    Parameters
    ----------
    df
        Data handed to ``sns.barplot`` as ``data``; it must contain both
        columns.
    x_column : str
        Name of the column plotted along the x-axis; also used as the
        x-axis label.
    y_column : str
        Name of the column plotted along the y-axis; also used as the
        y-axis label.

    Returns
    -------
    Whatever ``plt.show()`` returns. The figure is displayed as a side
    effect.
    """
    # Reset seaborn to its defaults, then switch to the 'white' style.
    sns.set()
    sns.set_style('white')

    fig, axes = plt.subplots(figsize=(8, 6))
    sns.barplot(data=df, x=x_column, y=y_column, color='deepskyblue', ax=axes)

    # Axis labels reuse the column names; the title reads "<y> by <x>".
    axes.set_xlabel(x_column, fontsize=16)
    axes.set_ylabel(y_column, fontsize=16)
    axes.tick_params(axis='both', which='major', labelsize=14)
    fig.suptitle(' by '.join([y_column, x_column]), fontsize=18)

    sns.despine()

    return plt.show()


#### Horizontal bar chart function

In [None]:
# Define function for horizontal bar plot

def barplot_horiz(df, x_column, y_column):
    """Show a bar chart intended for horizontal bars, without a y-axis label.

    Parameters
    ----------
    df
        Data handed to ``sns.barplot`` as ``data``; it must contain both
        columns.
    x_column : str
        Name of the column plotted along the x-axis; also used as the
        x-axis label.
    y_column : str
        Name of the column plotted along the y-axis. It appears in the
        title only, because the y-axis label is set to an empty string.

    Returns
    -------
    Whatever ``plt.show()`` returns. The figure is displayed as a side
    effect.
    """
    # Reset seaborn to its defaults, then switch to the 'white' style.
    sns.set()
    sns.set_style('white')

    fig, axes = plt.subplots(figsize=(8, 6))
    sns.barplot(data=df, x=x_column, y=y_column, color='deepskyblue', ax=axes)

    # Only the x-axis gets a label; the title reads "<x> by <y>".
    axes.set_xlabel(x_column, fontsize=16)
    axes.set_ylabel("", fontsize=16)
    axes.tick_params(axis='both', which='major', labelsize=14)
    fig.suptitle(' by '.join([x_column, y_column]), fontsize=18)

    sns.despine()

    return plt.show()


#### Participants by year

In [None]:
# Keep external participants only: drop every row whose affiliation is exactly
# 'ESCIENCE CENTER'. Rows with a missing affiliation compare as "not equal"
# and are therefore kept.
df_external = df.loc[df['affiliation'].ne('ESCIENCE CENTER')]
# Show how many rows remain per affiliation.
df_external['affiliation'].value_counts()

In [None]:
# Build a table of external participants per year.
# The count is the number of non-missing order_id values in each year.

df_participants_by_year = (
    df_external
    .groupby('year')['order_id']
    .count()
    .reset_index(name='participants')
)
df_participants_by_year.head()

In [None]:
# Bar chart: number of external participants per year
barplot(df_participants_by_year, x_column='year', y_column='participants')

#### Digital skills: participants by year 

In [None]:
# Select the digital skills events only.

# Only eScience events: org_id is either 'NLeSc' or 'ePlan'.
df_escience = df_external[df_external['org_id'].isin(['NLeSc', 'ePlan'])]

# Filter out the eScience symposia (event name contains 'Symposium').
# na=False states explicitly that a row without an event name is not a
# symposium and is kept. The previous `str.contains(...) == False` test left
# that case to how the installed pandas version represents missing matches.
is_symposium = df_escience['event'].str.contains('Symposium', na=False)
df_digitalskills = df_escience[~is_symposium]

# Show how many rows remain per organiser.
df_digitalskills['org_id'].value_counts()

In [None]:
# Build a table of digital skills participants per year.
# Participants are counted via order_id, the same column the other
# per-year tables in this notebook count, so the figures are comparable.

df_digitalskills_participants_by_year = (
    df_digitalskills
    .groupby('year')['order_id']
    .count()
    .reset_index(name='participants')
)
df_digitalskills_participants_by_year.head()


In [None]:
# Bar chart: participants per year for the digital skills events

barplot(df_digitalskills_participants_by_year, x_column='year', y_column='participants')

#### NL-RSE: participants by year 

In [None]:
# Restrict the external participants to NL-RSE events (org_id == 'NL-RSE')
df_nlrse = df_external.loc[df_external['org_id'].eq('NL-RSE')]
# Show how many rows were selected.
df_nlrse['org_id'].value_counts()

In [None]:
# Build a table of NL-RSE participants per year (non-missing order_id values per year)
df_nlrse_participants_by_year = (
    df_nlrse
    .groupby('year')['order_id']
    .count()
    .reset_index(name='participants')
)
df_nlrse_participants_by_year.head()

In [None]:
# Bar chart: participants per year for the NL-RSE events

barplot(df_nlrse_participants_by_year, x_column='year', y_column='participants')

In [None]:
# Same NL-RSE chart, built step by step so the bar width can be adjusted
# before the figure is shown.

sns.set()
sns.set_style('white')
f, ax = plt.subplots(figsize=(8, 6))
sns.barplot(data=df_nlrse_participants_by_year, x='year', y='participants', color='deepskyblue', ax=ax)
ax.set_xlabel('year', fontsize=16)
ax.set_ylabel('participants', fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=14)
f.suptitle('participants by year', fontsize=18)
sns.despine()

def change_width(ax, new_value):
    """Give every patch on ``ax`` the width ``new_value``, keeping its centre fixed.

    Parameters
    ----------
    ax
        Object exposing a ``patches`` iterable whose items provide
        ``get_width``/``set_width`` and ``get_x``/``set_x``.
    new_value
        Width to apply to each patch.
    """
    for bar in ax.patches:
        # Moving the left edge by half of the width difference keeps the
        # bar centred on its original position.
        shift = (bar.get_width() - new_value) * .5
        bar.set_width(new_value)
        bar.set_x(bar.get_x() + shift)

# Narrow the bars to a width of 0.40, then render the figure
change_width(ax, new_value=.40)
plt.show()

#### Participants by affiliation type

In [None]:
# Create df with participants by affiliation type (all participants)

# Both output columns are named explicitly. Renaming the default names
# ('index' and 'affiliation_type') only works on pandas < 2.0; from 2.0 on
# value_counts() names its result 'count' and names the index after the
# column, which would leave the category column called 'participants'.
df_participants_by_affl = (
    df['affiliation_type']
    .value_counts()
    .rename_axis('affiliation type')
    .reset_index(name='participants')
)
df_participants_by_affl.head()

In [None]:
# Horizontal bar chart: participants per affiliation type

barplot_horiz(df_participants_by_affl, x_column='participants', y_column='affiliation type')

In [None]:
# Excluding participants from eScience Center
# Create df with participants by affiliation type

# Both output columns are named explicitly. Renaming the default names
# ('index' and 'affiliation_type') only works on pandas < 2.0; from 2.0 on
# value_counts() names its result 'count' and names the index after the
# column, which would leave the category column called 'participants'.
df_participants_by_affl = (
    df_external['affiliation_type']
    .value_counts()
    .rename_axis('affiliation type')
    .reset_index(name='participants')
)
df_participants_by_affl.head()

In [None]:
# Horizontal bar chart: participants per affiliation type

barplot_horiz(df_participants_by_affl, x_column='participants', y_column='affiliation type')

#### Participants by disciplinary section

In [None]:
# Create df with participants by disciplinary section
# There is quite a lot of missing data, because the EventBrite forms of some
# events did not ask for it; value_counts() leaves missing values out.

# Both output columns are named explicitly. Renaming the default names
# ('index' and 'NLeScdis') only works on pandas < 2.0; from 2.0 on
# value_counts() names its result 'count' and names the index after the
# column, which would leave the category column called 'participants'.
df_participants_by_section = (
    df['NLeScdis']
    .value_counts()
    .rename_axis('disciplinary section')
    .reset_index(name='participants')
)
df_participants_by_section.head()

In [None]:
# Relabel the "Ph&B" section as "NSE". The result is assigned back to the
# column: calling replace(..., inplace=True) on a column selection does not
# update the DataFrame when pandas Copy-on-Write is active.
df_participants_by_section['disciplinary section'] = (
    df_participants_by_section['disciplinary section'].replace("Ph&B", "NSE")
)

# If "NSE" already had a row of its own, the relabel leaves two "NSE" rows,
# and sns.barplot would draw their mean rather than their total. Summing per
# section merges them; when there is no duplicate the table is unchanged.
# sort=False keeps the existing row order.
df_participants_by_section = (
    df_participants_by_section
    .groupby('disciplinary section', as_index=False, sort=False)['participants']
    .sum()
)
df_participants_by_section.head()

In [None]:
# Bar chart of participants per disciplinary section, with narrower bars

sns.set()
sns.set_style('white')
f, ax = plt.subplots(figsize=(8, 6))
sns.barplot(data=df_participants_by_section, x='disciplinary section', y='participants', color='deepskyblue', ax=ax)

# Left disabled: rotated, right-aligned tick labels.
#ax.set_xticklabels(df_participants_by_section['disciplinary section'], rotation=40, ha='right')
ax.set_xlabel('disciplinary section', fontsize=16)
ax.set_ylabel('participants', fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=14)
f.suptitle('participants by disciplinary section', fontsize=18)
sns.despine()

# Narrow the bars to a width of 0.50 before rendering
change_width(ax, new_value=.50)

plt.show()


### <span style='background:yellow'> It seems strange that Sust & Env is so low; could this be a classification issue? </span>

### <span style='background:yellow'> TO DO: Figure out how to display/save high-res images </span>
