In [1]:
import pandas as pd
import numpy as np
import matplotlib
from IPython.core.display import display, HTML
from bokeh.charts import Bar, Donut, Histogram
from bokeh.plotting import gridplot, output_notebook, show
from bokeh.mpl import to_bokeh
output_notebook()
%matplotlib inline

In [2]:
def random_age_in_range(age_range):
    """Draw a random age that falls inside a surveyed age bracket.

    The survey only records a bracket label, so a concrete age is sampled
    uniformly from the bracket with both ends included.

    Parameters
    ----------
    age_range : str
        Bracket label as it appears in the survey, e.g.
        ``'20 to 30 years old'``.

    Returns
    -------
    int or None
        An age inside the bracket, or ``None`` when the label is not one of
        the known brackets.
    """
    # Inclusive (youngest, oldest) bounds for every known bracket label.
    bounds_by_label = {
        'Less than 20 years old': (15, 19),
        '20 to 30 years old': (20, 30),
        '31 to 40 years old': (31, 40),
        '41 to 50 years old': (41, 50),
        '51 to 60 years old': (51, 60),
    }
    bounds = bounds_by_label.get(age_range)
    if bounds is None:
        return None
    youngest, oldest = bounds
    # randint excludes `high`, so add one to keep the oldest age reachable.
    return np.random.randint(youngest, high=oldest + 1)

def attendee_of_label(meetup_rate, pyday_rate):
    """Name the event(s) an attendee rated, based on the two overall ratings.

    A negative rating is treated as "did not rate this event". The meetup
    rating is checked first, so two negative ratings yield ``'PyDay'``.

    Returns ``'PyDay'``, ``'Meetup'`` or ``'Both'``.
    """
    if meetup_rate < 0.0:
        return 'PyDay'
    return 'Meetup' if pyday_rate < 0.0 else 'Both'

def get_ratings(columns, data=None):
    """Stack several rating columns into one long two-column frame.

    Parameters
    ----------
    columns : iterable of str
        Names of the columns to stack, in the order they should appear.
    data : pandas.DataFrame, optional
        Frame to read the columns from. Defaults to the notebook-level
        ``attendees_data`` so existing ``get_ratings(columns)`` calls keep
        working.

    Returns
    -------
    pandas.DataFrame
        One row per original cell, with the cell content in ``values`` and
        the name of the column it came from in ``source``.
    """
    if data is None:
        # Fall back to the notebook global only when no frame is supplied.
        data = attendees_data
    values = []
    sources = []
    for column in columns:
        column_values = list(data[column])
        values.extend(column_values)
        # Tag every value with the column it was read from.
        sources.extend([column] * len(column_values))
    return pd.DataFrame({'values': values, 'source': sources})

In [3]:
# Load the survey export into a DataFrame; the path is relative, so the
# notebook must be run from the directory that contains `data/`.
attendees_data = pd.read_csv('data/PyDay BCN 2016 attendee-report.csv')

In [4]:
# Replace the exported headers with short identifiers. The assignment is
# positional: one name per column of the loaded frame.
attendees_data.columns = [
    '#', 'age_range', 'gender', 'food', 'Other', 'language', 'occupation',
    'meetup_rate', 'meetup_length', 'meetup_depth',
    'tensorflow_talk', 'accessibility_talk', 'citybikes_talk',
    'meetup_venue', 'meetup_refreshment',
    'pyday_rate', 'pyday_length', 'pyday_depth',
    'intro_python', 'intro_jupyter', 'intro_django',
    'testing', 'zappa', 'asyncio', 'mongodb',
    'pyday_venue', 'pyday_lunch',
    'web', 'subjects', 'speakers', 'heared', 'Other.1', 'comment',
    'start', 'submit', 'network',
]

# Discard rows that carry neither a meetup rating nor a PyDay rating.
rated_neither = (
    attendees_data.meetup_rate.isnull() & attendees_data.pyday_rate.isnull()
)
attendees_data = attendees_data.drop(attendees_data.index[rated_neither.values])

# Merge each choice column with its companion "Other" column by
# concatenating the two strings (missing entries count as empty text).
for answer, companion in (('heared', 'Other.1'), ('food', 'Other')):
    attendees_data[answer] = (
        attendees_data[[answer, companion]].fillna('').sum(axis=1)
    )

# Overall mean ratings, rounded to two decimals.
meetup_rating = round(attendees_data['meetup_rate'].mean(), 2)
pyday_rating = round(attendees_data['pyday_rate'].mean(), 2)

# "<name>_f" copies of the rating columns with -1 standing in for missing
# answers; the original columns keep their NaNs.
for answer in (
    'meetup_rate', 'meetup_length', 'meetup_depth',
    'pyday_rate', 'pyday_length', 'pyday_depth',
):
    attendees_data[answer + '_f'] = attendees_data[answer].fillna(-1)

# Remove the columns that are no longer needed.
for unused in ('#', 'start', 'submit', 'network', 'Other', 'Other.1'):
    del attendees_data[unused]

# Placeholder columns, filled in by a later cell.
attendees_data = attendees_data.assign(age=0, attendee_of='None')
attendees_data_length = len(attendees_data.index)

In [5]:
# Derive a concrete age, the attended-event label and consistent "_f"
# rating columns for every attendee.
#
# Values are written with whole-column assignment and `.loc` masks instead
# of chained indexing (`frame.column[label] = value`), so nothing is written
# to a temporary copy and no warnings have to be silenced.

# Sample an age from each attendee's bracket; unknown brackets become NaN.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# ages (and the average derived from them) differ between runs.
attendees_data['age'] = pd.to_numeric(
    attendees_data['age_range'].map(random_age_in_range)
)

# Label each attendee from the sign of the two overall ratings.
attendees_data['attendee_of'] = [
    attendee_of_label(meetup_rate, pyday_rate)
    for meetup_rate, pyday_rate in zip(
        attendees_data['meetup_rate_f'], attendees_data['pyday_rate_f']
    )
]

# An event counts as skipped when the attendee is labelled with only the
# other event.
skipped_by_event = {
    'meetup': attendees_data['attendee_of'] == 'PyDay',
    'pyday': attendees_data['attendee_of'] == 'Meetup',
}
for event in ('meetup', 'pyday'):
    skipped = skipped_by_event[event]
    rate_f = event + '_rate_f'
    length_f = event + '_length_f'
    depth_f = event + '_depth_f'
    # Skipped event: every "_f" answer for it is forced to the -1 marker.
    attendees_data.loc[skipped, [rate_f, length_f, depth_f]] = -1.0
    # Attended event: an unanswered length/depth question becomes 0 so it
    # is distinguishable from the "skipped" marker.
    for column in (length_f, depth_f):
        unanswered = ~skipped & (attendees_data[column] == -1.0)
        attendees_data.loc[unanswered, column] = 0.0

In [6]:
import warnings


def _demographic_donut(column, title):
    """Build a titled Donut chart of ``attendees_data`` labelled by ``column``."""
    return Donut(attendees_data, label=column, title=title)


# Silence warnings only while the charts are built. `catch_warnings`
# restores the previous filter configuration on exit, whereas calling
# `filterwarnings('default')` afterwards would overwrite it for the rest of
# the session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    age_range = _demographic_donut('age_range', 'Attendees by age range')
    gender = _demographic_donut('gender', 'Attendees by gender')
    food = _demographic_donut('food', 'Attendees by food preference')
    language = _demographic_donut(
        'language', 'Attendees by language preference'
    )
    occupation = _demographic_donut('occupation', 'Attendees by occupation')

In [7]:
# Mean of the sampled ages, rounded up to a whole year.
average_attendee_age = np.ceil(attendees_data.age.mean())

# Percentage of each gender answer among the non-missing answers.
# `normalize=True` is passed by name: the previous positional string only
# enabled normalisation because a non-empty string is truthy.
gender_percentages = attendees_data.gender.value_counts(normalize=True) * 100

# `.get` keeps the cell working when a category has no respondents.
females = gender_percentages.get('Female', 0.0)
non_binary = gender_percentages.get('Non-binary', 0.0)
males = gender_percentages.get('Male', 0.0)
non_male_percentage = round(females + non_binary, 2)

In [8]:
# Section heading, the grid of demographic donut charts, then a summary
# table with the headline numbers.
display(HTML('<h1>Demographics on polled attendees</h1>'))

demographic_rows = [
    [age_range, gender, food],
    [language, occupation, None],  # None leaves the last grid slot empty
]
graphs = gridplot(demographic_rows)
show(graphs)

# Placeholders, in order: number of polled attendees, average age,
# percentage of non-binary and female attendees.
demographics_table = """
<table>
  <tr>
    <td colspan="2">
      <h1>Highlighted Demographic Statistics</h1>
    </td>
  </tr>
  <tr>
    <td>
      <h2>Polled attendees</h2>
    </td>
    <td>
      <h2>{}</h2>
    </td>
  </tr>
  <tr>
    <td>
      <h2>Average age</h2>
    </td>
    <td>
      <h2>{} years old</h2>
    </td>
  </tr>
  <tr>
    <td>
      <h2>Non-binary and female</h2>
    </td>
    <td>
      <h2>{} %</h2>
    </td>
  </tr>
</table>
"""
demographics_html = demographics_table.format(
    attendees_data_length,
    average_attendee_age,
    non_male_percentage,
)
display(HTML(demographics_html))

0,1
Highlighted Demographic Statistics,Highlighted Demographic Statistics
Polled attendees,66
Average age,31.0 years old
Non-binary and female,19.7 %


In [9]:
# Fraction of attendees per attendance label, computed once.
# `normalize=True` is passed by name: the previous positional string only
# enabled normalisation because a non-empty string is truthy.
audience_shares = attendees_data.attendee_of.value_counts(normalize=True)

# `.get` keeps the cell working when a label has no attendees.
both_share = audience_shares.get('Both', 0.0)

# Attendees labelled 'Both' count towards each event's audience.
meetup_audience = (audience_shares.get('Meetup', 0.0) + both_share) * 100
meetup_percentage = round(meetup_audience, 2)
pyday_audience = (audience_shares.get('PyDay', 0.0) + both_share) * 100
pyday_percentage = round(pyday_audience, 2)

# Long-format frames (value + source column) consumed by the bar charts.
event_ratings = get_ratings(['meetup_rate', 'pyday_rate'])
length_ratings = get_ratings(['meetup_length', 'pyday_length'])
depth_ratings = get_ratings(['meetup_depth', 'pyday_depth'])
venue_ratings = get_ratings(['meetup_venue', 'pyday_venue'])
food_ratings = get_ratings(['meetup_refreshment', 'pyday_lunch'])
talk_ratings = get_ratings(
    ['tensorflow_talk', 'accessibility_talk', 'citybikes_talk']
)
beginners_ratings = get_ratings(
    ['intro_python', 'intro_jupyter', 'intro_django']
)
initiated_ratings = get_ratings(['testing', 'zappa', 'asyncio', 'mongodb'])

In [10]:
def _rating_bar(ratings, title):
    """Build a 400x400 Bar chart of ``ratings`` labelled by 'values' and grouped by 'source'."""
    return Bar(
        ratings,
        'values',
        group='source',
        title=title,
        plot_width=400,
        plot_height=400,
    )


# Donut of the attendance labels.
audience_per_event = Donut(
    attendees_data, label='attendee_of', title='Audience by event'
)

# One grouped bar chart per long-format ratings frame.
event_evaluation = _rating_bar(event_ratings, 'Event Evaluation')

length_evaluation = _rating_bar(length_ratings, 'Length Evaluation')
length_evaluation.legend.location = "top_right"

depth_evaluation = _rating_bar(depth_ratings, 'Depth Evaluation')
depth_evaluation.legend.location = "top_right"

venue_evaluation = _rating_bar(venue_ratings, 'Venue Evaluation')
food_evaluation = _rating_bar(food_ratings, 'Food Evaluation')
talk_evaluation = _rating_bar(talk_ratings, 'Evaluation by talk')

# The web answers are read straight from the attendee frame, ungrouped.
web_evaluation = Bar(
    attendees_data,
    'web',
    title='Evaluation on web',
    plot_width=400,
    plot_height=400,
)

beginners_evaluation = _rating_bar(
    beginners_ratings, 'Evaluation by beginners workshops'
)
initiated_evaluation = _rating_bar(
    initiated_ratings, 'Evaluation by initiated workshops'
)

# Larger canvas for the "how did you hear about us" chart.
heared_sources = Bar(
    attendees_data,
    'heared',
    title='Sources for attendees',
    plot_width=800,
    plot_height=800,
)
heared_sources.legend.location = "top_right"

In [11]:
# Section heading, the audience donut, then a table comparing both events.
display(HTML('<h1>Audience statistics</h1>'))
graphs = gridplot([
        [audience_per_event, None,],
    ])
show(graphs)
# Placeholders, in order: meetup audience %, PyDay audience %,
# meetup mean rating, PyDay mean rating.
# The final row now has its closing </tr>, which was missing before.
display(
    HTML(
        """
<table>
  <tr>
    <td>
      <h1>Event Statistics</h1>
    </td>
    <td>
      <h1>Meetup</h1>
    </td>
    <td>
      <h1>PyDay</h1>
    </td>
  </tr>
  <tr>
    <td>
      <h2>Audience</h2>
    </td>
    <td>
      <h2>{} % </h2>
    </td>
    <td>
      <h2>{} %</h2>
    </td>
  </tr>
  <tr>
    <td>
      <h2>Rating</h2>
    </td>
    <td>
      <h2>{} out of 5</h2>
    </td>
    <td>
      <h2>{} out of 5</h2>
    </td>
  </tr>
</table>
""".format(
            meetup_percentage,
            pyday_percentage,
            meetup_rating,
            pyday_rating
        )
    )
)

0,1,2
Event Statistics,Meetup,PyDay
Audience,63.64 %,80.3 %
Rating,4.33 out of 5,4.47 out of 5


In [12]:
# Section heading followed by a 2x3 grid of the evaluation charts.
display(HTML('<h1>Audience Evaluation</h1>'))
evaluation_rows = [
    [event_evaluation, length_evaluation, depth_evaluation],
    [venue_evaluation, food_evaluation, web_evaluation],
]
graphs = gridplot(evaluation_rows)
show(graphs)

In [13]:
# Section heading followed by one row of talk and workshop charts.
display(HTML('<h1>Audience Evaluation II</h1>'))
session_rows = [
    [talk_evaluation, beginners_evaluation, initiated_evaluation],
]
graphs = gridplot(session_rows)
show(graphs)

In [43]:
# Section heading followed by the chart of how attendees heard of the event.
display(HTML('<h1>Audience Sources</h1>'))
source_rows = [
    [heared_sources, None],  # None leaves the second grid slot empty
]
graphs = gridplot(source_rows)
show(graphs)

In [14]:
# Print every non-missing entry of the `subjects` column, each preceded by a
# dashed separator line.
subject_answers = attendees_data.subjects.dropna()
for subject in subject_answers:
    print("---------------------------------------------\n{}".format(subject))

---------------------------------------------
Biopython
---------------------------------------------
More events
---------------------------------------------
Python on embedded devices (RPi, Arduino & similar)
---------------------------------------------
professional experiences with python
---------------------------------------------
We should have instructions to prepare the workshop before it happens not to lose time downloading and installing stuffs.
---------------------------------------------
use of odoo openERP
---------------------------------------------
more event like this 
---------------------------------------------
Natural Language Processing
Chatbots
---------------------------------------------
initiation workshop about scipy, opencv
security
---------------------------------------------
python and graphics/gaming (pyopengl/pygame/cocos/kiwi...)
---------------------------------------------
Sessions between "beginners" and "advanced"
------------------------------

In [15]:
# Print every non-missing entry of the `speakers` column, each preceded by a
# dashed separator line.
speaker_answers = attendees_data.speakers.dropna()
for speaker in speaker_answers:
    print("---------------------------------------------\n{}".format(speaker))

---------------------------------------------
Skyscanner
---------------------------------------------
Mathieu Leplatre
---------------------------------------------
odoo
---------------------------------------------
I missed other fields like humanities or social sciences 
---------------------------------------------
Nuriaaaaaaaa !!
:-P
the guys from Skyscanner again please.
---------------------------------------------
Right now, not especially. But I would contact you if I have a proposition to do.
---------------------------------------------




In [16]:
# Print every non-missing entry of the `comment` column, each preceded by a
# dashed separator line.
comment_answers = attendees_data.comment.dropna()
for comment in comment_answers:
    print("---------------------------------------------\n{}".format(comment))

---------------------------------------------
For complicated setups, could be nice to provide a checklist prior to the talk, so half of it is not spent on getting the setup. Also load tests could be done by the speakers. Wifi could be better.
---------------------------------------------
Thanks for organizing this event!! Looking forward to the next one :)
---------------------------------------------
Python Programming Introduction Course was an Introduction to Programming. Teacher was fine but for people without any programming experience.
---------------------------------------------
Thank you very much for the organization!! Really amazing!   
---------------------------------------------
Better wifi next time please :)
---------------------------------------------
Thanks a lot for preparing all meetups!
---------------------------------------------
a microphone would be great in some case
please consider a local server with alocal repository for the workshop. That would be awesom