In [20]:
import pandas as pd
from bokeh.layouts import row
from bokeh.models import ColumnDataSource, HoverTool, Select
from bokeh.plotting import figure, curdoc, show, output_notebook
# output_notebook()

# This exercise will be graded using the following Python and library versions:
###############
# Python 3.8
# Bokeh 2.2.1
# Pandas 1.1.2
###############

# Define the callback function of the Select widget here. Only do this once you have followed the rest of the
# instructions below and have reached the part where you add and configure the Select widget.
# The general idea is to set the data attribute of the plot's ColumnDataSource to the data of one of the
# ColumnDataSources you construct during the data processing. This data change should then automatically be displayed
# in the plot. Take care that the bar labels on the y-axis also reflect this change.


In [21]:
# Load the raw table from the .csv file
df = pd.read_csv('AZA_MLE_Jul2018_utf8.csv', encoding='utf-8')
# Positional indices of the columns that are not needed: 1, 3 and 7 through 14
cols = [1, 3, *range(7, 15)]
df = df.drop(columns=df.columns[cols])

In [22]:
# Preview only the first rows instead of rendering the whole frame
df.head(10)

Unnamed: 0,Species Common Name,TaxonClass,Overall MLE,Overall CI - lower,Overall CI - upper,Male Data Deficient,Female Data Deficient
0,Addax,Mammalia,13.4,12.4,14.5,,
1,"Agouti, Brazilian",Mammalia,8.1,7.2,9.3,,
2,"Alligator, Chinese",Reptilia,30.9,25.9,34.3,yes,yes
3,"Anoa, Lowland",Mammalia,17.7,15.3,21.2,yes,yes
4,"Anteater, Giant",Mammalia,19.7,17.1,21.5,yes,yes
5,"Antelope, Roan",Mammalia,12.5,11.5,13.2,yes,
6,"Antelope, Sable",Mammalia,11.3,10.5,11.9,,
7,"Aracari, Green",Aves,7.9,6.3,9.3,,
8,"Argus, Great",Aves,10.8,9.4,12.5,,
9,"Armadillo, Southern Three-banded",Mammalia,17.6,15.0,18.8,yes,


In [23]:
# task 1

# rename the columns of the data frame according to the following mapping:
# 'Species Common Name': 'species'
# 'TaxonClass': 'taxon_class'
# 'Overall CI - lower': 'ci_lower'
# 'Overall CI - upper': 'ci_upper'
# 'Overall MLE': 'mle'
# 'Male Data Deficient': 'male_deficient'
# 'Female Data Deficient': 'female_deficient'

# Short snake_case names for the columns used in the rest of the notebook
column_names = {
    'Species Common Name': 'species',
    'TaxonClass': 'taxon_class',
    'Overall CI - lower': 'ci_lower',
    'Overall CI - upper': 'ci_upper',
    'Overall MLE': 'mle',
    'Male Data Deficient': 'male_deficient',
    'Female Data Deficient': 'female_deficient',
}
df = df.rename(columns=column_names)

In [24]:
# Preview only the first rows to confirm the new column names
df.head(10)

Unnamed: 0,species,taxon_class,mle,ci_lower,ci_upper,male_deficient,female_deficient
0,Addax,Mammalia,13.4,12.4,14.5,,
1,"Agouti, Brazilian",Mammalia,8.1,7.2,9.3,,
2,"Alligator, Chinese",Reptilia,30.9,25.9,34.3,yes,yes
3,"Anoa, Lowland",Mammalia,17.7,15.3,21.2,yes,yes
4,"Anteater, Giant",Mammalia,19.7,17.1,21.5,yes,yes
5,"Antelope, Roan",Mammalia,12.5,11.5,13.2,yes,
6,"Antelope, Sable",Mammalia,11.3,10.5,11.9,,
7,"Aracari, Green",Aves,7.9,6.3,9.3,,
8,"Argus, Great",Aves,10.8,9.4,12.5,,
9,"Armadillo, Southern Three-banded",Mammalia,17.6,15.0,18.8,yes,


In [25]:
# Remove outliers, split the dataframe by taxon_class, and construct a ColumnDataSource from each of the clean DataFrames
# hints:
# we only use the following three taxon classes: 'Mammalia', 'Aves', 'Reptilia'
# use dataframe.loc to access subsets of the original dataframe and to remove the outliers
# each time you sort the dataframe reset its index
# outliers are entries which have male and/or female data deficient set to yes
# reference dataframe: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
# reference columndatasource: https://bokeh.pydata.org/en/latest/docs/reference/models/sources.html

# construct three independent dataframes based on the aforementioned taxon classes and remove the outliers

# sort the dataframes by 'mle' in descending order and then reset the index

# reduce each dataframe to contain only the 10 species with the highest 'mle'

# sort the dataframe in the correct order to display it in the plot and reset the index again.
# hint: the index decides the y location of the bars in the plot. You might have to modify it to have a visually
# appealing barchart

# There's an entry in the aves dataframe with a species named 'Penguin, Northern & Southern Rockhopper (combined)'.
# Rename that species to 'Penguin, Rockhopper'

# construct a ColumnDataSource for each of the dataframes

# construct a fourth ColumnDataSource that is used as input for the plot and set its data to the Mammalian
# ColumnDataSource as initial value. This fourth ColumnDataSource is required to later be able to change the data
# interactively with the dropdown menu.



In [26]:
# Outliers are rows flagged "yes" in the male and/or female data-deficient column.
# Keep every other row, then drop the two flag columns since they are no longer needed.
is_reliable = df["male_deficient"].ne("yes") & df["female_deficient"].ne("yes")
df = df.loc[is_reliable].drop(columns=["male_deficient", "female_deficient"])

In [27]:
# One DataFrame per taxon class used in the plot
df_mammal, df_aves, df_reptile = (
    df.loc[df["taxon_class"] == taxon]
    for taxon in ("Mammalia", "Aves", "Reptilia")
)

In [28]:
# Rank the mammals by 'mle', keep the 10 highest, then re-sort ascending and
# renumber the index from 0 so the rows are in the order used for the plot.
df_mammal = (
    df_mammal
    .sort_values(by="mle", ascending=False)
    .reset_index(drop=True)
    .nlargest(10, "mle")
    .sort_values(by="mle", ascending=True)
    .reset_index(drop=True)
)
df_mammal

Unnamed: 0,species,taxon_class,mle,ci_lower,ci_upper
0,"Bear, Andean Spectacled",Mammalia,26.1,23.8,27.7
1,"Macaque, Lion-tailed",Mammalia,26.6,25.4,27.8
2,Siamang,Mammalia,27.2,23.8,29.4
3,"Orangutan, Sumatran",Mammalia,28.7,25.6,32.6
4,"Rhinoceros, Southern White",Mammalia,31.4,29.1,33.0
5,"Gorilla, Western Lowland",Mammalia,35.3,33.3,37.1
6,"Hippopotamus, River",Mammalia,35.5,32.5,39.5
7,"Elephant, African",Mammalia,36.3,32.2,38.7
8,Chimpanzee,Mammalia,36.3,32.7,39.4
9,"Elephant, Asian",Mammalia,42.0,37.5,45.7


In [29]:
# Rank the birds by 'mle', keep the 10 highest, then re-sort ascending and
# renumber the index from 0 so the rows are in the order used for the plot.
df_aves = (
    df_aves
    .sort_values(by="mle", ascending=False)
    .reset_index(drop=True)
    .nlargest(10, "mle")
    .sort_values(by="mle", ascending=True)
    .reset_index(drop=True)
)

# Shorten the long Rockhopper label. The row is located by its species name
# rather than by a hard-coded row position, so the rename still hits the right
# species if the ranking (and therefore the row index) ever changes.
df_aves["species"] = df_aves["species"].replace(
    "Penguin, Northern & Southern Rockhopper (combined)",
    "Penguin, Rockhopper",
)
df_aves

Unnamed: 0,species,taxon_class,mle,ci_lower,ci_upper
0,"Penguin, Adelie",Aves,17.3,16.3,19.2
1,"Penguin, African",Aves,17.4,16.3,18.8
2,"Seriema, Red-legged",Aves,17.9,13.6,19.5
3,"Frogmouth, Tawny",Aves,18.1,15.8,19.7
4,"Macaw, Hyacinth",Aves,24.3,22.2,25.8
5,"Penguin, Rockhopper",Aves,24.9,22.5,27.15
6,"Penguin, King",Aves,25.4,23.7,26.3
7,"Flamingo, Caribbean",Aves,25.7,24.8,26.9
8,"Penguin, Macaroni",Aves,26.3,25.6,28.5
9,"Flamingo, Chilean",Aves,27.8,26.3,29.6


In [30]:
# Rank the reptiles by 'mle', keep the 10 highest, then re-sort ascending and
# renumber the index from 0 so the rows are in the order used for the plot.
df_reptile = (
    df_reptile
    .sort_values(by="mle", ascending=False)
    .reset_index(drop=True)
    .nlargest(10, "mle")
    .sort_values(by="mle", ascending=True)
    .reset_index(drop=True)
)
df_reptile

Unnamed: 0,species,taxon_class,mle,ci_lower,ci_upper
0,"Bushmaster, South American",Reptilia,7.8,6.4,9.1
1,"Rattlesnake, Mexican Lance-headed",Reptilia,8.4,7.2,9.0
2,"Rattlesnake, Eastern Massasauga",Reptilia,8.7,6.8,9.5
3,"Iguana, Fiji Island Banded",Reptilia,9.7,8.5,11.6
4,"Snake, Eastern Indigo",Reptilia,11.4,10.1,12.1
5,"Lizard, Chinese Crocodile",Reptilia,12.2,10.4,13.4
6,"Tortoise, Egyptian",Reptilia,12.5,10.8,14.7
7,"Turtle, Wood",Reptilia,13.1,10.3,17.2
8,"Boa, Jamaican",Reptilia,17.4,15.7,19.0
9,"Lizard, Rio Fuerte Beaded",Reptilia,24.2,22.0,25.4


In [31]:
# Wrap each cleaned DataFrame in its own ColumnDataSource
mammal_source, aves_source, reptile_source = (
    ColumnDataSource(data=frame)
    for frame in (df_mammal, df_aves, df_reptile)
)
# Fourth source for the plot input, initialised with the Mammalia data
init_source = ColumnDataSource(data=df_mammal)

In [32]:
# Inspect the column data held by the Mammalia source
mammal_source.data

{'index': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64),
 'species': array(['Bear, Andean Spectacled', 'Macaque, Lion-tailed', 'Siamang',
        'Orangutan, Sumatran', 'Rhinoceros, Southern White',
        'Gorilla, Western Lowland', 'Hippopotamus, River',
        'Elephant, African', 'Chimpanzee', 'Elephant, Asian'], dtype=object),
 'taxon_class': array(['Mammalia', 'Mammalia', 'Mammalia', 'Mammalia', 'Mammalia',
        'Mammalia', 'Mammalia', 'Mammalia', 'Mammalia', 'Mammalia'],
       dtype=object),
 'mle': array([26.1, 26.6, 27.2, 28.7, 31.4, 35.3, 35.5, 36.3, 36.3, 42. ]),
 'ci_lower': array([23.8, 25.4, 23.8, 25.6, 29.1, 33.3, 32.5, 32.2, 32.7, 37.5]),
 'ci_upper': array([27.7, 27.8, 29.4, 32.6, 33. , 37.1, 39.5, 38.7, 39.4, 45.7])}

In [33]:
# Inspect the column data held by the Aves source
aves_source.data

{'index': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64),
 'species': array(['Penguin, Adelie', 'Penguin, African', 'Seriema, Red-legged',
        'Frogmouth, Tawny', 'Macaw, Hyacinth', 'Penguin, Rockhopper',
        'Penguin, King', 'Flamingo, Caribbean', 'Penguin, Macaroni',
        'Flamingo, Chilean'], dtype=object),
 'taxon_class': array(['Aves', 'Aves', 'Aves', 'Aves', 'Aves', 'Aves', 'Aves', 'Aves',
        'Aves', 'Aves'], dtype=object),
 'mle': array([17.3, 17.4, 17.9, 18.1, 24.3, 24.9, 25.4, 25.7, 26.3, 27.8]),
 'ci_lower': array([16.3, 16.3, 13.6, 15.8, 22.2, 22.5, 23.7, 24.8, 25.6, 26.3]),
 'ci_upper': array([19.2 , 18.8 , 19.5 , 19.7 , 25.8 , 27.15, 26.3 , 26.9 , 28.5 ,
        29.6 ])}

In [34]:
# Inspect the column data held by the Reptilia source
reptile_source.data

{'index': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64),
 'species': array(['Bushmaster, South American', 'Rattlesnake, Mexican Lance-headed',
        'Rattlesnake, Eastern Massasauga', 'Iguana, Fiji Island Banded',
        'Snake, Eastern Indigo', 'Lizard, Chinese Crocodile',
        'Tortoise, Egyptian', 'Turtle, Wood', 'Boa, Jamaican',
        'Lizard, Rio Fuerte Beaded'], dtype=object),
 'taxon_class': array(['Reptilia', 'Reptilia', 'Reptilia', 'Reptilia', 'Reptilia',
        'Reptilia', 'Reptilia', 'Reptilia', 'Reptilia', 'Reptilia'],
       dtype=object),
 'mle': array([ 7.8,  8.4,  8.7,  9.7, 11.4, 12.2, 12.5, 13.1, 17.4, 24.2]),
 'ci_lower': array([ 6.4,  7.2,  6.8,  8.5, 10.1, 10.4, 10.8, 10.3, 15.7, 22. ]),
 'ci_upper': array([ 9.1,  9. ,  9.5, 11.6, 12.1, 13.4, 14.7, 17.2, 19. , 25.4])}

In [35]:
# task 2:

# configure mouse hover tool
# reference: https://bokeh.pydata.org/en/latest/docs/user_guide/categorical.html#hover-tools
# your tooltip should contain the data of 'ci_lower' and 'ci_upper' named 'low' and 'high' in the visualization

# construct a figure with the correct title, axis labels, x and y range, add the hover tool and disable the toolbar

# add the horizontal bar chart to the figure and configure it correctly
# the lower limit of the bar should be ci_lower and the upper limit ci_upper

# add a Select tool (dropdown selection) and configure its 'on_change' callback. Define the callback function in the
# beginning of the document and write it such that the user can choose which taxon_class is visualized in the plot.
# the default visualization at startup should be 'Mammalia'

# use curdoc to add your plot and selection widget such that you can start a bokeh server and an interactive plotting
# session.
# you should be able to start a plotting session executing one of the following commands in a terminal:
# (if you're using a virtual environment you first have to activate it before using these commands. You have to be in
# the same folder as your dva_hs20_ex1_skeleton.py file.)
# Interactive session: bokeh serve --show dva_hs20_ex1_skeleton.py
# If the above doesn't work use the following: python -m bokeh serve --show dva_hs20_ex1_skeleton.py
# For interactive debugging sessions you can use one of the two commands below. As long as you don't close your last
# browser tab you can save your changes in the python file and the bokeh server will automatically reload your file,
# reflecting the changes you just made. Be aware that after changes leading to errors you usually have to restart
# the bokeh server by interrupting it in your terminal and executing the command again.
# bokeh serve --dev --show dva_hs20_ex1_skeleton.py
# python -m bokeh serve --dev --show dva_hs20_ex1_skeleton.py

In [37]:
def bkapp(doc):
    """Build the interactive life-expectancy bar chart and add it to ``doc``.

    The chart is a horizontal bar chart with one bar per species, spanning
    ``ci_lower`` to ``ci_upper``. A Select widget next to the plot switches the
    displayed taxon class between 'Mammalia', 'Aves' and 'Reptilia'; the
    initial selection is 'Mammalia'.

    Parameters
    ----------
    doc : bokeh.document.Document
        Document that receives the plot/widget layout as a root.

    Notes
    -----
    Reads the module-level ``mammal_source``, ``aves_source``,
    ``reptile_source`` and ``init_source`` but never attaches or mutates them.
    """
    taxon_sources = {
        "Mammalia": mammal_source,
        "Aves": aves_source,
        "Reptilia": reptile_source,
    }

    # A Bokeh model may be owned by only one document. Handing the module-level
    # init_source to the glyph would tie it to the first document, so calling
    # this function again (re-running show(), a second session) would fail and
    # the callback would overwrite the shared "initial" data. Each document
    # therefore gets its own source, seeded with a copy of the initial data.
    plot_source = ColumnDataSource(data=dict(init_source.data))

    # y_range has to list the species names explicitly (categorical axis);
    # without it the bars cannot be placed and the plot stays empty.
    p = figure(
        title="Medium Life Expectancy of Animals in Zoos",
        x_axis_label="Medium Life Expectancy (Years)",
        y_axis_label="Species",
        y_range=list(plot_source.data["species"]),
        x_range=[0, 50],
        plot_width=825,
        plot_height=500,
        toolbar_location=None,
    )

    # One horizontal bar per species, reaching from the lower to the upper
    # confidence bound. Columns are referenced by name in the source.
    p.hbar(y="species", left="ci_lower", right="ci_upper", height=0.6, source=plot_source)

    hover = HoverTool(tooltips=[("low", "@ci_lower"), ("high", "@ci_upper")])
    p.add_tools(hover)

    def update(attr, old, new):
        """Swap the plotted data to the taxon class chosen in the dropdown."""
        # Read the widget state directly instead of relying on `new`; anything
        # unexpected falls back to the Mammalia data.
        selected = taxon_sources.get(dropdown.value, mammal_source)

        plot_source.data = dict(selected.data)

        # The y-axis factors must follow the data, otherwise the new species
        # do not exist on the axis and the plot looks empty.
        p.y_range.factors = list(plot_source.data["species"])

    # Dropdown menu listing the available taxon classes
    dropdown = Select(
        value="Mammalia",
        options=list(taxon_sources),
        title="Select Taxon Class",
        width=100,
    )
    dropdown.on_change("value", update)

    # Place the plot and the dropdown side by side
    doc.add_root(row(p, dropdown))

# show() can embed a Bokeh server app only after notebook output has been
# activated. Doing it here keeps the cell working on a freshly started kernel;
# calling output_notebook() again when it is already active is harmless.
output_notebook()
show(bkapp)