In [17]:
import typing

import pandas as pd
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource, Select, HoverTool
from bokeh.plotting import figure, curdoc,show

#curdoc().clear()

In [18]:
# Load the datasets
df_sleep = pd.read_csv("mammals_sleep.csv")
df_predation = pd.read_csv("mammals_predation.csv")

# TIPP: Before advancing any further, inspect the datasets, how are they structured, what are the columns?
#       You can do this by using a simple print(df_sleep) or df_sleep.info()
#       Real-world datasets typically contain mistakes, typos, NaNs and other problems we have to deal with
#       Try to spot them early!
#df_sleep
#df_sleep
# (0.1) In both datasets, drop all rows where the species in nan                                (please keep this line)
# TODO Here comes your code
#df_sleep = df_sleep.dropna()
#df_predation = df_predation.dropna()
df_sleep.dropna()
df_predation.dropna()

Unnamed: 0,Species,Predation,Exposure,Danger
0,Africanelephant,3,5,3
1,Africangiantpouchedrat,3,1,3
2,ArcticFox,1,1,1
3,Arcticgroundsquirrel,5,2,3
4,Asianelephant,3,5,4
...,...,...,...,...
57,Treehyrax,3,1,3
58,Treeshrew,3,2,2
59,Vervet,4,3,4
60,Wateropossum,2,1,1


In [19]:
# (0.1) Join the datasets based on the species column to one dataset                            (please keep this line)
# TODO Here comes your code
df_predation.columns = df_predation.columns.str.lower()
df_combined = pd.merge(df_sleep,df_predation,on='species')
#df_combined.dropna()

# (0.1) Remove all species where the body_wt is larger than 1000kg from the combined dataset    (please keep this line)
# TODO Here comes your code
df_combined.drop(df_combined[df_combined['body_wt'] > 1000].index,inplace=True)

# (0.2) Rename all columns such that they do not contain any                                    (please keep this line)
# whitespaces and uppercase letters anymore
# eg. "Peter pan" -> "peter_pan"
# TODO Here comes your code
df_combined.columns = df_combined.columns.str.lower()
df_combined.columns = df_combined.columns.str.replace(" ","_")
df_combined

Unnamed: 0,species,body_wt,brain_wt,non_dreaming,dreaming,total_sleep,life_span,gestation,predation,exposure,danger
0,Muskshrew,0.048,0.33,10.8,2.0,12.8,2.0,30.0,4,1,3
1,Treeshrew,0.104,2.50,13.2,2.6,15.8,2.3,46.0,3,2,2
2,Lessershort-tailedshrew,0.005,0.14,7.7,1.4,9.1,2.6,21.5,5,2,4
3,Wateropossum,3.500,3.90,12.8,6.6,19.4,3.0,14.0,2,1,1
4,Mouse,0.023,0.40,11.9,1.3,13.2,3.2,19.0,4,1,3
...,...,...,...,...,...,...,...,...,...,...,...
61,Arcticgroundsquirrel,0.920,5.70,,,16.5,,25.0,5,2,3
62,,0.750,6.00,,,17.0,,26.0,2,1,1
63,,0.750,6.00,,,17.0,,26.0,2,2,1
64,,123.000,45.00,,,15.0,,24.0,2,1,1


In [26]:
"""
######################################
(1.5 Points) Section 2: Visualization
######################################

In this section, we will implement the actual visualization in Bokeh
The concept is as follows:

1. We have a ColumnDataSource "source" which holds our current data
   (x-coordinates (xs), y-coordinates (ys) and the names of the species (species))
2. We have a Plot which displays the above ColumnDataSource using a circle glyph
3. We have two Dropdown menues (called Select in Bokeh), where the user can select which column is used for the x-axis
   or the y-axis respectively.
4. Whenever the Selects change, we have to update the ColumnDataSource, this is done via a callback function

We begin with implementing a fetch_data function, which takes two Column names from the input table,
and returns a new dataset with the corresponding values as x and y coordinates.
"""

# (0.5 Points) Implement the fetch_data function according to it's docstrings                   (please keep this line)
def fetch_data(x_column_name:str, y_column_name:str):
    """
    (0.3 Points) Given two column names, this function returns a dictionary with
    - (xs): a list of x-coordinates
    - (ys): a list of y-coordinates
    - (species): a list of species names

    (0.2 Points) Ensures, the result does not contain any NaNs
    """
    # TODO Here comes your code
    df_combined_new = df_combined.dropna()

    xs = df_combined_new[x_column_name].tolist()
    ys = df_combined_new[y_column_name].tolist()
    species = df_combined_new.species.tolist()
    for n in xs:
        if n == "NaN":
            raise ValueError
    for m in ys:
        if m == "NaN":
            raise ValueError
    for k in species:
        if k == "NaN":
            raise ValueError
    return dict(xs = xs, ys = ys, species=species)

data = fetch_data("body_wt","brain_wt")
data_1 = fetch_data("predation","dreaming")
#data

In [31]:
# (0.2 Points) Create a ColumnDataSource with the data from fetch_data()                        (please keep this line)
# You can use any columns you want as the initial values
# TODO Here comes your code
source = ColumnDataSource(fetch_data("body_wt","brain_wt"))
#source = ColumnDataSource(data_1)

# (0.2 Points) Create a figure with log axes, set initial axis labels correctly based            (please keep this line)
# on the previous step
p=figure(y_axis_type="log", x_axis_type="log", title='DVA: Mammal data sleeping inspector')
p.yaxis.axis_label = "brain_wt"
p.xaxis.axis_label = "body_wt"
p.sizing_mode = "stretch_both"
p.xgrid.grid_line_color = None

# (0.1 Points) add tooltips with the species names, x and y coordinates                          (please keep this line)
# TODO Here comes your code
plot = p.add_tools(HoverTool(
    tooltips=[("Species","@species"),
        ("x", "$x"),
        ("y", "$y"),
        ]))
# (0.1 Points) Create a circle glyph and bind it to the ColumnDataSource created previously      (please keep this line)
# TODO Here comes your code
p.circle(x = 'xs', y='ys', color={'species': 'color'}, source = source,line_color="#3288bd", fill_color="white", line_width=3)
#show(p)

#select = Select(title="Selection of variables", value ='', options=['data_01','data_02', 'data'])

select_xaxis = Select(title="X-axis variables", options=['body_wt', 'predation'], value='')
select_yaxis = Select(title="Y-axis variables", options=['brain_wt','dreaming'], value='')

def callback(attr, old, new):
    """
    This function is called whenever the current value of the select_xaxis or select_yaxis changes.

    Here, we have to
    - (0.1 Points) update the ColumnDataSource.data with a new dictionary returned from fetch_data
    - (0.1 Points) update the axis labels according to the new columns selected.

    Tipp:   Bokeh callbacks typically have this attr, old, new signature, however, in this case you can ignore
            them and fetch the current value of the two Select menus directly by accessing doing <your_select>.value

    """
    # TODO Here comes your code
    if select_yaxis.value == 'predation':
        new_data = fetch_data('predation','dreaming')
        p.xaxis.axis_label = "predation"
        p.yaxis.axis_label = 'dreaming'
    # Else, update 'y' to new
    elif select_yaxis.value == 'dreaming':
        new_data = fetch_data('predation','exposure')
        p.yaxis.axis_label = 'dreaming'
        p.xaxis.axis_label = "predation"

    source.data = ColumnDataSource(new_data)

# (0.3) Implement two Select Widgets and connect them to the callbacks                          (please keep this line)
#       Remove "species" from the list
# Tipp: If you are unsure on how to do this, have a look at the Callbacks Section and the Select Widget here:
# https://docs.bokeh.org/en/latest/docs/user_guide/interaction/widgets.html

# TODO Here comes your code
select_yaxis.on_change('value',callback)
select_yaxis.on_change('value',callback)
# (0.1 Point) Add everything to the layout                                                      (please keep this line)
lt = layout([select_xaxis,select_yaxis], p)
    # ...
show(lt)
#curdoc().clear()
curdoc().add_root(lt)
curdoc().title = 'dva_ex1'


You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



RuntimeError: Models must be owned by only a single document, DataRange1d(id='4226', ...) is already in a doc