In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, normalize

from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.models import HoverTool, CustomJS, Slider
from bokeh.models.widgets import Div, Paragraph, Select
from bokeh.events import Tap, DoubleTap
from bokeh.layouts import layout
from bokeh.io import output_notebook, push_notebook

from ipywidgets import interact, interactive, SelectMultiple

from IPython.core.display import display, HTML
# Make the notebook container span the full browser width so that elements
# (the plot and the source-code Div) can be arranged next to each other.
display(HTML("<style>.container { width:100% !important; }</style>"))

import preprocessing_data

# Configure Bokeh to render its output inline in the notebook; the cells below call
# show(..., notebook_handle=True) and push_notebook().
output_notebook()

In [2]:
# Load the Partner.csv history through the project helper and bind it to the global `df`
# that the plotting and callback functions below read.
df = preprocessing_data.read_and_preprocess_from_csv('data/csv/oving5/1395669706/Partner.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [34]:
# NOTE(review): rebinds the global `df` to Card.csv from a different directory, replacing the
# Partner.csv frame loaded in the previous cell. The recorded execution count (34) is out of
# sequence with the surrounding cells, so which file the later cells actually used depends on
# run order -- confirm the intended input and keep a single load cell.
df = preprocessing_data.read_and_preprocess_from_csv('data/csv/oving5/0141126194/Card.csv')

This file has only been edited once, skipping.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [None]:
# NOTE(review): unexecuted cell (no execution count) that would rebind the global `df` again,
# this time via get_df_from_csv instead of read_and_preprocess_from_csv -- presumably a
# scratch alternative; confirm whether it should be removed.
df = preprocessing_data.get_df_from_csv('data/csv/oving5/1395669706/Partner.csv')

In [3]:
# Shared state: written by show_size_measure() and read by show_source_code().
# file_number is kept as a string because it is concatenated onto column names
# ('SourceCode' + file_number); '1' is the same fallback show_size_measure() uses.
file_number = '1'
# Bokeh Div that displays the source code; created by show_size_measure().
div = None

In [4]:
def format_source_code(source_code):
    """Render a source-code string as an HTML paragraph.

    HTML-significant characters (``&``, ``<``, ``>``) are escaped first so that code
    such as ``List<String>`` or ``a < b && c`` is displayed literally instead of being
    parsed as markup. Newlines then become ``<br />`` and tabs become ``&emsp;``.

    Parameters
    ----------
    source_code : str
        Raw source text.

    Returns
    -------
    str
        A ``<p>`` element wrapping the escaped, line-broken text.
    """
    # Local import: the standard-library helper is only needed in this function.
    from html import escape

    # Escape before inserting our own tags/entities; otherwise they would be escaped too.
    escaped = escape(source_code, quote=False)
    body = escaped.replace("\n", "<br />").replace("\t", "&emsp;")
    return "<p style='font-size: 90%; line-height: 100%'>{}</p>".format(body)

In [5]:
def show_source_code(x, y):
    """Show the source code of the row nearest in time to a clicked plot position.

    Called from the browser through the command string built in ``register_event``.
    Reads the globals ``df``, ``file_number`` and ``div``; updates ``div.text`` and
    pushes the change to the notebook.

    Parameters
    ----------
    x : float
        Clicked x coordinate, interpreted as milliseconds since the epoch.
    y : float
        Clicked y coordinate. Accepted because the callback sends it, but not used.
    """
    # Nothing to update until show_size_measure() has created the Div.
    if div is None:
        return

    clicked_at = pd.to_datetime(round(x), unit="ms")

    # Index.get_loc(..., method='nearest') was removed in pandas 2.0;
    # get_indexer is the supported equivalent and returns -1 when there is no match.
    nearest_pos = df.index.get_indexer([clicked_at], method="nearest")[0]
    if nearest_pos == -1:
        return
    nearest_row = df.iloc[nearest_pos]

    # format() rather than '+' so a non-string file_number cannot raise TypeError.
    source_code = nearest_row["SourceCode{}".format(file_number)]
    # A missing value (e.g. NaN) is shown as empty text instead of raising.
    if not isinstance(source_code, str):
        source_code = ""

    div.text = format_source_code(source_code)
    push_notebook()

In [6]:
def register_event():
    """Build the CustomJS callback that forwards a click position to the Python kernel.

    The generated JavaScript reads ``x`` and ``y`` from the triggering event (``cb_obj``)
    and asks the notebook kernel to execute ``show_source_code(x, y)``. When no
    ``IPython.notebook.kernel`` is available the callback does nothing.

    Returns
    -------
    CustomJS
        A new callback object on every call.
    """
    return CustomJS(code="""
        var x = cb_obj['x'];
        var y = cb_obj['y'];
        // typeof guard: referencing an undefined global IPython would throw.
        if (typeof IPython !== 'undefined' && IPython.notebook && IPython.notebook.kernel) {
            var kernel = IPython.notebook.kernel;
            // Declared with var: an undeclared assignment throws in strict mode.
            var cmd = "show_source_code(" + x + ", " + y + ")";
            kernel.execute(cmd, {}, {});
        }
    """)

In [7]:
def show_size_measure(SizeMeasure, columns):
    """Plot the chosen columns against time next to a Div used to display source code.

    Intended as the callback for ``interact``: it reads the global ``df``, rebuilds the
    Bokeh figure and Div on every call, and stores the Div and the selected file number
    in the globals ``div`` / ``file_number`` for ``show_source_code`` to use.

    Parameters
    ----------
    SizeMeasure : str
        Name of the size-measure column to plot. The digits in the name select the
        matching ``Completion<N>`` column; when the name has no digits, ``'1'`` is used.
    columns : sequence of str
        Further columns of ``df`` to plot. ``SizeMeasure`` is appended if it is missing.

    Notes
    -----
    When more than one column is plotted, each is min-max scaled so the lines share one
    axis; a single column is plotted with its raw values.
    """
    global file_number, div

    # Work on a private list so the caller's selection is never modified and pandas
    # receives an unambiguous list of labels.
    columns = list(columns)
    if SizeMeasure not in columns:
        columns.append(SizeMeasure)

    df_copy = df.copy()
    df_copy['Index_formatted'] = df_copy.index.strftime("%d-%m %H:%M:%S")

    if len(columns) > 1:
        # Scale only the columns that are going to be plotted.
        scaled = MinMaxScaler().fit_transform(df_copy.loc[:, columns])
    else:
        scaled = df_copy.loc[:, columns].values
    # New frame holding the plotted values, with a '_scaled' suffix on the column names ...
    scaled_df = pd.DataFrame(scaled, index=df_copy.index, columns=[col + '_scaled' for col in columns])
    # ... merged back into the copy on the shared index.
    df_copy = df_copy.merge(scaled_df, left_index=True, right_index=True)

    source = ColumnDataSource(df_copy)

    file_number = ''.join(filter(str.isdigit, SizeMeasure))
    if len(file_number) < 1:
        file_number = '1'

    line_color = [
        '#2196F3',
        '#FF9800',
        '#F44336',
        '#9C27B0',
        '#8BC34A',
        '#795548',
        '#607D8B'
    ]
    # Cycle through the palette so selecting more columns than colours cannot raise IndexError.
    colors = [line_color[i % len(line_color)] for i in range(len(columns))]

    hover_text = ''.join(
        '<span style="font-size: 10px; color: {1};">{0}: @{0}</span><br />'.format(column, color)
        for column, color in zip(columns, colors)
    )

    hover = HoverTool( tooltips="""
        <div>
            <div>
                <span style="font-size: 12px; color: #01579B;">Timestamp: @Index_formatted</span><br />
                {0}
            </div>
        </div>
        """.format(hover_text)
    )

    p = figure(x_axis_type="datetime", plot_width=900, plot_height=400, tools=[hover,"pan","wheel_zoom","box_zoom","reset"],
               title="SizeMeasure and SourceCode")

    # Mark the first row where completion == 1 with a green circle. The marker is skipped
    # when the completion column is absent or never reaches 1.
    completion_col = 'Completion' + file_number
    if completion_col in df_copy.columns:
        completed_rows = df_copy[df_copy[completion_col] == 1.0]
        if not completed_rows.empty:
            first_completed = completed_rows.iloc[0]
            p.circle(first_completed.name, first_completed[SizeMeasure + '_scaled'], line_width=8, line_color="#1B5E20", fill_color='#1B5E20')

    for column, color in zip(columns, colors):
        p.line('index', column + '_scaled', line_width=2, line_color=color, source=source)

    p.js_on_event(Tap, register_event())
    p.js_on_event(DoubleTap, register_event())

    # Pre-fill the Div with as many empty lines as the largest size-measure value.
    # An all-NaN column gives a NaN maximum, which int() cannot convert, so fall back to 0.
    max_size = df_copy[SizeMeasure].max()
    placeholder_lines = int(max_size) if pd.notna(max_size) else 0
    div = Div(text=format_source_code('\n' * placeholder_lines), width=640)

    plot_layout = layout([
        [p, div],
    ])

    # TODO: pre-populate the Div with the source code of the first valid row
    # (an earlier attempt at this did not work).

    show(plot_layout, notebook_handle=True)

In [8]:
# Multi-select widget offering every column of `df`, with nothing pre-selected.
# It is passed to interact() below as the `columns` argument of show_size_measure.
# NOTE(review): all columns are offered, and show_size_measure feeds the selection to
# MinMaxScaler when more than one column is plotted -- presumably that fails for
# non-numeric columns; confirm whether the options should be filtered.
sel = SelectMultiple(
    options=list(df.columns),
    value=[],
    rows=10,
    description='Columns:',
    disabled=False
)

In [9]:
# Wire the controls to show_size_measure: the SizeMeasure choices are the columns of `df`
# whose names are 'SizeMeasure' followed by one or more digits; `columns` comes from `sel`.
# The pattern is a raw string because '\d' in a normal string literal is an invalid escape
# sequence (a warning today, an error in future Python versions).
interact(show_size_measure,
         SizeMeasure=list(df.filter(regex=r'^SizeMeasure\d{1,}$').columns),
         columns=sel,
        )

<function __main__.show_size_measure>

In [None]:
# NOTE(review): unexecuted scratch cell. In the code shown here `p` is only assigned as a
# local inside show_size_measure(), so on a fresh kernel this would raise NameError unless
# `p` is defined elsewhere -- confirm and consider removing this cell.
plot_layout = layout([
    [p],
    [div]
])

In [None]:
# NOTE(review): unexecuted scratch cell. It needs a global `plot_layout`; the one built in
# show_size_measure() is local to that function -- confirm and consider removing this cell.
show(plot_layout, notebook_handle=True)

In [None]:
# NOTE(review): unexecuted scratch cell. It needs a global `p`; the figure built in
# show_size_measure() is local to that function -- confirm and consider removing this cell.
show(p, notebook_handle=True)

In [None]:
# NOTE(review): unexecuted scratch cell. The global `div` is None until show_size_measure()
# has run, so this depends on execution order -- confirm and consider removing this cell.
show(div, notebook_handle=True)