# Preparing the graphs with Plotly

The following code presents two different ways of plotting the results (measured blood glucose values vs. predicted blood glucose values).

1. Approach: Scatter plot with *opacity*
- Advantages: Easy to implement, low opacity makes the dots distinguishable
- Disadvantages: Low opacity makes isolated dots that lie far from the center hard to see

2. Approach: Representation via *"heat-map"* approach
- Advantages: Intuitive to read; the color scale can be adjusted as wished (e.g. starting the scale at a non-white value so that every single dot can be seen, then gradually changing to other colors); smooth contours look appealing
- Disadvantages:

In [None]:
import plotly.express as px
import pandas as pd  
import numpy as np
import plotly.graph_objects as go
import os
from plotly.offline import iplot
from plotly.subplots import make_subplots

In [None]:
# Stanford identity colors (hex codes) used for the plot traces
SPIRITED_STANFORD_IDENT_COL = "#E04F39"  # "Spirited"
ILLUMINATING_STANFORD_IDENT_COL = "#FEC51D"  # "Illuminating"
SKY_STANFORD_IDENT_COL = "#4298B5"  # "Sky"

# [lower, upper] limits of the two plot axes (each constant owns its own list)
X_AXIS_RANGE = [0, 450]
Y_AXIS_RANGE = [0, 450]

In [None]:
# Read the example dataset; the CSV lives two directory levels above this notebook.
DATA_DIR = 'Example_Dataset'
filename = 'kc_house_data.csv'

df = pd.read_csv(os.path.join("..", "..", DATA_DIR, filename))
print(df)

# Raw inputs: one column as-is and the natural log of the price column.
x_unscaled = df["sqft_living15"]
y_unscaled = np.log(df["price"])

# Linear rescaling of x: 1000 -> 40 and 5000 -> 120,
# i.e. slope (120 - 40) / (5000 - 1000) = 0.02 and intercept 20; then stretched by 1.5.
x_shifted = 0.02 * x_unscaled + 20.0
x_scaled = 1.5 * x_shifted

# Linear rescaling of y: 12 -> 40 and 14 -> 140,
# i.e. slope (140 - 40) / (14 - 12) = 50 and intercept -560.
y_scaled = 50.0 * y_unscaled - 560.0

In [None]:
# Read the real dataset from the CSV file one directory level up.
filename = 'base_1_leander.csv'
df_raw = pd.read_csv(os.path.join("..", filename))
print(np.shape(df_raw))

# Keep only the rows of the "test" split for which column M equals 10.
df_filtered = df_raw.loc[(df_raw['M'] == 10) & (df_raw['split'] == "test")]
print(df_filtered)

# .values turns each column into a plain array that can be indexed by position:
# y holds the real measurements, y_hat the corresponding predictions.
y = df_filtered['y'].values
y_hat = df_filtered['y_hat'].values


In [None]:
# insert linear regression line
def get_lin_reg(y, y_hat):
    """Build the 'Line of best fit' trace for predictions vs. measurements.

    A degree-1 least-squares fit ``y_hat ~ m * y + b`` is computed with
    ``np.polyfit`` and returned as a Plotly line trace.

    Args:
        y: measured values (plotted on the x axis).
        y_hat: predicted values (plotted on the y axis), same length as ``y``.

    Returns:
        A ``go.Scattergl`` trace named 'Line of best fit' drawn in
        ILLUMINATING_STANFORD_IDENT_COL with width 3.
    """
    m, b = np.polyfit(y, y_hat, 1)

    # A straight line is fully described by its two end points, so only the
    # smallest and largest measurement are evaluated instead of shipping one
    # (unsorted) vertex per data point to the renderer.
    x_ends = np.array([np.min(y), np.max(y)])
    lin_reg = x_ends * m + b

    lin_reg_line = go.Scattergl(
        x=x_ends,
        y=lin_reg,
        name='Line of best fit',
        # explicit mode: without it Plotly may add markers for short inputs
        mode='lines',
        line=dict(
            color=ILLUMINATING_STANFORD_IDENT_COL,
            width=3,
        ),
    )
    return lin_reg_line

In [None]:
# insert the reference line (optimal prediction)
def get_ref_line():
    """Return the 'Optimal prediction' trace, i.e. the identity line y == x.

    The line runs from X_AXIS_RANGE[0] to X_AXIS_RANGE[1]; both coordinates use
    the same end points so that the line is exactly the diagonal.
    """
    # two points are enough to describe a straight line
    diagonal = np.linspace(start=X_AXIS_RANGE[0], stop=X_AXIS_RANGE[1], num=2)
    line_style = {'color': SPIRITED_STANFORD_IDENT_COL, 'width': 3}

    return go.Scattergl(
        x=diagonal,
        y=diagonal,
        name='Optimal prediction',
        mode='lines',
        line=line_style,
    )

In [None]:
# generate plot version 1 (scatter plot with low marker opacity)

# overlay lines: the y == x reference and the least-squares fit
ref_line = get_ref_line()
lin_reg_line = get_lin_reg(y, y_hat)

# every (measurement, prediction) pair as a faint marker, so that
# overlapping markers add up to a darker color
trace1 = go.Scattergl(
    x=y,
    y=y_hat,
    name='Data points',
    mode='markers',
    marker={
        'opacity': 0.1,
        'color': SKY_STANFORD_IDENT_COL,
        'size': 5,
    },
)

data = [trace1, lin_reg_line, ref_line]

layout = go.Layout(
    title=dict(text="Measurements vs. Prediction", x=0.4, xanchor='center'),
    font={'family': "Arial", 'size': 20, 'color': "black"},
    xaxis={
        'title': 'True Blood Glucose Value',
        'showgrid': False,
        'gridcolor': "#DAD7CB",
        'gridwidth': 1,
    },
    yaxis={
        'title': 'Prediction of Blood Glucose Value',
        'showgrid': False,
        'gridcolor': "#DAD7CB",
        'gridwidth': 1,
    },
    height=600,
    width=600,
    legend={'yanchor': "bottom", 'y': 0.1, 'xanchor': "left", 'x': 0.9},
    hovermode='closest',
    # alpha 0 -> fully transparent paper and plot backgrounds
    paper_bgcolor='rgba(0, 0, 0, 0)',
    plot_bgcolor='rgba(0, 0, 0, 0)',
)

# the axis ranges are applied on top of the layout via the
# layout_<axis>_range keyword arguments
figure = go.Figure(
    data=data,
    layout=layout,
    layout_xaxis_range=X_AXIS_RANGE,
    layout_yaxis_range=Y_AXIS_RANGE,
)

iplot(figure)

In [None]:
# generate plot version 2

MIN_GLUCOSE = 0
MAX_GLUCOSE = 400  # can be adjusted, currently cut off at this point

# square grid of counters with grid_dim cells per axis;
# each cell spans cell_size glucose units
grid_dim = 100
cell_size = MAX_GLUCOSE / grid_dim
grid = np.zeros(shape=(grid_dim, grid_dim))
invalid = 0  # count of data points that were ignored
print("Shape of the grid is " , np.shape(grid))


def populate_grid(y, y_hat, invalid, max_glucose=None, n_cells=None):
    """Count how many (y, y_hat) pairs fall into each cell of a square grid.

    The value range [0, max_glucose) is divided into ``n_cells`` equally wide
    bins per axis. Cell ``[row][col]`` of the result holds the number of pairs
    whose ``y_hat`` lies in bin ``row`` and whose ``y`` lies in bin ``col``.

    A fresh grid is built on every call, so repeated calls do not accumulate
    counts. Pairs with a coordinate outside [0, max_glucose) -- including
    negative values, which would otherwise wrap around to the far end of the
    grid through negative indexing, and NaN -- are ignored and counted.

    Args:
        y: measured values (column index of the grid).
        y_hat: predicted values (row index of the grid), same length as ``y``.
        invalid: starting value of the ignored-points counter; the number of
            ignored pairs is added to it for the printed summary only.
        max_glucose: exclusive upper bound of the binned range. Defaults to
            the module-level MAX_GLUCOSE.
        n_cells: number of bins per axis. Defaults to the module-level
            grid_dim.

    Returns:
        A float array of shape (n_cells, n_cells) with the per-cell counts.

    Raises:
        ValueError: if ``y`` and ``y_hat`` do not have the same shape.
    """
    if max_glucose is None:
        max_glucose = MAX_GLUCOSE
    if n_cells is None:
        n_cells = grid_dim
    bin_width = max_glucose / n_cells

    measured = np.asarray(y, dtype=float)
    predicted = np.asarray(y_hat, dtype=float)
    if measured.shape != predicted.shape:
        raise ValueError("y and y_hat must have the same shape")

    # only pairs with both coordinates inside [0, max_glucose) can be binned
    usable = (
        (measured >= 0) & (measured < max_glucose)
        & (predicted >= 0) & (predicted < max_glucose)
    )

    # truncating the quotient yields the bin index; the clip guards against a
    # floating-point quotient rounding up to n_cells just below the upper bound
    col_index = np.minimum((measured[usable] / bin_width).astype(int), n_cells - 1)
    row_index = np.minimum((predicted[usable] / bin_width).astype(int), n_cells - 1)

    counts = np.zeros((n_cells, n_cells))
    # np.add.at accumulates correctly when the same cell is hit several times
    np.add.at(counts, (row_index, col_index), 1)

    invalid = invalid + int(np.count_nonzero(~usable))

    print("All numbers in the cells sum up to: ", int(np.sum(counts)))
    print("Check: Number of data points: ", len(y))
    print("Cell that is populated the most contains ", int(np.amax(counts)), "data points.")
    print(invalid, " data points were ignored.")
    # counts now holds the number of points that are within each grid cell
    return counts


# shared layout of the heat-map figures (square 600 x 600 canvas)
layout = go.Layout(
    title=dict(text="Measurements vs. Prediction", x=0.5, xanchor='center'),
    font={'family': "Arial", 'size': 20, 'color': "black"},
    xaxis={
        'title': 'True Blood Glucose Value',
        'showgrid': False,
        'gridcolor': "#DAD7CB",
        'gridwidth': 1,
    },
    yaxis={
        'title': 'Prediction of Blood Glucose Value',
        'showgrid': False,
        'gridcolor': "#DAD7CB",
        'gridwidth': 1,
    },
    height=600,
    width=600,
    legend={'yanchor': "bottom", 'y': 0.05, 'xanchor': "left", 'x': 0.5},
    hovermode='closest',
    # alpha 0 -> fully transparent paper and plot backgrounds
    paper_bgcolor='rgba(0, 0, 0, 0)',
    plot_bgcolor='rgba(0, 0, 0, 0)',
)

grid = populate_grid(y, y_hat,invalid)

# Grid cell i counts the values in [i * cell_size, (i + 1) * cell_size).
# Plotly interprets heat-map coordinates as the CENTER of each brick, so the
# first brick has to be centered at cell_size / 2 for the bricks to cover
# exactly the value range that was binned. x0/dx and y0/dy describe the whole
# regular grid explicitly instead of relying on a two-element coordinate list
# being extended to the number of columns/rows.
heat_coords = dict(
    x0 = cell_size / 2,
    dx = cell_size,
    y0 = cell_size / 2,
    dy = cell_size,
)

# 1) raw counts, default color scale
fig1 = go.Figure(
    data = go.Heatmap(
        z = grid,
        **heat_coords,
    ),
    layout = layout,
)

# 2) same data with smoothing between the cells
fig2 = go.Figure(
    data = go.Heatmap(
        z = grid,
        zsmooth = 'best',
        **heat_coords,
    ),
    layout = layout,
)

# 3) smoothed, with a named color scale; can be one of
#    'Blackbody,Bluered,Blues,Cividis,Earth,Electric,Greens,Greys,Hot,Jet,
#     Picnic,Portland,Rainbow,RdBu,Reds,Viridis,YlGnBu,YlOrRd'
fig3 = go.Figure(
    data = go.Heatmap(
        z = grid,
        zsmooth = 'best',
        colorscale = 'Rainbow',
        **heat_coords,
    ),
    layout = layout,
)

# 4) custom color scale: exactly-zero cells stay white, while the jump at
#    0.001 gives every populated cell a visible tint
heatTrace = go.Heatmap(
    z = grid,
    colorscale = [
        [0, 'rgb(255,255,255)'],  # White
        [0.001, 'rgb(230,249,255)'],
        [0.2, 'rgb(66,152,181)'],  # SKY_STANFORD_IDENT_COL
        [0.5, 'rgb(0, 0, 153)'],
        [1, 'rgb(150, 0, 89)'],
    ],
    **heat_coords,
)

# heat map plus the optimal-prediction reference line, restricted to the
# binned value range
heatFigM10 = go.Figure(
    data = [heatTrace, ref_line],
    layout = layout,
    layout_xaxis_range = [MIN_GLUCOSE, MAX_GLUCOSE],
    layout_yaxis_range = [MIN_GLUCOSE, MAX_GLUCOSE],
)

# NOTE(review): built from the same trace as heatFigM10 -- presumably meant
# to show a different M value once that data is loaded; confirm.
heatFigM100 = go.Figure(
    data = [heatTrace, ref_line],
    layout = layout,
    layout_xaxis_range = [MIN_GLUCOSE, MAX_GLUCOSE],
    layout_yaxis_range = [MIN_GLUCOSE, MAX_GLUCOSE],
)

# side-by-side comparison of the two heat-map figures
subplot = make_subplots(
    rows=1, cols=2,
    horizontal_spacing = 0.2,
    subplot_titles=('Subplot title1',
                    'Subplot title2'),
)

# Copy the traces of the two heat-map figures into the subplot columns.
# The loops previously iterated over `fig`, which is not defined at this
# point when the notebook is executed top to bottom, while heatFigM10 and
# heatFigM100 were never used. add_trace(row=, col=) replaces the deprecated
# append_trace.
for t in heatFigM10.data:
    subplot.add_trace(t, row=1, col=1)
for t in heatFigM100.data:
    subplot.add_trace(t, row=1, col=2)

subplot.update_layout(
    title={
        'text': "Measurements vs. Prediction",
        'x': 0.5,
        'xanchor': 'center',
    },
    font = dict(
        family = "Arial",
        size = 20,
        color = "black",
    ),
    xaxis=dict(
        title='True Blood Glucose Value',
        showgrid = False,
        gridcolor = "#DAD7CB",
        gridwidth = 1,
    ),
    yaxis=dict(
        title='Prediction of Blood Glucose Value',
        showgrid = False,
        gridcolor = "#DAD7CB",
        gridwidth = 1,
    ),
    height = 600,
    width = 900,
    legend = dict(
        yanchor = "bottom",
        y = 0.05,
        xanchor = "left",
        x = 0.5,
    ),
    showlegend = False,
    hovermode='closest',
    paper_bgcolor = 'rgba(0, 0, 0, 0)',
    plot_bgcolor = 'rgba(0, 0, 0, 0)',
)

# edit axis labels (xaxis/yaxis belong to the first subplot,
# xaxis2/yaxis2 to the second one)
subplot['layout']['xaxis']['title']='True Blood Glucose Value'
subplot['layout']['xaxis2']['title']='True Blood Glucose Value'
subplot['layout']['yaxis']['title']='Prediction of Blood Glucose Value'
subplot['layout']['yaxis2']['title']='Prediction of Blood Glucose Value'

# show all heat-map variants, then the combined subplot figure
iplot(fig1)
iplot(fig2)
iplot(fig3)
iplot(heatFigM10)
iplot(subplot)



# Import the data for the loss function plot


The following cells are all copied from "loss_function.ipynb" (the original notebook) so that the final plot that resulted from that notebook can be reused here.

In [None]:
# Evaluation grid for the loss-function plot: 500 evenly spaced samples on
# [0, 400] for each axis.
# NOTE: this rebinds y and y_hat, which earlier cells use for the data set.
y = np.linspace(start=0, stop=400, num=500)
y_hat = np.linspace(start=0, stop=400, num=500)

# Y varies along the columns and Y_hat along the rows ('xy' is the default).
Y, Y_hat = np.meshgrid(y, y_hat, indexing='xy')

In [None]:
def xi(x, a, epsilon):
    """Original rescaling of x used by `sigmoid`.

    Computes ``2/epsilon * (x - a - 2/epsilon)``; note that the shift is
    ``2/epsilon``, whereas `xi_corrected` shifts by ``epsilon/2``.
    """
    factor = 2 / epsilon
    return factor * (x - a - factor)

def xi_corrected(x, a, epsilon):
    """Map x linearly so that a -> -1, a + epsilon/2 -> 0 and a + epsilon -> 1."""
    half_width = epsilon / 2
    return 2 / epsilon * (x - a - half_width)

def sigmoid(x, a, epsilon):
    """Original rising step built on `xi` (see `sigmoid_corrected`).

    Returns 0 where ``x <= a`` and 1 where ``x > a + epsilon``; in between it
    evaluates one quartic polynomial in ``xi(x, a, epsilon)`` on the first
    half of the interval and another one on the second half.
    """
    t = xi(x, a, epsilon)

    # polynomial for a < x <= a + epsilon/2 and for a + epsilon/2 < x <= a + epsilon
    first_half = -1/2 * (t**4) - (t**3) + t + 1/2
    second_half = 1/2 * t**4 - t**3 + t + 1/2

    # assemble from the outermost interval inwards; later lines take priority
    out = np.where(x <= a + epsilon, second_half, 1)
    out = np.where(x <= a + epsilon/2, first_half, out)
    out = np.where(x <= a, 0, out)
    return out


# corrected
def sigmoid_corrected(x, a, epsilon):
    """Rising step from 0 to 1 over the interval (a, a + epsilon].

    Returns 0 where ``x <= a`` and 1 where ``x > a + epsilon``. In between,
    with ``t = xi_corrected(x, a, epsilon)`` running from -1 to 1, it
    evaluates ``-t**4/2 - t**3 + t + 1/2`` on the first half (0 at t = -1,
    1/2 at t = 0) and ``t**4/2 - t**3 + t + 1/2`` on the second half
    (1/2 at t = 0, 1 at t = 1).
    """
    t = xi_corrected(x, a, epsilon)

    first_half = -1/2 * (t**4) - (t**3) + t + 1/2
    second_half = 1/2 * t**4 - t**3 + t + 1/2

    # assemble from the outermost interval inwards; later lines take priority
    out = np.where(x <= a + epsilon, second_half, 1)
    out = np.where(x <= a + epsilon/2, first_half, out)
    out = np.where(x <= a, 0, out)
    return out

# b = np.linspace(0,500,500)
# fig = px.scatter(x=b, y= sigmoid(b, 155, 100), title='original')
# fig.show()
# fig = px.scatter(x=b, y=sigmoid_corrected(b, 155, 100), title='corrected')
# fig.show()



In [None]:
def xi_neg(x, a, epsilon):
    """Original rescaling of x used by `sigmoid_neg`.

    Computes ``-2/epsilon * (x - a + 2/epsilon)``; note that the shift is
    ``2/epsilon``, whereas `xi_neg_corrected` shifts by ``epsilon/2``.
    """
    factor = 2 / epsilon
    return -factor * (x - a + factor)

def xi_neg_corrected(x, a, epsilon):
    """Map x linearly so that a - epsilon -> 1, a - epsilon/2 -> 0 and a -> -1."""
    half_width = epsilon / 2
    return -2 / epsilon * (x - a + half_width)

def sigmoid_neg(x, a, epsilon):
    """Original falling step built on `xi_neg` (see `sigmoid_neg_corrected`).

    Returns 1 where ``x <= a - epsilon`` and 0 where ``x > a``; in between it
    evaluates one quartic polynomial in ``xi_neg(x, a, epsilon)`` on the first
    half of the interval and another one on the second half.
    """
    t = xi_neg(x, a, epsilon)

    # polynomial for a - epsilon < x <= a - epsilon/2 and for a - epsilon/2 < x <= a
    first_half = 1/2 * (t**4) - (t**3) + t + 1/2
    second_half = -1/2 * t**4 - t**3 + t + 1/2

    # assemble from the outermost interval inwards; later lines take priority
    out = np.where(x <= a, second_half, 0)
    out = np.where(x <= a - epsilon/2, first_half, out)
    out = np.where(x <= a - epsilon, 1, out)
    return out


# corrected
def sigmoid_neg_corrected(x, a, epsilon):
    """Falling step from 1 to 0 over the interval (a - epsilon, a].

    Returns 1 where ``x <= a - epsilon`` and 0 where ``x > a``. In between,
    with ``t = xi_neg_corrected(x, a, epsilon)`` running from 1 down to -1,
    it evaluates ``t**4/2 - t**3 + t + 1/2`` on the first half (1 at t = 1,
    1/2 at t = 0) and ``-t**4/2 - t**3 + t + 1/2`` on the second half
    (1/2 at t = 0, 0 at t = -1).
    """
    t = xi_neg_corrected(x, a, epsilon)

    first_half = 1/2 * (t**4) - (t**3) + t + 1/2
    second_half = -1/2 * t**4 - t**3 + t + 1/2

    # assemble from the outermost interval inwards; later lines take priority
    out = np.where(x <= a, second_half, 0)
    out = np.where(x <= a - epsilon/2, first_half, out)
    out = np.where(x <= a - epsilon, 1, out)
    return out

# b = np.linspace(0,500,1000)
# fig = px.scatter(x=b, y= sigmoid_neg(b, 85, 30), title='original')
# fig.show()
# fig = px.scatter(x=b, y=sigmoid_neg_corrected(b, 85, 30), title='corrected')
# fig.show()



# Putting the loss function together

In [None]:
# Default parameters of the penalty function Pen (suffix _L: low-glucose term,
# suffix _H: high-glucose term)
alpha_L = 1.5   # weight of the low-glucose penalty term
alpha_H = 1     # weight of the high-glucose penalty term
beta_L = 30     # transition width of the step in g for the low term
beta_H = 100    # transition width of the step in g for the high term
gamma_L = 10    # transition width of the step in g_hat for the low term
gamma_H = 20    # transition width of the step in g_hat for the high term
t_L = 85        # threshold in g for the low term
t_H = 155       # threshold in g for the high term

def Pen(g, g_hat, alpha_L = alpha_L, alpha_H = alpha_H, beta_L = beta_L, beta_H = beta_H, gamma_L = gamma_L, gamma_H = gamma_H, t_L = t_L, t_H = t_H):
    """Penalty factor (>= 1) applied to the squared error of a prediction.

    Two terms are added to the base value 1:

    * low term: active where g lies below t_L (falling step of width beta_L)
      AND g_hat lies above g (rising step of width gamma_L), scaled by alpha_L;
    * high term: active where g lies above t_H (rising step of width beta_H)
      AND g_hat lies below g (falling step of width gamma_H), scaled by alpha_H.

    Args:
        g: true value(s).
        g_hat: predicted value(s).
        alpha_L, alpha_H, beta_L, beta_H, gamma_L, gamma_H, t_L, t_H:
            weights, transition widths and thresholds as described above;
            they default to the module-level values of the same name.
    """
    low_term = alpha_L * sigmoid_neg_corrected(g, t_L, beta_L) * sigmoid_corrected(g_hat, g, gamma_L)
    high_term = alpha_H * sigmoid_corrected(g, t_H, beta_H) * sigmoid_neg_corrected(g_hat, g, gamma_H)
    return 1 + low_term + high_term

def gSE(g, g_hat, alpha_L = alpha_L, alpha_H = alpha_H, beta_L = beta_L, beta_H = beta_H, gamma_L = gamma_L, gamma_H = gamma_H, t_L = t_L, t_H = t_H): 
    """Squared error ``(g - g_hat)**2`` multiplied by the penalty factor `Pen`.

    All keyword parameters are forwarded unchanged to `Pen`.
    """
    squared_error = np.square(g - g_hat)
    penalty = Pen(g, g_hat, alpha_L, alpha_H, beta_L, beta_H, gamma_L, gamma_H, t_L, t_H)
    return squared_error * penalty
    
def gMSE(g, g_hat, alpha_L = alpha_L, alpha_H = alpha_H, beta_L = beta_L, beta_H = beta_H, gamma_L = gamma_L, gamma_H = gamma_H, t_L = t_L, t_H = t_H):
    """Mean of the penalized squared error `gSE` over all elements.

    Args:
        g: true value(s).
        g_hat: predicted value(s).
        alpha_L, alpha_H, beta_L, beta_H, gamma_L, gamma_H, t_L, t_H:
            forwarded unchanged to `gSE`.

    Returns:
        The result of ``np.mean`` over ``gSE(g, g_hat, ...)``.
    """
    # the mean was previously computed but never returned, so the function
    # always evaluated to None
    return np.mean(gSE(g, g_hat, alpha_L, alpha_H, beta_L, beta_H, gamma_L, gamma_H, t_L, t_H))

In [None]:
# Penalty factor on the integer grid 0..399 x 0..399, laid out so that
# matrix[g_hat, g] == Pen(g, g_hat).
# Pen works element-wise on arrays (it is called that way for the loss trace),
# so one vectorized call replaces 160,000 scalar calls in a double loop.
g_axis = np.arange(400)
# 'xy' indexing: G[i, j] == j (column = true value g),
#                G_hat[i, j] == i (row = prediction g_hat)
G, G_hat = np.meshgrid(g_axis, g_axis)
matrix = Pen(G, G_hat)
    
# matrix

# px.imshow(matrix)


Translate the px.scatter code into go.Scattergl code so that several plots can be combined in one figure.

In [None]:
# Color scale for the penalty values; other built-in scales include
# Blackbody, Bluered, Blues, Cividis, Earth, Electric, Greens, Greys, Hot, Jet,
# Picnic, Portland, Rainbow, RdBu, Reds, Viridis, YlGnBu, YlOrRd
color_scale = 'RdYlGn_r'

# one marker per point of the (Y, Y_hat) evaluation grid,
# colored by the penalty factor at that point
loss_trace = go.Scattergl(
    x=Y.flatten(),
    y=Y_hat.flatten(),
    mode='markers',
    marker={
        'color': Pen(Y, Y_hat).flatten(),
        'colorscale': color_scale,
    },
    showlegend=False,
)

layout = go.Layout(
    xaxis={'title': 'y'},
    yaxis={'title': 'y_hat'},
    height=400,
    width=400,
    # alpha 0 -> fully transparent paper and plot backgrounds
    paper_bgcolor='rgba(0, 0, 0, 0)',
    plot_bgcolor='rgba(0, 0, 0, 0)',
)

fig = go.Figure(data=[loss_trace], layout=layout)

iplot(fig)




# Now combine both graphs

In [None]:
# reference line (optimal prediction): the diagonal from (0, 0) to (400, 400)
diagonal_style = {'color': SPIRITED_STANFORD_IDENT_COL, 'width': 3}
optimal_pred = go.Scattergl(
    x=np.linspace(start=0, stop=400, num=2),
    y=np.linspace(start=0, stop=400, num=2),
    name='Optimal prediction',
    mode='lines',
    line=diagonal_style,
)

# layout of the overlay figure
layout = go.Layout(
    title=dict(text="Data points overlay", x=0.4, xanchor='center'),
    font={'family': "Arial", 'size': 20, 'color': "black"},
    xaxis={'title': 'y'},
    yaxis={'title': 'y_hat'},
    height=500,
    width=700,
    hovermode='closest',
    # alpha 0 -> fully transparent paper and plot backgrounds
    paper_bgcolor='rgba(0, 0, 0, 0)',
    plot_bgcolor='rgba(0, 0, 0, 0)',
)

# stack the traces in drawing order: penalty background first, then the
# data points, then the reference line on top
combined_fig = go.Figure(
    data=[loss_trace, trace1, optimal_pred],
    layout=layout,
)

iplot(combined_fig)