In [449]:
import datetime
import numpy as np
import pandas as pd

import plotly.graph_objects as go
from ipywidgets import widgets
import chart_studio.plotly as py


### Overview

There are two goals in this post. 

1. Become more familiar with plotly and put in a simple interactive widget.
2. Create training data for fitting polynomials by sampling from a sin wave with gaussian noise.

### Generating data for sin wave

First we will generate some data to plot the sin wave

x vector (x axis) = 10 points from 0 to 1 

t vector (y axis) = apply sin(2 * pi * x) to the previous generated points

It is important to think the of t vector as a the result of a function broadcasted on the x vector

In [450]:
#Example of broadcasting
x = np.matrix([1,2,3]) # shape (1,3)
t = 2*x                # Broadcast multiplication of 2 on vector x
t

matrix([[2, 4, 6]])

In [451]:
# Create random data with numpy
np.random.seed(1)
N = 10
x = np.linspace(0, 1, N)    # Sample 10 evenly spaced points from 0 to 1
t = np.sin(2*np.pi*x)       # Apply the sin function with 2*pi in it
x,t

(array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
        0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ]),
 array([ 0.00000000e+00,  6.42787610e-01,  9.84807753e-01,  8.66025404e-01,
         3.42020143e-01, -3.42020143e-01, -8.66025404e-01, -9.84807753e-01,
        -6.42787610e-01, -2.44929360e-16]))

### Plotting sin wave in plotly

Now that we have some data we can plot it!!!

I went to a few different tutorials to find out how to plot what I want.

The first place you should look is here https://plot.ly/python/line-charts/ which gives the basics on making plotly graph objects so we can add cool widgets in later. Basically what we are doing is making a scatter plot of the 10 points then putting it in line mode so it looks smooth. Changing color was a little more difficult but this page https://plot.ly/python/marker-style/ gives good examples

In [452]:
fig = go.Figure()

sinwave = go.Scatter(x=x, y=t,
    mode='lines',
    name='sinwave',
    marker=dict(
        color='green',
        size=20,
        line=dict(color='green',width=2)
            )
        )
    


# Add title info to figure
fig.update_layout(
    title="Sin wave",
    xaxis_title="x",
    yaxis_title="t",
)

fig.add_trace(sinwave)

py.iplot(fig, filename = 'Sin', auto_open=False)

### Sampling points from the Sin wave with Gaussian noise

Now we want to sample points from our sin wave but add some Gaussian/Normal noise to it. This is important because everything you measure in the real world has some noise or error to it.

As we move along the x axis, we want to use the value of t in our sin wave as the mean for the normal distribution function.


This is a great introduction to histograms

https://help.plot.ly/histogram/

You can get more information on the normal distribution here

https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.random.normal.html

Don't worry if none of this makes sense to you right now hopefully the plots below will give you some good visual intuition

### Example of normal distribution from one value of t

We will start off simple and take one value of t as input for mean of the normal distribution

Lets take a value near the middle of the plot

The standard deviation (sigma) is set to 0.1 

We will draw 1000 points from the gaussian distribution and choose one for our sample

This will be a data point for fitting polynomials in the future and the most important thing we care about in this post.

In [453]:
middle = int((N - 1)/2) # Get index around half of our points
mid_x = x[middle]       # This will return 0.4444
mid_t = t[middle]       # Remember this is x transformed by function sin(2*x*pi)

In [454]:
np.random.seed(1)
G_N = 1000                          # Choose 1000 points for gaussian sample
mu, sigma = mid_t, 0.1              # mean (mid_t) and standard deviation 

G = np.random.normal(mu, sigma, G_N)


random_index = np.random.choice(len(G), 1) # Draws one values randomly G distribution
onesample = G[random_index]   

In [455]:
fig = go.Figure()

t_hist = go.Histogram(x=G,
                      name='t_hist')

fig.update_layout(
    title="histogram of normal distribution using mid t as mean",
    xaxis_title="t",
    yaxis_title="count",
)

sample = go.Scatter(x=onesample, y=[1],
                    mode='markers',
                    name='sample',
                    marker=dict(
                        color='blue',
                        size=30)
                        )



fig.add_trace(t_hist)
fig.add_trace(sample)
py.iplot(fig, filename = 't_hist', auto_open=False)

### Plotting the normal curve over the normal histogram

We will now do something a little more complicated... 

lets calculate the amount in the bins by hand and plot these as a curve over the histogram

Let's wrap this in a function so we can use it again later

This is called the normal curve

In [456]:
def binCounts(G, bins=20):
    # First lets find the bin length. I am choosing 20 bins for no reason
    G_min = np.min(G)               # Smallest value of gauss
    G_max = np.max(G)               # Get max  value of gauss
    G_range = G_max - G_min         # Gaussian range
    bin_length = G_range / bins     # Get length of bins


    # Initialize the first bin
    b = 0
    b_start = G_min
    b_stop  = G_min + bin_length

    bin_counts = []
    bin_starts = []
    while b < bins:                    # Loop over bins
        b_count = 0
        for g in G:                    # Loop over gaussian points
            if b_start <= g < b_stop:  # If our gaussian point is inside a bin 
                b_count += 1           # add to our bin count

        bin_counts.append(b_count)
        bin_starts.append(b_start)
        
        # go to next bin values
        b_start = b_start + bin_length
        b_stop  = b_stop + bin_length
        b += 1
    return bin_counts, bin_starts


In [457]:
bin_counts, bin_starts = binCounts(G)
bin_counts

[4, 6, 15, 22, 41, 62, 102, 121, 153, 133, 122, 102, 54, 36, 18, 6, 0, 1, 1, 0]

### Plotting the normal curve over the histogram of normal distribution

Below you will see the histogram function modified by our calculated bin_length.

As well as the count of our 20 bins as dots on the normal curve. 

Our bin amounts don't match up exactly with the plotly plot but it is enough to get the point across.

The take home is that when we sample from a normal distribution we are more likely to get values near the middle because they have higher counts.

In [458]:
t_hist = go.Histogram(x=G,
                      name='t_hist',
                      xbins=dict(
                        start=G_min,
                        end=G_max,
                        size=bin_length
                    ))

sample = go.Scatter(x=onesample, y=[1],
                    mode='markers',
                    name='sample',
                    marker=dict(
                        color='blue',
                        size=30)
                        )




normal_curve = go.Scatter(x=bin_starts, y=bin_counts,
                    mode='lines+markers',
                    name='normal curve',
                    marker=dict(
                        color='purple',
                        size=20,
                        line=dict(color='red',width=2)
                            )
                        )


fig = go.Figure()
fig.update_layout(
    title="normal histogram using mid t as mean with normal curve",
    xaxis_title="t",
    yaxis_title="count",
)

fig.add_trace(t_hist)
fig.add_trace(normal_curve)
fig.add_trace(sample)

py.iplot(fig, filename = 't_hist_normalcurve', auto_open=False)

### Plot of gaussian noise on middle value of Sin wave

We will now plot the gaussian noise on the sin wave. Essentially the plot above is rotated to the right

In [459]:
def normalizeBinCounts(x, G, bin_counts, norm_number=10):
    # Now we can add in the points for the normal curve that we had above
    # I am dividing each count by the max count and then dividing
    # that by 10 to make it look good on the plot
    bin_counts_normalized = (bin_counts/np.max(bin_counts)) / norm_number

    #Now lets add that to mid_x so we will see the distribution in the correct place on x axis
    x_bin_counts_normalized = (x * np.ones(bins)) + bin_counts_normalized
    
    # Making a vector of x the size of gauss sample. 
    # Just for plotting
    x_vec = x * np.ones(len(G))
    return x_bin_counts_normalized, x_vec


In [460]:
# Now lets pass in the mid x value
mid_x_bin_counts_normalized, mid_x_vector = normalizeBinCounts(mid_x, G, bin_counts)

In [461]:
sinwave = go.Scatter(x=x, y=t,
                    mode='lines',
                    name='sinwave',
                    marker=dict(
                        color='green',
                        size=20,
                        line=dict(color='green',width=2)
                            )
                        )



normal_dist = go.Scatter(x=mid_x_vector, y=G,
                    mode='markers',
                    name='normal dist',
                    opacity=0.50,
                    marker=dict(
                        color='blue',
                        size=2,
                        line=dict(color='red',width=2)
                            )
                        )


# The x value looks a little funky.
# What we are doing is grabbing the first value from mid_x_vector list
# then passing the first value in as a list
sample = go.Scatter(x=[mid_x_vector[0]], y=onesample,
                    mode='markers',
                    name='sample',
                    marker=dict(
                        color='blue',
                        size=30)
                        )




normal_curve = go.Scatter(x=mid_x_bin_counts_normalized, y=bin_starts,
                    mode='lines+markers',
                    name='normal curve (20 bins)',
                    marker=dict(
                        color='purple',
                        size=10,
                        line=dict(color='red',width=2)
                            )
                        )



fig = go.Figure()
fig.update_layout(
    title="Sin wave with normal curve at mid t",
    xaxis_title="x",
    yaxis_title="t",
)

fig.add_trace(sinwave)
fig.add_trace(normal_dist)
fig.add_trace(normal_curve)
fig.add_trace(sample)



py.iplot(fig, filename = 'SinSample', auto_open=False)

### Creating functions to add multiple samples to a plot

I think the above plot looks pretty good. We are almost done hang in there!!!

I just want address two more things then 

we can pop in a cool widget and admire our handywork

In [462]:
# fstrings - This is an easy way to put variables into strings. 
variable = 10
f'{variable}'

'10'

We will now plot the sin wave with 1000 points and draw two samples uniformly from that

This will smooth out the sin wave and 
get us values that don't look like 0.333, 0.444 etc

Lets draw 2 values from our 1000 points as x inputs for the graph

In [463]:
np.random.seed(1)
N = 1000

x = np.linspace(0, 1, N)    
t = np.sin(2*np.pi*x)  

random_index = np.random.choice(len(x), 2) # Draws two values randomly from 0,999
x_samples = x[random_index]                # Get the x corresponding to random index
x_samples

array([0.03703704, 0.23523524])

First lets make a function that can take in an...

x value, apply the sin function, generate a gaussian distribution, and bin counts

Basically everything we did above

In [464]:
def sinSample(x, bins=20, sigma=0.1, G_N=1000):
    t = np.sin(2*np.pi*x)   
    G = np.random.normal(t, sigma, G_N)
    random_index = np.random.choice(len(G), 1)
    onesample = G[random_index]  
    bin_counts, bin_starts = binCounts(G)
    x_bin_counts_normalized, x_vector = normalizeBinCounts(x, G, bin_counts)
    return t, G, bin_counts, bin_starts, x_bin_counts_normalized, x_vector, onesample

Now lets make another function to create lists of traces of normal dist and normal curve for plotting

In [465]:

def sinSampleTraces(x):
    t, G, bin_counts, bin_starts, x_bin_counts_normalized, x_vector, onesample = sinSample(x)
    
    normal_dist = go.Scatter(x=x_vector, y=G,
                        mode='markers',
                        name=f'normal dist x:{x}',
                        opacity=0.50,
                        marker=dict(
                            color='blue',
                            size=2,
                            line=dict(color='red',width=2)
                                )
                            )

    sample = go.Scatter(x=[x_vector[0]], y=onesample,
                        mode='markers',
                        name='sample',
                        marker=dict(
                            color='blue',
                            size=25)
                            )    

    normal_curve = go.Scatter(x=x_bin_counts_normalized, y=bin_starts,
                        mode='lines+markers',
                        name=f'normal curve x:{x}',
                        marker=dict(
                            color='purple',
                            size=10,
                            line=dict(color='red',width=2)
                                )
                            ) 
    
    return normal_dist, sample, normal_curve 


Now lets loop through the two samples and add the traces to a figure

In [466]:
fig = go.Figure()

for x_sample in x_samples:
    normal_dist, sample, normal_curve = sinSampleTraces(x_sample)
    fig.add_trace(normal_dist)
    fig.add_trace(sample)
    fig.add_trace(normal_curve)

sinwave = go.Scatter(x=x, y=t,
                    mode='lines',
                    name='sinwave',
                    marker=dict(
                        color='green',
                        size=20,
                        line=dict(color='green',width=2)
                            )
                        )

fig.add_trace(sinwave)

fig.update_layout(
    title="Sin wave with two samples",
    xaxis_title="x",
    yaxis_title="t",
)


py.iplot(fig, filename = 'TwoSinSample', auto_open=False)



In [467]:
df = pd.read_csv(
    'https://raw.githubusercontent.com/yankev/testing/master/datasets/nycflights.csv')
df = df.drop(df.columns[[0]], axis=1)

In [468]:
month = widgets.IntSlider(
    value=1.0,
    min=1.0,
    max=12.0,
    step=1.0,
    description='Month:',
    continuous_update=False
)

use_date = widgets.Checkbox(
    description='Date: ',
    value=True,
)

container = widgets.HBox(children=[use_date, month])

textbox = widgets.Dropdown(
    description='Airline:   ',
    value='DL',
    options=df['carrier'].unique().tolist()
)

origin = widgets.Dropdown(
    options=list(df['origin'].unique()),
    value='LGA',
    description='Origin Airport:',
)


# Assign an empty figure widget with two traces
trace1 = go.Histogram(x=df['arr_delay'], opacity=0.75, name='Arrival Delays')
trace2 = go.Histogram(x=df['dep_delay'], opacity=0.75, name='Departure Delays')
g = go.FigureWidget(data=[trace1, trace2],
                    layout=go.Layout(
                        title=dict(
                            text='NYC FlightDatabase'
                        ),
                        barmode='overlay'
                    ))

In [469]:
def validate():
    if origin.value in df['origin'].unique() and textbox.value in df['carrier'].unique():
        return True
    else:
        return False


def response(change):
    if validate():
        if use_date.value:
            filter_list = [i and j and k for i, j, k in
                           zip(df['month'] == month.value, df['carrier'] == textbox.value,
                               df['origin'] == origin.value)]
            temp_df = df[filter_list]

        else:
            filter_list = [i and j for i, j in
                           zip(df['carrier'] == 'DL', df['origin'] == origin.value)]
            temp_df = df[filter_list]
        x1 = temp_df['arr_delay']
        x2 = temp_df['dep_delay']
        with g.batch_update():
            g.data[0].x = x1
            g.data[1].x = x2
            g.layout.barmode = 'overlay'
            g.layout.xaxis.title = 'Delay in Minutes'
            g.layout.yaxis.title = 'Number of Delays'


origin.observe(response, names="value")
textbox.observe(response, names="value")
month.observe(response, names="value")
use_date.observe(response, names="value")

In [470]:


container2 = widgets.HBox([origin, textbox])
widgets.VBox([container,
              container2,
              g])



VBox(children=(HBox(children=(Checkbox(value=True, description='Date: '), IntSlider(value=1, continuous_update…

In [471]:
from ipywidgets import interactive, HBox, VBox

# py.init_notebook_mode()

x = y = np.arange(-5, 5, 0.1)
yt = x[:, np.newaxis]
z = np.cos(x * yt) + np.sin(x * yt) * 2

f = go.FigureWidget(
    data=[
        go.Surface(z=z, x=x, y=y,
                   colorscale='Viridis')],
    layout=go.Layout(scene=go.layout.Scene(
        camera=go.layout.scene.Camera(
            up=dict(x=0, y=0, z=1),
            center=dict(x=0, y=0, z=0),
            eye=dict(x=1.25, y=1.25, z=1.25))
    ))
)


def update_z(frequency):
    f.data[0].z = np.cos(x * yt * frequency / 10.0) + np.sin(x * yt * frequency / 10.0) * 2


freq_slider = interactive(update_z, frequency=(1, 50, 0.1))
vb = VBox((f, freq_slider))
vb.layout.align_items = 'center'
vb


VBox(children=(FigureWidget({
    'data': [{'colorscale': [[0.0, '#440154'], [0.1111111111111111, '#482878'],
…


https://ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html



In [472]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets


In [473]:
def f(x):
    return x

In [474]:
interact(f, x=10);

interactive(children=(IntSlider(value=10, description='x', max=30, min=-10), Output()), _dom_classes=('widget-…

In [475]:

interact(f, x=True);



interactive(children=(Checkbox(value=True, description='x'), Output()), _dom_classes=('widget-interact',))

In [476]:
fig = go.FigureWidget()
scatt = fig.add_scatter()

xs=np.linspace(0, 6, 100)

@interact(a=(1.0, 4.0, 0.01), b=(0, 10.0, 0.01), color=['red', 'green', 'blue'])
def update(a=3.6, b=4.3, color='blue'):
    with fig.batch_update():
        scatt.x=xs
        scatt.y=np.sin(a*xs-b)
        scatt.line.color=color
        
# py .iplot(fig, filename = 'TwoSinSample', auto_open=False)

     

interactive(children=(FloatSlider(value=3.6, description='a', max=4.0, min=1.0, step=0.01), FloatSlider(value=…