<div class="cover">
    <img src="img/ifood-logo.png" align="left"/><br><br><br>
    <h1>Experimentações com grandes volumes de dados usando Notebooks</h1>
    <h2>Gilmar Souza - Data &amp; Analytics Principal</h2>
</div>

<div class="divider">
    <h1>Notebooks</h1>
</div>

<center>![noimg](img/data_science_notebook.jpg)</center>

<center>![noimg](img/notebooks_logos.png)</center>

<center>![noimg](img/jupyter_languages.png)</center>

<div class="divider">
    <h1>Por que Notebooks?</h1>
</div>

### REPL: Read-Eval-Print Loop

In [1]:
import time
def paused_print(words_csv, delay):
    """Print each comma-separated item of ``words_csv`` on its own line.

    Parameters
    ----------
    words_csv : str
        Text that is split on ``','``; items are printed as-is (no stripping).
    delay : int or float
        Seconds to sleep after every printed item, including the last one.
    """
    for token in words_csv.split(','):
        print(token)
        time.sleep(delay)
        
# Demo call: prints the three words one per line, sleeping 2 seconds after each.
paused_print('read,eval,print', 2)

read
eval
print


In [2]:
#source: https://anaconda.org/jbednar/plotting_pitfalls/notebook
# Setup cell: seeds NumPy's global RNG and configures HoloViews for the plots in the following cells.
import numpy as np
np.random.seed(42)  # fixed seed so the random point clouds are identical on every run

import holoviews as hv
hv.notebook_extension('matplotlib')  # load the HoloViews notebook extension with the matplotlib backend

# HoloViews %opts line magics setting default options for the Points, Scatter3D, Image and RGB elements.
# Keep comments off the magic lines themselves: everything after %opts is passed to the magic.
%opts Points [color_index=2] (cmap="bwr" edgecolors='k' s=50 alpha=1.0)
%opts Scatter3D [color_index=3 fig_size=250] (cmap='bwr' edgecolor='k' s=50 alpha=1.0)
%opts Image (cmap="gray_r") {+axiswise}
%opts RGB [bgcolor="black" show_grid=False]

# Override class-level defaults on the matplotlib plot classes: figure alpha 0 and a white element background.
import holoviews.plotting.mpl
holoviews.plotting.mpl.MPLPlot.fig_alpha = 0
holoviews.plotting.mpl.ElementPlot.bgcolor = 'white'

# Set datashade's default colormap to colorcet's `fire` palette with its first 50 entries dropped
# (presumably to skip the darkest colors — confirm against the source notebook).
from holoviews.operation.datashader import datashade
from colorcet import fire
datashade.cmap=fire[50:]

### Multimedia

In [3]:
def blues_reds(offset=0.5,pts=300):
    """Build two random 2D point clouds as ``hv.Points`` elements.

    Parameters
    ----------
    offset : float
        Mean of both coordinates for the first ("blue") cloud; the second
        ("red") cloud uses ``-offset``.
    pts : int
        Number of points drawn per cloud.

    Returns
    -------
    tuple
        ``(blues, reds)``, each an ``hv.Points`` with value dimension ``'c'``
        holding a constant label: -1 for blues, +1 for reds.
    """
    def sample(center, label):
        # Draw x first, then y, so the global RNG is consumed in a fixed order.
        xs = np.random.normal(center, size=pts)
        ys = np.random.normal(center, size=pts)
        return xs, ys, label*np.ones(pts)

    blue_data = sample(offset, -1)
    red_data = sample(-offset, 1)
    return hv.Points(blue_data, vdims=['c']), hv.Points(red_data, vdims=['c'])

# Build both clouds with the default offset and point count.
blues,reds = blues_reds()
# HoloViews `+` lays elements out side by side and `*` overlays them; the last two panels
# overlay the same data in opposite order (presumably to show that draw order changes what is visible).
blues + reds + reds*blues + blues*reds  

<div class="source">source: https://anaconda.org/jbednar/plotting_pitfalls/notebook</div>

In [4]:
# Stack the two clouds along a 'level' key dimension, alternating blues and reds.
# NOTE(review): the 0.000001 key presumably places the reds almost coplanar with the level-0 blues — confirm intent.
hmap = hv.HoloMap({0:blues,0.000001:reds,1:blues,2:reds}, kdims=['level'])
# Convert the HoloMap to a table and plot it in 3D with x, y and level as key dimensions and c as the value dimension.
hv.Scatter3D(hmap.table(), kdims=['x','y','level'], vdims=['c'])

In [5]:
from ipywidgets import interact
from sympy import Symbol, Eq, factor
from sympy import init_printing
# Enable SymPy's pretty printing for expression output in this session.
init_printing()

# Module-level symbol `x`.
# NOTE(review): a later cell rebinds the global name `x` to a NumPy array.
x = Symbol('x')

def factorit(n):
    """Return the equation ``x**n - 1 == factor(x**n - 1)`` for the symbol ``x``.

    The symbol is created locally instead of being read from the global
    ``x``: a later cell in this notebook rebinds the global ``x`` to a NumPy
    array, which would otherwise make this widget callback operate on that
    array instead of a symbol once that cell has run.

    Parameters
    ----------
    n : int
        Exponent of the polynomial ``x**n - 1``.

    Returns
    -------
    The ``Eq`` object relating the polynomial to its factored form.
    """
    symbol = Symbol('x')
    polynomial = symbol**n - 1
    return Eq(polynomial, factor(polynomial))

# Show an integer slider for n (2..20) that calls factorit with the selected value;
# the trailing `;` suppresses the cell's return-value output.
interact(factorit, n=(2,20));

interactive(children=(IntSlider(value=11, description='n', max=20, min=2), Output()), _dom_classes=('widget-in…

<div class="source">source: https://github.com/jupyter-widgets/ipywidgets/blob/766cad54a47c07520e9d695534c4664c3391e7ec/docs/source/examples/Factoring.ipynb</div>

In [6]:
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit as cf
from ipywidgets import interactive, fixed, interact_manual
from IPython.display import display

# Number of grid points and the x grid on [-2, 2]; func reads both as globals.
# NOTE(review): this rebinds the global name `x`, which an earlier cell bound to a SymPy Symbol.
N_samples = 25
x=np.linspace(-2,2,N_samples)

def f(x,a,mu,sigma):
    """Evaluate the Gaussian ``a * exp(-(x - mu)**2 / (2 * sigma**2))``.

    Parameters
    ----------
    x : scalar or array-like supporting NumPy arithmetic
        Point(s) at which the curve is evaluated.
    a : amplitude (value of the curve at ``x == mu``).
    mu : location of the peak.
    sigma : width parameter; the expression divides by ``2*sigma**2``.

    Returns
    -------
    The curve value(s), with the same shape as ``x``.
    """
    exponent = -(x-mu)**2/(2*sigma**2)
    return a*np.exp(exponent)

def func(amplitude,ideal_mu,ideal_sigma,noise_sd,noise_mean):
    """Plot an ideal Gaussian on the global grid ``x`` with a noisy sample of it.

    The ideal curve is drawn as a black line and the noisy sample as yellow
    markers on a new 8x5 figure, which is shown immediately.

    Parameters
    ----------
    amplitude, ideal_mu, ideal_sigma
        Parameters of the ideal Gaussian, passed to ``f`` as ``a``, ``mu``
        and ``sigma``.
    noise_sd : float
        Standard deviation of the additive normal noise.
    noise_mean : float
        Mean of the additive normal noise.

    Returns
    -------
    numpy.ndarray
        The noisy samples (ideal curve plus noise), one per grid point.

    Notes
    -----
    Reads the globals ``x`` (sample grid) and ``N_samples`` (noise vector
    length) and draws from NumPy's global random state.
    """
    # Reuse the shared Gaussian helper instead of repeating its formula here.
    ideal = f(x, amplitude, ideal_mu, ideal_sigma)
    plt.figure(figsize=(8,5))
    plt.plot(x,ideal,c='k',lw=3)
    noisy = ideal+np.random.normal(loc=noise_mean,scale=noise_sd,size=N_samples)
    plt.scatter(x,noisy,edgecolors='k',c='yellow',s=60)
    plt.grid(True)
    plt.show()
    return noisy

In [7]:
# Wrap func in an interactive widget: the amplitude list becomes a dropdown and each
# (min, max, step) tuple becomes a slider.
# NOTE(review): the ideal_sigma range starts at 0, and func divides by 2*ideal_sigma**2,
# so the lowest slider position divides by zero.
y=interactive(func,amplitude=[1,2,3,4,5],ideal_mu=(-5,5,0.5),
              ideal_sigma=(0,2,0.2),
              noise_sd=(0,1,0.1),noise_mean=(-1,1,0.2))
display(y)

interactive(children=(Dropdown(description='amplitude', options=(1, 2, 3, 4, 5), value=1), FloatSlider(value=0…

<div class="source">source: https://towardsdatascience.com/interactive-machine-learning-make-python-lively-again-a96aec7e1627</div>

### Reprodutibilidade e Colaboração

<center>![noimg](img/afp_notebook_orig.png)</center>

<center>![noimg](img/afp_notebook_not_reproduced.png)</center>

<div class="divider">
    <h1>Dados têm Inércia</h1>
</div>

<center>![noimg](img/davinci_notebook.png)</center>