# Bokeh 

In [1]:
import pandas as pd

In [2]:
from bokeh.plotting import figure

In [3]:
from bokeh.io import show, output_notebook, output_file

## Ejemplo plot

In [4]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [5]:
# Create an empty 400x400 px figure.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 400, height = 400)

# Draw a scatter plot: one circle per (x, y) pair, each with its own size
# and its own fill opacity.
p.circle([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], size = [10, 15, 20, 25, 30], line_color = "navy",
         fill_color = "orange", fill_alpha = [0.5, 0.2, 0.2, 1, 1])

# Render the figure.
show(p)

## Pandas Dataframe

In [6]:
import pandas as pd

In [7]:
# Load the female literacy / fertility dataset from a CSV file in the working directory.
df = pd.read_csv("female_literacy_birth_rate.csv")

In [8]:
# Preview the first rows of the dataset.
df.head()

Unnamed: 0,Country,Continent,female literacy,fertility,population
0,Chine,ASI,90.5,1.769,1324655000.0
1,Inde,ASI,50.8,2.682,1139965000.0
2,USA,NAM,99.0,2.077,304060000.0
3,Indonésie,ASI,88.8,2.132,227345100.0
4,Brésil,LAT,90.2,1.827,191971500.0


In [9]:
# Preview the last rows of the dataset.
df.tail()

Unnamed: 0,Country,Continent,female literacy,fertility,population
157,Vanuatu,OCE,79.5,3.883,233866.0
158,Samoa,OCE,98.5,3.852,178869.0
159,Sao Tomé-et-Principe,AF,83.3,3.718,160174.0
160,Aruba,LAT,98.0,1.732,105455.0
161,Tonga,ASI,99.1,3.928,103566.0


In [10]:
# List the column labels of the DataFrame.
df.columns

Index(['Country ', 'Continent', 'female literacy', 'fertility', 'population'], dtype='object')

In [11]:
# Shape of the data as a (rows, columns) tuple.
df.shape

(162, 5)

In [12]:
# Summary of the frame: index range, non-null count and dtype of each column,
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 5 columns):
Country            162 non-null object
Continent          162 non-null object
female literacy    162 non-null float64
fertility          162 non-null float64
population         162 non-null float64
dtypes: float64(3), object(2)
memory usage: 6.4+ KB


In [13]:
# Access a column as an attribute of the DataFrame.
df.Continent

0      ASI
1      ASI
2      NAM
3      ASI
4      LAT
5      ASI
6      ASI
7       AF
8      EUR
9      ASI
10     LAT
11     ASI
12     ASI
13     EUR
14      AF
15      AF
16     ASI
17     ASI
18     ASI
19      AF
20     EUR
21     EUR
22     EUR
23     ASI
24      AF
25     ASI
26     EUR
27     EUR
28     LAT
29      AF
      ... 
132     AF
133    EUR
134    EUR
135     AF
136     AF
137     AF
138     AF
139    EUR
140    LAT
141    ASI
142     AF
143     AF
144    EUR
145    ASI
146    ASI
147     AF
148     AF
149    ASI
150    LAT
151     AF
152    EUR
153    ASI
154    LAT
155    EUR
156    ASI
157    OCE
158    OCE
159     AF
160    LAT
161    ASI
Name: Continent, Length: 162, dtype: object

In [14]:
# Columns can also be accessed with square brackets, which is required when the
# label contains a space. Show the five most frequent literacy values.
df['female literacy'].value_counts().head()

99.0    15
99.7     4
99.8     3
91.5     3
98.5     3
Name: female literacy, dtype: int64

In [15]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,female literacy,fertility,population
count,162.0,162.0,162.0
mean,80.107407,2.878673,40687080.0
std,23.052415,1.427597,140969500.0
min,12.6,0.966,103566.0
25%,66.425,1.82325,3368292.0
50%,90.0,2.3675,9707404.0
75%,98.5,3.88025,28829910.0
max,100.0,7.069,1324655000.0


### Visualizar Dataframes

In [16]:
# Show the first rows again as a reminder of the columns used in the plots below.
df.head()

Unnamed: 0,Country,Continent,female literacy,fertility,population
0,Chine,ASI,90.5,1.769,1324655000.0
1,Inde,ASI,50.8,2.682,1139965000.0
2,USA,NAM,99.0,2.077,304060000.0
3,Indonésie,ASI,88.8,2.132,227345100.0
4,Brésil,LAT,90.2,1.827,191971500.0


In [17]:
# Bubble chart of fertility (x) against female literacy (y), one circle per row.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 600, height = 600,
           x_axis_label = 'fertility (children per woman)',
           y_axis_label = 'female literacy (% population)')
# Marker size is proportional to population. The 1e-7 factor brings the raw
# head counts down to screen units (the largest population, ~1.3e9, maps to ~132).
p.circle(df['fertility'], df['female literacy'], size = 1e-7 * df['population'], fill_alpha = 0.5)
show(p)

In [18]:
# Build two subsets holding only the Latin America ('LAT') and Africa ('AF') rows.
df_LAT = df.loc[df['Continent'] == 'LAT']
df_AF = df.loc[df['Continent'] == 'AF']

In [19]:
# Same fertility/literacy plot using the filtered subsets, drawn together on one
# figure with a different marker for each subset.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 600, height = 600,
           x_axis_label = 'fertility (children per woman)',
           y_axis_label = 'female literacy (% population)')
# Latin America as circles, Africa as "x" markers.
p.circle(df_LAT['fertility'], df_LAT['female literacy'])
p.x(df_AF['fertility'], df_AF['female literacy'])
show(p)

In [20]:
# Same fertility/literacy plot using the filtered subsets, this time telling
# them apart by colour instead of marker shape.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 600, height = 600,
           x_axis_label = 'fertility (children per woman)',
           y_axis_label = 'female literacy (% population)')
# Latin America in blue, Africa in red; both semi-transparent so overlaps stay visible.
p.circle(df_LAT['fertility'], df_LAT['female literacy'], size = 10, color = 'blue', alpha = 0.7)
p.circle(df_AF['fertility'], df_AF['female literacy'], size = 10, color = 'red', alpha = 0.7)
show(p)

In [21]:
# Same coloured plot of the filtered subsets, now with a legend entry per subset.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 600, height = 600,
           x_axis_label = 'fertility (children per woman)',
           y_axis_label = 'female literacy (% population)')
# `legend_label` replaces the `legend` keyword, which was deprecated in
# Bokeh 1.4 and removed in 3.0 (so this cell needs Bokeh >= 1.4).
p.circle(df_LAT['fertility'], df_LAT['female literacy'], size = 10, color = 'blue', alpha = 0.7, legend_label = 'LAT')
p.circle(df_AF['fertility'], df_AF['female literacy'], size = 10, color = 'red', alpha = 0.7, legend_label = 'AF')
# Anchor the legend in the bottom-left corner of the plot area.
p.legend.location = 'bottom_left'
show(p)

In [22]:
# Minimal line chart: a single thick red polyline through five points.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 300, height = 300)
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_color = "red", line_width = 5)
show(p)

In [23]:
# Download the AAPL price history ("5y" chart) as CSV and parse the 'date'
# column into datetimes so it can drive a datetime axis.
# NOTE(review): this cell needs network access; confirm the endpoint is still served.
AAPL = pd.read_csv("https://api.iextrading.com/1.0/stock/aapl/chart/5y?format=csv", parse_dates=['date'])

In [24]:
# List the columns available in the downloaded quotes.
AAPL.columns

Index(['date', 'open', 'high', 'low', 'close', 'volume', 'unadjustedVolume',
       'change', 'changePercent', 'vwap', 'label', 'changeOverTime'],
      dtype='object')

In [25]:
# Preview the first rows of the quotes.
AAPL.head()

Unnamed: 0,date,open,high,low,close,volume,unadjustedVolume,change,changePercent,vwap,label,changeOverTime
0,2013-04-29,54.677,56.3893,54.6185,55.9345,159958876,22851268,1.6795,3.096,55.7966,"Apr 29, 13",0.0
1,2013-04-30,56.5821,57.9021,56.1881,57.5809,172737600,24676800,1.6464,2.943,57.1781,"Apr 30, 13",0.029434
2,2013-05-01,57.7993,57.8604,56.4898,57.127,126720237,18102891,-0.453855,-0.788,57.1217,"May 1, 13",0.02132
3,2013-05-02,57.4508,58.3364,57.3013,57.9372,105449078,15064154,0.810175,1.418,58.0741,"May 2, 13",0.035804
4,2013-05-03,58.6901,58.9398,58.4092,58.5172,90319201,12902743,0.579995,1.001,58.6445,"May 3, 13",0.046174


In [26]:
# Time-series line chart; x_axis_type = "datetime" makes the x axis format dates.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0. The y-axis label typo
# ('US Dollards') is corrected as well.
applePlot = figure(height = 400, width = 400, x_axis_type = "datetime", x_axis_label = 'Date', y_axis_label = 'US Dollars')
# Row-wise product of closing price and traded volume.
# NOTE(review): this looks like the dollar amount traded per row rather than the
# company's market value; confirm that this is the intended metric.
companyValue = AAPL['close'] * AAPL['volume']
applePlot.line(AAPL['date'], companyValue)
show(applePlot)

In [27]:
# Minimal vertical bar chart: five bars of width 0.5 centred on x = 1..5.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 300, height = 300)
p.vbar(x = [1, 2, 3, 4, 5], width = 0.5, top = [6, 7, 2, 4, 5], color = "firebrick")
show(p)

In [28]:
# Each polygon is written as an (x coordinates, y coordinates) pair; patches()
# wants all x lists and all y lists separately, so they are split apart below.
polygons = [
    ([1, 1, 2, 2], [2, 5, 5, 2]),
    ([2, 2, 4], [3, 5, 5]),
    ([2, 2, 3, 3], [2, 3, 4, 2]),
]
xs = [poly_x for poly_x, _ in polygons]
ys = [poly_y for _, poly_y in polygons]

# Draw the three polygons, one fill colour each, with white outlines.
p = figure()
p.patches(
    xs,
    ys,
    fill_color = ["red", "blue", "green"],
    line_color = "white",
)
show(p)

In [29]:
import json

In [30]:
# Read the state outlines from a local JSON file.
# The encoding is given explicitly: JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open("states.json", 'r', encoding = "utf-8") as f:
    states = json.load(f)

In [31]:
# Top-level keys of the loaded JSON object.
states.keys()

dict_keys(['az', 'co', 'nm', 'ut'])

In [32]:
# Keys stored under a single entry ('az').
states['az'].keys()

dict_keys(['lons', 'lats'])

In [33]:
# Collect the outline of every state: longitudes become the patch x coordinates,
# latitudes the y coordinates. Both lists follow the dict's iteration order.
states_xs = [outline['lons'] for outline in states.values()]
states_ys = [outline['lats'] for outline in states.values()]

# Draw one filled polygon per state; colours are assigned in the same order.
p = figure()
p.patches(
    states_xs,
    states_ys,
    fill_color = ["red", "blue", "green", "yellow"],
    line_color = "white",
)
show(p)

In [34]:
import numpy as np

In [35]:
# Sample the cosine at 200 evenly spaced points covering [0, 2*pi].
x = np.linspace(0, 2 * np.pi, num = 200)
y = np.cos(x)

# Plot every sample as a circle marker on a default-sized figure.
p = figure()
p.circle(x, y)
show(p)

### Ejemplo de línea con puntos

In [36]:
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]

# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 400, height = 400)
# Draw the line first, then white-filled circles on top to mark each data point.
p.line(x, y, line_width = 2)
p.circle(x, y, fill_color = 'white', size = 8)
# Optional: uncomment to send the output to an HTML file instead.
#output_file("ejemplo.html")
show(p)

### Generar un ColumnDataSource manualmente

In [37]:
# Reference scatter plot built from plain Python lists.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 400, height = 400)

# Draw a scatter plot: one circle per (x, y) pair, each with its own size
# and its own fill opacity.
p.circle([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], size = [10, 15, 20, 25, 30], line_color = "navy",
         fill_color = "orange", fill_alpha = [0.5, 0.2, 0.2, 1, 1])

# Render the figure.
show(p)

In [38]:
from bokeh.plotting import ColumnDataSource

# Column-oriented data: every key names a column, every value holds its entries.
datos = dict(
    edad = [1, 2, 3, 4, 5],
    altura = [6, 7, 2, 4, 5],
)
# A dict passed as the single positional argument is taken as the source's data.
source1 = ColumnDataSource(datos)

In [39]:
# Scatter plot driven by a ColumnDataSource: the x and y arguments are column
# names ('edad', 'altura') looked up in `source1`, not literal coordinates.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 400, height = 400)
p.circle('edad', 'altura', size = 15, line_color = "navy", fill_color = "orange", fill_alpha = 0.5, source = source1)
show(p)

### Ejemplo de flores

In [40]:
# Load the iris DataFrame from Bokeh's sample data and preview it.
# NOTE: this rebinds `df`, which earlier cells used for the literacy/fertility data.
from bokeh.sampledata.iris import flowers as df
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [41]:
# Wrap the iris DataFrame in a ColumnDataSource so glyphs can refer to its columns by name.
source = ColumnDataSource(df)

In [42]:
# Scatter plot of petal length against petal width, with both columns taken from `source`.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 400, height = 400)
p.circle('petal_length', 'petal_width', source = source)
show(p)

### Ejemplo 100m

In [43]:
# Load the 100m dataset from a CSV file in the working directory.
fileData = pd.read_csv("100m.csv")

In [44]:
# List the columns of the 100m dataset.
fileData.columns

Index(['Name', 'Country', 'Medal', 'Time', 'Year', 'color'], dtype='object')

In [45]:
# Scatter plot of Time against Year, driven by a ColumnDataSource built from the CSV data.
dataSource = ColumnDataSource(fileData)
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width = 400, height = 400)
# 'color' is the name of a column in the data, so fill and outline colours are
# read from that column rather than set to one fixed value.
p.circle('Year', 'Time', source = dataSource, fill_color = 'color', line_color = 'color')
show(p)