# Altair

In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn

In [90]:
! pip install altair



In [91]:
import altair as alt
from vega_datasets import data

In [206]:
# Cars dataset; the chart cells that follow read it through `source`.
source = data.cars()

# Interactive scatter plot, one circle per car.
cars_scatter = alt.Chart(source).mark_circle(size=60).encode(
    x=alt.X('Horsepower'),            # values for the x axis
    y=alt.Y('Miles_per_Gallon'),      # values for the y axis
    color=alt.Color('Origin'),        # field that classifies (colours) the markers
    # fields shown in the label that appears over a marker
    tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon'],
)

# interactive() is what makes the chart dynamic
cars_scatter.interactive()

In [207]:
# Tick marks: horsepower as a quantitative x, cylinder count as an ordinal y,
# coloured by origin.
cars_ticks = alt.Chart(source).mark_tick().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Cylinders:O'),
    color=alt.Color('Origin'),
    tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon'],
)
cars_ticks.interactive()

### Encoding para los tipos de datos en altair

<table>
  <tr>
    <th>Tipo de dato</th>
    <th>Código de comando</th>
    <th>Descripción</th>
  </tr>
  <tr>
    <td>Cuantitativo</td>
    <td>Q</td>
    <td>Valor continuo real</td>
  </tr>
  <tr>
    <td>Ordinal</td>
    <td>O</td>
    <td>Cantidad discreta ordenada</td>
  </tr>
  <tr>
    <td>Nominal</td>
    <td>N</td>
    <td>Categoría discreta sin ordenar</td>
  </tr>
  <tr>
    <td>Temporal</td>
    <td>T</td>
    <td>Valores de tiempo</td>
  </tr>
  <tr>
    <td>Geojson</td>
    <td>G</td>
    <td>Forma geográfica</td>
  </tr>
</table>

In [208]:
# Shared scatter plot; only the colour encoding changes between panels.
base = (
    alt.Chart(source)
    .mark_point()
    .encode(x='Horsepower:Q', y='Miles_per_Gallon:Q')
    .properties(width=150, height=150)
)

# The same field (Cylinders) coloured with each data-type code, stacked vertically.
type_panels = [
    base.encode(color=f'Cylinders:{code}').properties(title=label)
    for code, label in (('Q', 'quantitative'), ('O', 'ordinal'), ('N', 'nominal'))
]
alt.vconcat(*type_panels)

In [209]:
# Binning the values of the colour scale

base = (
    alt.Chart(source)
    .mark_point()
    .encode(x='Horsepower:Q', y='Miles_per_Gallon:Q')
    .properties(width=150, height=150)
)

# Left panel: continuous colour scale.
continuous_panel = base.encode(color=alt.Color('Acceleration:Q'))
# Right panel: the same field binned with maxbins=5.
binned_panel = base.encode(
    color=alt.Color('Acceleration:Q', bin=alt.Bin(maxbins=5))
)

alt.hconcat(continuous_panel, binned_panel)

## Seleccionando regiones del gráfico

In [210]:
# Selecting regions of the chart needs two things:
# 1. a selection object (the brush)
# 2. a call to add_selection on the chart

brush = alt.selection_interval()  # the selection object

brush_chart = alt.Chart(source).mark_point().encode(
    x=alt.X('Miles_per_Gallon:Q'),
    y=alt.Y('Horsepower:Q'),
    color=alt.Color("Origin:N"),
)
brush_chart.add_selection(brush)  # attach the brush to the chart

In [211]:
# For the chart to react to the brush a condition is required;
# alt.condition provides it.

brush = alt.selection_interval()

# Colour by Origin where the brush condition holds, light gray otherwise.
brush_color = alt.condition(brush, 'Origin:N', alt.value('lightgray'))

alt.Chart(source).mark_point().encode(
    x='Miles_per_Gallon:Q',
    y='Horsepower:Q',
    color=brush_color,
).add_selection(brush)

In [212]:
brush = alt.selection_interval()

# Template chart without an x channel: each panel below supplies its own x,
# and both panels carry the same `brush` selection.
highlight = alt.condition(brush, 'Origin:N', alt.value('lightgray'))
chart = (
    alt.Chart(source)
    .mark_point()
    .encode(y='Horsepower:Q', color=highlight)
    .properties(width=250, height=250)
    .add_selection(brush)
)

alt.hconcat(
    chart.encode(x='Acceleration:Q'),
    chart.encode(x='Miles_per_Gallon:Q'),
)

In [213]:
# Restricting the selection to the x axis

brush = alt.selection_interval(encodings=['x'])

highlight = alt.condition(brush, 'Origin:N', alt.value('lightgray'))
chart = (
    alt.Chart(source)
    .mark_point()
    .encode(y='Horsepower:Q', color=highlight)
    .properties(width=250, height=250)
    .add_selection(brush)
)

# Two panels built from the same template, differing only in the x field.
left_panel = chart.encode(x='Acceleration:Q')
right_panel = chart.encode(x='Miles_per_Gallon:Q')
left_panel | right_panel

In [214]:
# Make the legend box work as a filter:

selection = alt.selection_multi(fields=['Origin', 'Cylinders'])

# Shared colour rule for both charts; the built-in legend is switched off
# because the `legend` chart below takes its place.
color = alt.condition(
    selection,
    alt.Color('Origin:N', legend=None),
    alt.value('lightgray'),
)

scatter = alt.Chart(source).mark_point().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Miles_per_Gallon:Q'),
    color=color,
    tooltip='Name:N',
)

# Origin x Cylinders grid of rectangles that carries the selection.
legend = (
    alt.Chart(source)
    .mark_rect()
    .encode(
        y=alt.Y('Origin:N', axis=alt.Axis(orient='right')),
        x='Cylinders:O',
        color=color,
    )
    .add_selection(selection)
)

alt.hconcat(scatter, legend)

In [215]:
# Selection menu:

input_dropdown = alt.binding_select(options=['Europe', 'Japan', 'USA'])
selection = alt.selection_single(
    name='Country of ',
    fields=['Origin'],
    bind=input_dropdown,
)
color = alt.condition(
    selection,
    alt.Color('Origin:N', legend=None),
    alt.value('lightgray'),
)

dropdown_chart = alt.Chart(source).mark_point().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Miles_per_Gallon:Q'),
    color=color,
    tooltip='Name:N',
)
dropdown_chart.add_selection(selection)

In [216]:
# Loading the map and the data (only the dataset URLs are taken here)
airports = data.airports.url
flights_airport = data.flights_airport.url

# "states" feature of the us_10m topology, used for the background map
states = alt.topo_feature(data.us_10m.url, feature="states")

# Selection driven by mouse movement: picks the nearest point on mouseover,
# keyed on the "origin" field.
# NOTE(review): empty="none" presumably means nothing is selected until the
# pointer is over a point -- confirm against the Altair selection docs.
select_city = alt.selection_single(
    on="mouseover", nearest=True, fields=["origin"], empty="none"
)

# Attributes to look up in the airports data, matched on the "iata" key
lookup_data = alt.LookupData(
    airports, key="iata", fields=["state", "latitude", "longitude"]
)

# Background map
background = alt.Chart(states).mark_geoshape(
    fill="lightgray",
    stroke="white"
).properties(
    width=750,
    height=500
).project("albersUsa")

# Connections: one rule per flight record. The first lookup attaches the
# origin airport's fields, the second attaches the destination's under the
# names state/lat2/lon2, and the final filter keeps only the rows matching
# the current select_city selection.
connections = alt.Chart(flights_airport).mark_rule(opacity=0.35).encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q"
).transform_lookup(
    lookup="origin",
    from_=lookup_data
).transform_lookup(
    lookup="destination",
    from_=lookup_data,
    as_=["state", "lat2", "lon2"]
).transform_filter(
    select_city
)

# Airport points: rows are counted per origin ("routes") before the lookup
# adds coordinates; records whose state is "PR" or "VI" are dropped.
# The selection is attached to this layer.
points = alt.Chart(flights_airport).mark_circle().encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    size=alt.Size("routes:Q", scale=alt.Scale(range=[0, 1000]), legend=None),
    order=alt.Order("routes:Q", sort="descending"),
    tooltip=["origin:N", "routes:Q"]
).transform_aggregate(
    routes="count()",
    groupby=["origin"]
).transform_lookup(
    lookup="origin",
    from_=lookup_data
).transform_filter(
    (alt.datum.state != "PR") & (alt.datum.state != "VI")
).add_selection(
    select_city
)

# Layering everything together and plotting (view border removed)
(background + connections + points).configure_view(stroke=None)

In [117]:
# Load the gapminder dataset into `ejemplo` and display it.
ejemplo=data.gapminder()
ejemplo

Unnamed: 0,year,country,cluster,pop,life_expect,fertility
0,1955,Afghanistan,0,8891209,30.332,7.7000
1,1960,Afghanistan,0,9829450,31.997,7.7000
2,1965,Afghanistan,0,10997885,34.020,7.7000
3,1970,Afghanistan,0,12430623,36.088,7.7000
4,1975,Afghanistan,0,14132019,38.438,7.7000
...,...,...,...,...,...,...
688,1985,Venezuela,3,16997509,70.190,3.6485
689,1990,Venezuela,3,19325222,71.150,3.2500
690,1995,Venezuela,3,21555902,72.146,2.9415
691,2000,Venezuela,3,23542649,72.766,2.7230


In [118]:
# Summary of the frame: columns, non-null counts and dtypes.
ejemplo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 693 entries, 0 to 692
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   year         693 non-null    int64  
 1   country      693 non-null    object 
 2   cluster      693 non-null    int64  
 3   pop          693 non-null    int64  
 4   life_expect  693 non-null    float64
 5   fertility    693 non-null    float64
dtypes: float64(2), int64(3), object(1)
memory usage: 32.6+ KB


In [131]:
# Distinct cluster ids present in the data.
ejemplo["cluster"].unique()

array([0, 3, 4, 1, 5, 2])

In [165]:
# Number of distinct countries.
ejemplo["country"].unique().size

63

In [125]:
# One sub-frame per cluster id (0 to 5), keyed by that id.
dicc_clusters = {
    cluster_id: ejemplo[ejemplo["cluster"] == cluster_id]
    for cluster_id in range(6)
}
dicc_clusters

{0:      year      country  cluster         pop  life_expect  fertility
 0    1955  Afghanistan        0     8891209       30.332     7.7000
 1    1960  Afghanistan        0     9829450       31.997     7.7000
 2    1965  Afghanistan        0    10997885       34.020     7.7000
 3    1970  Afghanistan        0    12430623       36.088     7.7000
 4    1975  Afghanistan        0    14132019       38.438     7.7000
 5    1980  Afghanistan        0    15112149       39.854     7.8000
 6    1985  Afghanistan        0    13796928       40.822     7.9000
 7    1990  Afghanistan        0    14669339       41.674     8.0000
 8    1995  Afghanistan        0    20881480       41.763     8.0000
 9    2000  Afghanistan        0    23898198       42.129     7.4792
 10   2005  Afghanistan        0    29928987       43.828     7.0685
 66   1955   Bangladesh        0    49601520       39.348     6.7600
 67   1960   Bangladesh        0    54621538       41.216     6.8500
 68   1965   Bangladesh        

In [161]:
def _top_countries(cluster_df, n=5):
    """Return the countries of one cluster ranked by their peak population.

    Args:
        cluster_df: frame with at least the columns "country" and "pop".
        n: maximum number of country names to return.

    Returns:
        List of country names, largest peak population first. It is shorter
        than ``n`` when the cluster holds fewer than ``n`` countries.
    """
    # Only "pop" decides the ranking, so only that column is aggregated.
    peak_pop = cluster_df.groupby("country")["pop"].max()
    return peak_pop.sort_values(ascending=False).index[:n].tolist()


# Top countries of each cluster id, under the region name used for it below.
asia = _top_countries(dicc_clusters[0])
europa = _top_countries(dicc_clusters[1])
africa = _top_countries(dicc_clusters[2])
america = _top_countries(dicc_clusters[3])
oceania = _top_countries(dicc_clusters[4])
oriente = _top_countries(dicc_clusters[5])

In [163]:
# Show the country lists selected for each cluster.
asia,europa,africa,america,oceania,oriente

(['India', 'Pakistan', 'Bangladesh', 'Afghanistan'],
 ['Germany', 'Turkey', 'France', 'United Kingdom', 'Italy'],
 ['Nigeria', 'South Africa', 'Kenya', 'Rwanda'],
 ['United States', 'Brazil', 'Mexico', 'Colombia', 'Argentina'],
 ['China', 'Indonesia', 'Japan', 'Philippines', 'North Korea'],
 ['Egypt', 'Iran', 'Saudi Arabia', 'Iraq', 'Israel'])

In [191]:
# Rows of the gapminder data for the countries listed in `america`.
df1 = ejemplo.loc[ejemplo["country"].isin(america)]
df1.head()

Unnamed: 0,year,country,cluster,pop,life_expect,fertility
11,1955,Argentina,3,18927821,64.399,3.1265
12,1960,Argentina,3,20616009,65.142,3.0895
13,1965,Argentina,3,22283100,65.634,3.049
14,1970,Argentina,3,23962313,67.065,3.1455
15,1975,Argentina,3,26081880,68.481,3.44


In [192]:
# Population bars for the `america` countries, one facet column per country,
# filtered to the year picked with a slider.
source = df1

# One fixed colour per country, in the order of the `america` list.
palette = ["green", "red", "steelblue", "salmon", "orange"]
colores = alt.Scale(domain=america, range=palette)

# Year slider bound to a single-value selection, starting at 2005.
slider = alt.binding_range(min=1955, max=2005, step=5)
select_year = alt.selection_single(
    name="year", fields=['year'], bind=slider, init={'year': 2005}
)

# The y domain is pinned to the overall maximum so the axis does not
# rescale when the year changes.
pop_axis = alt.Y('pop', scale=alt.Scale(domain=(0, df1["pop"].max())))

bars = alt.Chart(source).mark_bar().encode(
    y=pop_axis,
    color=alt.Color('country', scale=colores),
    column='country',
)

(
    bars
    .add_selection(select_year)
    .transform_filter(select_year)
    .configure_facet(spacing=8)
    .properties(width=50, height=150)
)

In [198]:
def Altair_plot(region):
    """Build a per-country population bar chart with a year slider.

    Reads the module-level ``ejemplo`` frame, keeps the rows whose country is
    in ``region`` and draws one facet column per country.

    Args:
        region: list of country names; used both as the row filter and as the
            domain of the colour scale.

    Returns:
        The configured Altair chart.
    """
    source = ejemplo[ejemplo.country.isin(region)]

    # Fixed palette mapped onto the countries in the order given by `region`.
    palette = ["green", "red", "steelblue", "salmon", "orange"]
    colores = alt.Scale(domain=region, range=palette)

    # Year slider bound to a single-value selection, starting at 2005.
    slider = alt.binding_range(min=1955, max=2005, step=5)
    select_year = alt.selection_single(
        name="year", fields=['year'], bind=slider, init={'year': 2005}
    )

    # The y domain is pinned to the maximum of the filtered rows so the axis
    # stays fixed while the slider moves.
    pop_axis = alt.Y('pop', scale=alt.Scale(domain=(0, source["pop"].max())))

    bars = alt.Chart(source).mark_bar().encode(
        y=pop_axis,
        color=alt.Color('country', scale=colores),
        column='country',
    )
    filtered = bars.add_selection(select_year).transform_filter(select_year)
    chart = filtered.configure_facet(spacing=8).properties(width=50, height=150)
    return chart

In [201]:
# Render the chart for the countries listed in `oriente`.
Altair_plot(oriente)

In [204]:
# Synthetic noisy sample, drawn from a seeded generator
rng = np.random.RandomState(1)
x = rng.rand(40) ** 2
y = 10 - 1.0 / (x + 0.1) + rng.randn(40)
source = pd.DataFrame({"x": x, "y": y})

# Polynomial degrees to fit
degree_list = [1, 2, 3, 4, 5]

# Raw points, drawn as black circles
base = alt.Chart(source).mark_circle(color="black").encode(x="x", y="y")


def _fit_layer(order):
    """Return the line layer for the polynomial regression of degree `order`."""
    label = str(order)
    # The fitted values land in a column named after the degree ...
    fitted = base.transform_regression(
        "x", "y", method="poly", order=order, as_=["x", label]
    )
    # ... which is folded into ("degree", "y") pairs so colour can encode the degree.
    folded = fitted.mark_line().transform_fold([label], as_=["degree", "y"])
    return folded.encode(alt.Color("degree:N"))


polynomial_fit = [_fit_layer(order) for order in degree_list]

alt.layer(base, *polynomial_fit)