# Introducción a Polars 🐻‍❄️



In [1]:
from pathlib import Path

import pandas as pd
import polars as pl

## 🐻‍❄️
Comparemos un poco la velocidad al leer un archivo csv

In [2]:
%%timeit
# Benchmark: read the census CSV with Polars.
# The file is opened as latin1 text, re-encoded to UTF-8 bytes in Python, and
# those bytes are handed to pl.read_csv, so the timing includes that re-encoding.
# NOTE(review): names assigned inside a %%timeit cell are presumably not kept in
# the notebook namespace, which would explain why the read is repeated in a later cell.
with open('../data/censo.csv', 'r', encoding='latin1') as fh:
    polars_df = pl.read_csv(fh.read().encode('utf-8')) 
polars_df.head()

47.7 ms ± 612 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 🐼
Pandas es más de 10x más lento (≈13x en esta medición: 638 ms vs. 47.7 ms)

In [3]:
%%timeit
# Benchmark: read the same census CSV with pandas, letting pandas decode latin1 itself.
pandas_df = pd.read_csv('../data/censo.csv', encoding='latin1') 
pandas_df.head()

638 ms ± 9.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## ¿Qué hay dentro de Polars? 🐻‍❄️ <br><br>
### Rust 🦀
![rust](../img/Rust.png) <br>
Fuente: https://www.javatpoint.com/rust-features

## Apache Arrow 🏹

![arrow](../img/arrow.png)<br><br><br>
### El problema 
![babel](../img/The_Tower_of_Babel.jpg)<br><br><br>
### La solución: Apache Arrow
![arrow-ecosystem](../img/arrow-ecosystem.png)


### ¿Cómo funciona?
<div style="background-color: white">
<img src='../img/arrow-columnar.png'> 
</div>

In [4]:
# Load the census CSV into the DataFrame used by the following cells.
# The file is decoded as latin1 and re-encoded to UTF-8 bytes before pl.read_csv parses it.
with open('../data/censo.csv', 'r', encoding='latin1') as fh:
    polars_df = pl.read_csv(fh.read().encode('utf-8')) 
polars_df.head()

ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBMAS,POBFEM,P_0A2,P_0A2_M,P_0A2_F,P_3YMAS,P_3YMAS_M,P_3YMAS_F,P_5YMAS,P_5YMAS_M,P_5YMAS_F,P_12YMAS,P_12YMAS_M,P_12YMAS_F,P_15YMAS,P_15YMAS_M,P_15YMAS_F,P_18YMAS,P_18YMAS_M,P_18YMAS_F,P_3A5,P_3A5_M,P_3A5_F,P_6A11,P_6A11_M,P_6A11_F,P_8A14,P_8A14_M,...,PHOGJEF_M,PHOGJEF_F,VIVTOT,TVIVHAB,TVIVPAR,VIVPAR_HAB,TVIVPARHAB,VIVPAR_DES,VIVPAR_UT,OCUPVIVPAR,PROM_OCUP,PRO_OCUP_C,VPH_PISODT,VPH_PISOTI,VPH_1DOR,VPH_2YMASD,VPH_1CUART,VPH_2CUART,VPH_3YMASC,VPH_C_ELEC,VPH_S_ELEC,VPH_AGUADV,VPH_AGUAFV,VPH_EXCSA,VPH_DRENAJ,VPH_NODREN,VPH_C_SERV,VPH_SNBIEN,VPH_RADIO,VPH_TV,VPH_REFRI,VPH_LAVAD,VPH_AUTOM,VPH_PC,VPH_TELEF,VPH_CEL,VPH_INTER
i64,str,i64,str,i64,str,str,i64,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,...,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
9,"""Distrito Feder...",0,"""Total del Dist...",0,"""Total de la en...","""0000""",0,8851080,"""4233783""","""4617297""","""357390""","""182118""","""175272""","""8295664""","""3952770""","""4342894""","""8034809""","""3820187""","""4214622""","""7110465""","""3350128""","""3760337""","""6715516""","""3150628""","""3564888""","""6289306""","""2935800""","""3353506""","""391086""","""199034""","""192052""","""794113""","""403608""","""390505""","""926644""","""470131""",...,"""6246023""","""2349107""",2745180,"""2453770""","""2679944""","""2388534""","""2453031""","""211245""","""80165""","""8595130""","""3.60""","""0.87""","""2334171""","""24361""","""756690""","""1618569""","""137775""","""287658""","""1945806""","""2375582""","""2256""","""2312839""","""58268""","""2362481""","""2362017""","""7178""","""2301380""","""6250""","""2185469""","""2337884""","""2165900""","""1854623""","""1110374""","""1171631""","""1715772""","""1817230""","""936648"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",0,"""Total del muni...","""0000""",0,414711,"""196053""","""218658""","""14917""","""7688""","""7229""","""390179""","""183551""","""206628""","""379605""","""178134""","""201471""","""340634""","""158502""","""182132""","""323722""","""149891""","""173831""","""304800""","""140336""","""164464""","""16011""","""8193""","""7818""","""33534""","""16856""","""16678""","""39434""","""19865""",...,"""284581""","""120042""",132135,"""117264""","""128955""","""114084""","""117237""","""10232""","""4639""","""404623""","""3.55""","""0.84""","""111818""","""673""","""32675""","""80908""","""3769""","""11718""","""97887""","""113708""","""28""","""112763""","""584""","""112900""","""113130""","""118""","""112387""","""147""","""106564""","""112503""","""107269""","""93517""","""52913""","""60003""","""86750""","""88236""","""47225"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",1,"""Total de la lo...","""0000""",0,414711,"""196053""","""218658""","""14917""","""7688""","""7229""","""390179""","""183551""","""206628""","""379605""","""178134""","""201471""","""340634""","""158502""","""182132""","""323722""","""149891""","""173831""","""304800""","""140336""","""164464""","""16011""","""8193""","""7818""","""33534""","""16856""","""16678""","""39434""","""19865""",...,"""284581""","""120042""",132135,"""117264""","""128955""","""114084""","""117237""","""10232""","""4639""","""404623""","""3.55""","""0.84""","""111818""","""673""","""32675""","""80908""","""3769""","""11718""","""97887""","""113708""","""28""","""112763""","""584""","""112900""","""113130""","""118""","""112387""","""147""","""106564""","""112503""","""107269""","""93517""","""52913""","""60003""","""86750""","""88236""","""47225"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",1,"""Total AGEB urb...","""0010""",0,3424,"""1600""","""1824""","""71""","""35""","""36""","""3228""","""1502""","""1726""","""3174""","""1477""","""1697""","""2916""","""1343""","""1573""","""2787""","""1274""","""1513""","""2650""","""1200""","""1450""","""77""","""37""","""40""","""235""","""122""","""113""","""294""","""148""",...,"""2280""","""1021""",879,"""856""","""838""","""815""","""856""","""15""","""8""","""3301""","""4.05""","""0.83""","""797""","""5""","""61""","""749""","""0""","""19""","""790""","""811""","""0""","""809""","""*""","""809""","""809""","""*""","""809""","""0""","""771""","""805""","""804""","""762""","""466""","""447""","""751""","""598""","""346"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",1,"""Azcapotzalco""","""0010""",1,202,"""96""","""106""","""6""","""4""","""*""","""193""","""91""","""102""","""189""","""91""","""98""","""172""","""84""","""88""","""162""","""79""","""83""","""157""","""76""","""81""","""4""","""0""","""4""","""17""","""7""","""10""","""23""","""10""",...,"""127""","""72""",48,"""*""","""*""","""*""","""*""","""*""","""*""","""199""","""4.23""","""0.90""","""46""","""0""","""5""","""41""","""0""","""*""","""44""","""46""","""0""","""46""","""0""","""46""","""46""","""0""","""46""","""0""","""43""","""46""","""46""","""45""","""28""","""21""","""45""","""26""","""15"""


In [5]:
# print() shows the plain-text repr (shape line plus a box-drawn table)
# instead of the notebook's rich table display used in the previous cell.
print(polars_df.head())

shape: (5, 198)
┌─────────┬──────────┬─────┬─────────────────────┬─────┬─────────┬───────────┬─────────┬───────────┐
│ ENTIDAD ┆ NOM_ENT  ┆ MUN ┆ NOM_MUN             ┆ ... ┆ VPH_PC  ┆ VPH_TELEF ┆ VPH_CEL ┆ VPH_INTER │
│ ---     ┆ ---      ┆ --- ┆ ---                 ┆     ┆ ---     ┆ ---       ┆ ---     ┆ ---       │
│ i64     ┆ str      ┆ i64 ┆ str                 ┆     ┆ str     ┆ str       ┆ str     ┆ str       │
╞═════════╪══════════╪═════╪═════════════════════╪═════╪═════════╪═══════════╪═════════╪═══════════╡
│ 9       ┆ Distrito ┆ 0   ┆ Total del Distrito  ┆ ... ┆ 1171631 ┆ 1715772   ┆ 1817230 ┆ 936648    │
│         ┆ Federal  ┆     ┆ Federal             ┆     ┆         ┆           ┆         ┆           │
│ 9       ┆ Distrito ┆ 2   ┆ Azcapotzalco        ┆ ... ┆ 60003   ┆ 86750     ┆ 88236   ┆ 47225     │
│         ┆ Federal  ┆     ┆                     ┆     ┆         ┆           ┆         ┆           │
│ 9       ┆ Distrito ┆ 2   ┆ Azcapotzalco        ┆ ... ┆ 60003   ┆ 86750   

In [6]:
# Column names, returned as a Python list of strings.
polars_df.columns

['ENTIDAD',
 'NOM_ENT',
 'MUN',
 'NOM_MUN',
 'LOC',
 'NOM_LOC',
 'AGEB',
 'MZA',
 'POBTOT',
 'POBMAS',
 'POBFEM',
 'P_0A2',
 'P_0A2_M',
 'P_0A2_F',
 'P_3YMAS',
 'P_3YMAS_M',
 'P_3YMAS_F',
 'P_5YMAS',
 'P_5YMAS_M',
 'P_5YMAS_F',
 'P_12YMAS',
 'P_12YMAS_M',
 'P_12YMAS_F',
 'P_15YMAS',
 'P_15YMAS_M',
 'P_15YMAS_F',
 'P_18YMAS',
 'P_18YMAS_M',
 'P_18YMAS_F',
 'P_3A5',
 'P_3A5_M',
 'P_3A5_F',
 'P_6A11',
 'P_6A11_M',
 'P_6A11_F',
 'P_8A14',
 'P_8A14_M',
 'P_8A14_F',
 'P_12A14',
 'P_12A14_M',
 'P_12A14_F',
 'P_15A17',
 'P_15A17_M',
 'P_15A17_F',
 'P_18A24',
 'P_18A24_M',
 'P_18A24_F',
 'P_15A49_F',
 'P_60YMAS',
 'P_60YMAS_M',
 'P_60YMAS_F',
 'REL_H_M',
 'POB0_14',
 'POB15_64',
 'POB65_MAS',
 'PROM_HNV',
 'PNACENT',
 'PNACENT_M',
 'PNACENT_F',
 'PNACOE',
 'PNACOE_M',
 'PNACOE_F',
 'PRES2005',
 'PRES2005_M',
 'PRES2005_F',
 'PRESOE05',
 'PRESOE05_M',
 'PRESOE05_F',
 'P3YM_HLI',
 'P3YM_HLI_M',
 'P3YM_HLI_F',
 'P3HLINHE',
 'P3HLINHE_M',
 'P3HLINHE_F',
 'P3HLI_HE',
 'P3HLI_HE_M',
 'P3HLI_HE_F',
 '

In [7]:
# Data type of every column, in column order.
# Most columns come out as Utf8; presumably because masked values appear as "*"
# in the data (see the head() output) -- TODO confirm.
polars_df.dtypes

[Int64,
 Utf8,
 Int64,
 Utf8,
 Int64,
 Utf8,
 Utf8,
 Int64,
 Int64,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 Utf8,
 

![Polars Data types](../img/Polars-data-types.png)

In [8]:
# Column name -> dtype mapping (the information of .columns and .dtypes combined).
polars_df.schema

{'ENTIDAD': Int64,
 'NOM_ENT': Utf8,
 'MUN': Int64,
 'NOM_MUN': Utf8,
 'LOC': Int64,
 'NOM_LOC': Utf8,
 'AGEB': Utf8,
 'MZA': Int64,
 'POBTOT': Int64,
 'POBMAS': Utf8,
 'POBFEM': Utf8,
 'P_0A2': Utf8,
 'P_0A2_M': Utf8,
 'P_0A2_F': Utf8,
 'P_3YMAS': Utf8,
 'P_3YMAS_M': Utf8,
 'P_3YMAS_F': Utf8,
 'P_5YMAS': Utf8,
 'P_5YMAS_M': Utf8,
 'P_5YMAS_F': Utf8,
 'P_12YMAS': Utf8,
 'P_12YMAS_M': Utf8,
 'P_12YMAS_F': Utf8,
 'P_15YMAS': Utf8,
 'P_15YMAS_M': Utf8,
 'P_15YMAS_F': Utf8,
 'P_18YMAS': Utf8,
 'P_18YMAS_M': Utf8,
 'P_18YMAS_F': Utf8,
 'P_3A5': Utf8,
 'P_3A5_M': Utf8,
 'P_3A5_F': Utf8,
 'P_6A11': Utf8,
 'P_6A11_M': Utf8,
 'P_6A11_F': Utf8,
 'P_8A14': Utf8,
 'P_8A14_M': Utf8,
 'P_8A14_F': Utf8,
 'P_12A14': Utf8,
 'P_12A14_M': Utf8,
 'P_12A14_F': Utf8,
 'P_15A17': Utf8,
 'P_15A17_M': Utf8,
 'P_15A17_F': Utf8,
 'P_18A24': Utf8,
 'P_18A24_M': Utf8,
 'P_18A24_F': Utf8,
 'P_15A49_F': Utf8,
 'P_60YMAS': Utf8,
 'P_60YMAS_M': Utf8,
 'P_60YMAS_F': Utf8,
 'REL_H_M': Utf8,
 'POB0_14': Utf8,
 'POB15_64'

## Accediendo a los datos

Comparativo de Slicing entre 🐻‍❄️ y 🐼<br><br>
![Comparativo Slicing](../img/Polars-pandas-slicing.png)<br><br>
Fuente: https://pola-rs.github.io/polars-book/user-guide/howcani/selecting_data/selecting_data_indexing.html

Polars soporta `vectorization`
<br><br>
El uso de Indexing es muy limitado:
- Extraer un valor escalar de un `DataFrame`
- Convertir una columna de un `DataFrame` a una `Series`
- Para exploración de datos e inspeccionar filas o columnas

In [9]:
# Square-bracket indexing with one column name; head(1) keeps only the first value.
polars_df["NOM_ENT"].head(1)

NOM_ENT
str
"""Distrito Feder..."


Aunque es posible indexar varias columnas, Polars **NO va a paralelizar** la ejecución

In [10]:
# Square-bracket indexing with a list of names yields a two-column frame;
# head(1) keeps only its first row.
polars_df[["LOC","NOM_ENT"]].head(1)

LOC,NOM_ENT
i64,str
0,"""Distrito Feder..."


## `select` method
Método que usaremos ampliamente en Polars. Se comporta distinto al indexado, ya que no sólo permite el nombre de una columna; es posible usar:
- Una sola columna
- Una lista de columnas 💪
- Tipos de datos 🤯


In [11]:
# select with a list of column names keeps just those columns, in the given order.
polars_df.select(["NOM_ENT","NOM_MUN"]).head()

NOM_ENT,NOM_MUN
str,str
"""Distrito Feder...","""Total del Dist..."
"""Distrito Feder...","""Azcapotzalco"""
"""Distrito Feder...","""Azcapotzalco"""
"""Distrito Feder...","""Azcapotzalco"""
"""Distrito Feder...","""Azcapotzalco"""


In [12]:
# The '*' wildcard selects every column of the frame.
polars_df.select(['*']).head()

ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBMAS,POBFEM,P_0A2,P_0A2_M,P_0A2_F,P_3YMAS,P_3YMAS_M,P_3YMAS_F,P_5YMAS,P_5YMAS_M,P_5YMAS_F,P_12YMAS,P_12YMAS_M,P_12YMAS_F,P_15YMAS,P_15YMAS_M,P_15YMAS_F,P_18YMAS,P_18YMAS_M,P_18YMAS_F,P_3A5,P_3A5_M,P_3A5_F,P_6A11,P_6A11_M,P_6A11_F,P_8A14,P_8A14_M,...,PHOGJEF_M,PHOGJEF_F,VIVTOT,TVIVHAB,TVIVPAR,VIVPAR_HAB,TVIVPARHAB,VIVPAR_DES,VIVPAR_UT,OCUPVIVPAR,PROM_OCUP,PRO_OCUP_C,VPH_PISODT,VPH_PISOTI,VPH_1DOR,VPH_2YMASD,VPH_1CUART,VPH_2CUART,VPH_3YMASC,VPH_C_ELEC,VPH_S_ELEC,VPH_AGUADV,VPH_AGUAFV,VPH_EXCSA,VPH_DRENAJ,VPH_NODREN,VPH_C_SERV,VPH_SNBIEN,VPH_RADIO,VPH_TV,VPH_REFRI,VPH_LAVAD,VPH_AUTOM,VPH_PC,VPH_TELEF,VPH_CEL,VPH_INTER
i64,str,i64,str,i64,str,str,i64,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,...,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
9,"""Distrito Feder...",0,"""Total del Dist...",0,"""Total de la en...","""0000""",0,8851080,"""4233783""","""4617297""","""357390""","""182118""","""175272""","""8295664""","""3952770""","""4342894""","""8034809""","""3820187""","""4214622""","""7110465""","""3350128""","""3760337""","""6715516""","""3150628""","""3564888""","""6289306""","""2935800""","""3353506""","""391086""","""199034""","""192052""","""794113""","""403608""","""390505""","""926644""","""470131""",...,"""6246023""","""2349107""",2745180,"""2453770""","""2679944""","""2388534""","""2453031""","""211245""","""80165""","""8595130""","""3.60""","""0.87""","""2334171""","""24361""","""756690""","""1618569""","""137775""","""287658""","""1945806""","""2375582""","""2256""","""2312839""","""58268""","""2362481""","""2362017""","""7178""","""2301380""","""6250""","""2185469""","""2337884""","""2165900""","""1854623""","""1110374""","""1171631""","""1715772""","""1817230""","""936648"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",0,"""Total del muni...","""0000""",0,414711,"""196053""","""218658""","""14917""","""7688""","""7229""","""390179""","""183551""","""206628""","""379605""","""178134""","""201471""","""340634""","""158502""","""182132""","""323722""","""149891""","""173831""","""304800""","""140336""","""164464""","""16011""","""8193""","""7818""","""33534""","""16856""","""16678""","""39434""","""19865""",...,"""284581""","""120042""",132135,"""117264""","""128955""","""114084""","""117237""","""10232""","""4639""","""404623""","""3.55""","""0.84""","""111818""","""673""","""32675""","""80908""","""3769""","""11718""","""97887""","""113708""","""28""","""112763""","""584""","""112900""","""113130""","""118""","""112387""","""147""","""106564""","""112503""","""107269""","""93517""","""52913""","""60003""","""86750""","""88236""","""47225"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",1,"""Total de la lo...","""0000""",0,414711,"""196053""","""218658""","""14917""","""7688""","""7229""","""390179""","""183551""","""206628""","""379605""","""178134""","""201471""","""340634""","""158502""","""182132""","""323722""","""149891""","""173831""","""304800""","""140336""","""164464""","""16011""","""8193""","""7818""","""33534""","""16856""","""16678""","""39434""","""19865""",...,"""284581""","""120042""",132135,"""117264""","""128955""","""114084""","""117237""","""10232""","""4639""","""404623""","""3.55""","""0.84""","""111818""","""673""","""32675""","""80908""","""3769""","""11718""","""97887""","""113708""","""28""","""112763""","""584""","""112900""","""113130""","""118""","""112387""","""147""","""106564""","""112503""","""107269""","""93517""","""52913""","""60003""","""86750""","""88236""","""47225"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",1,"""Total AGEB urb...","""0010""",0,3424,"""1600""","""1824""","""71""","""35""","""36""","""3228""","""1502""","""1726""","""3174""","""1477""","""1697""","""2916""","""1343""","""1573""","""2787""","""1274""","""1513""","""2650""","""1200""","""1450""","""77""","""37""","""40""","""235""","""122""","""113""","""294""","""148""",...,"""2280""","""1021""",879,"""856""","""838""","""815""","""856""","""15""","""8""","""3301""","""4.05""","""0.83""","""797""","""5""","""61""","""749""","""0""","""19""","""790""","""811""","""0""","""809""","""*""","""809""","""809""","""*""","""809""","""0""","""771""","""805""","""804""","""762""","""466""","""447""","""751""","""598""","""346"""
9,"""Distrito Feder...",2,"""Azcapotzalco""",1,"""Azcapotzalco""","""0010""",1,202,"""96""","""106""","""6""","""4""","""*""","""193""","""91""","""102""","""189""","""91""","""98""","""172""","""84""","""88""","""162""","""79""","""83""","""157""","""76""","""81""","""4""","""0""","""4""","""17""","""7""","""10""","""23""","""10""",...,"""127""","""72""",48,"""*""","""*""","""*""","""*""","""*""","""*""","""199""","""4.23""","""0.90""","""46""","""0""","""5""","""41""","""0""","""*""","""44""","""46""","""0""","""46""","""0""","""46""","""46""","""0""","""46""","""0""","""43""","""46""","""46""","""45""","""28""","""21""","""45""","""26""","""15"""


## Polars `Expressions`

Son un mapping de una `Series` como input a una `Series` como output

<div style="background-color: white">
<img src='../img/map.png'> 
</div>

Son usadas ampliamente en operaciones de data science:
- Tomar una muestra de filas de una `column`
- Aplicar operaciones aritméticas a una `column`
- Extraer partes de una fecha
- Convertir a minúsculas una `column` de tipo string

También son usadas comúnmente dentro de operaciones:
- Calcular la media de un grupo en una operación  `group by` 
- Calcular el tamaño de cada grupo en una operación `group by`
- Realizar una suma horizontal entre columnas

Polars realiza estas transformaciones a velocidad relámpago ⚡ al:
- Automáticamente optimizar el query de cada `expression`
- Paralelizar automáticamente las `expressions` en varias columnas

De una manera similar a 🐼, podemos hacer `method chaining` y ejecutar secuencialmente varias `expressions`


In [13]:
# pl.exclude with column names: select everything except ENTIDAD and MUN.
polars_df.select(pl.exclude(["ENTIDAD","MUN"])).head()

NOM_ENT,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBMAS,POBFEM,P_0A2,P_0A2_M,P_0A2_F,P_3YMAS,P_3YMAS_M,P_3YMAS_F,P_5YMAS,P_5YMAS_M,P_5YMAS_F,P_12YMAS,P_12YMAS_M,P_12YMAS_F,P_15YMAS,P_15YMAS_M,P_15YMAS_F,P_18YMAS,P_18YMAS_M,P_18YMAS_F,P_3A5,P_3A5_M,P_3A5_F,P_6A11,P_6A11_M,P_6A11_F,P_8A14,P_8A14_M,P_8A14_F,P_12A14,...,PHOGJEF_M,PHOGJEF_F,VIVTOT,TVIVHAB,TVIVPAR,VIVPAR_HAB,TVIVPARHAB,VIVPAR_DES,VIVPAR_UT,OCUPVIVPAR,PROM_OCUP,PRO_OCUP_C,VPH_PISODT,VPH_PISOTI,VPH_1DOR,VPH_2YMASD,VPH_1CUART,VPH_2CUART,VPH_3YMASC,VPH_C_ELEC,VPH_S_ELEC,VPH_AGUADV,VPH_AGUAFV,VPH_EXCSA,VPH_DRENAJ,VPH_NODREN,VPH_C_SERV,VPH_SNBIEN,VPH_RADIO,VPH_TV,VPH_REFRI,VPH_LAVAD,VPH_AUTOM,VPH_PC,VPH_TELEF,VPH_CEL,VPH_INTER
str,str,i64,str,str,i64,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,...,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""Distrito Feder...","""Total del Dist...",0,"""Total de la en...","""0000""",0,8851080,"""4233783""","""4617297""","""357390""","""182118""","""175272""","""8295664""","""3952770""","""4342894""","""8034809""","""3820187""","""4214622""","""7110465""","""3350128""","""3760337""","""6715516""","""3150628""","""3564888""","""6289306""","""2935800""","""3353506""","""391086""","""199034""","""192052""","""794113""","""403608""","""390505""","""926644""","""470131""","""456513""","""394949""",...,"""6246023""","""2349107""",2745180,"""2453770""","""2679944""","""2388534""","""2453031""","""211245""","""80165""","""8595130""","""3.60""","""0.87""","""2334171""","""24361""","""756690""","""1618569""","""137775""","""287658""","""1945806""","""2375582""","""2256""","""2312839""","""58268""","""2362481""","""2362017""","""7178""","""2301380""","""6250""","""2185469""","""2337884""","""2165900""","""1854623""","""1110374""","""1171631""","""1715772""","""1817230""","""936648"""
"""Distrito Feder...","""Azcapotzalco""",0,"""Total del muni...","""0000""",0,414711,"""196053""","""218658""","""14917""","""7688""","""7229""","""390179""","""183551""","""206628""","""379605""","""178134""","""201471""","""340634""","""158502""","""182132""","""323722""","""149891""","""173831""","""304800""","""140336""","""164464""","""16011""","""8193""","""7818""","""33534""","""16856""","""16678""","""39434""","""19865""","""19569""","""16912""",...,"""284581""","""120042""",132135,"""117264""","""128955""","""114084""","""117237""","""10232""","""4639""","""404623""","""3.55""","""0.84""","""111818""","""673""","""32675""","""80908""","""3769""","""11718""","""97887""","""113708""","""28""","""112763""","""584""","""112900""","""113130""","""118""","""112387""","""147""","""106564""","""112503""","""107269""","""93517""","""52913""","""60003""","""86750""","""88236""","""47225"""
"""Distrito Feder...","""Azcapotzalco""",1,"""Total de la lo...","""0000""",0,414711,"""196053""","""218658""","""14917""","""7688""","""7229""","""390179""","""183551""","""206628""","""379605""","""178134""","""201471""","""340634""","""158502""","""182132""","""323722""","""149891""","""173831""","""304800""","""140336""","""164464""","""16011""","""8193""","""7818""","""33534""","""16856""","""16678""","""39434""","""19865""","""19569""","""16912""",...,"""284581""","""120042""",132135,"""117264""","""128955""","""114084""","""117237""","""10232""","""4639""","""404623""","""3.55""","""0.84""","""111818""","""673""","""32675""","""80908""","""3769""","""11718""","""97887""","""113708""","""28""","""112763""","""584""","""112900""","""113130""","""118""","""112387""","""147""","""106564""","""112503""","""107269""","""93517""","""52913""","""60003""","""86750""","""88236""","""47225"""
"""Distrito Feder...","""Azcapotzalco""",1,"""Total AGEB urb...","""0010""",0,3424,"""1600""","""1824""","""71""","""35""","""36""","""3228""","""1502""","""1726""","""3174""","""1477""","""1697""","""2916""","""1343""","""1573""","""2787""","""1274""","""1513""","""2650""","""1200""","""1450""","""77""","""37""","""40""","""235""","""122""","""113""","""294""","""148""","""146""","""129""",...,"""2280""","""1021""",879,"""856""","""838""","""815""","""856""","""15""","""8""","""3301""","""4.05""","""0.83""","""797""","""5""","""61""","""749""","""0""","""19""","""790""","""811""","""0""","""809""","""*""","""809""","""809""","""*""","""809""","""0""","""771""","""805""","""804""","""762""","""466""","""447""","""751""","""598""","""346"""
"""Distrito Feder...","""Azcapotzalco""",1,"""Azcapotzalco""","""0010""",1,202,"""96""","""106""","""6""","""4""","""*""","""193""","""91""","""102""","""189""","""91""","""98""","""172""","""84""","""88""","""162""","""79""","""83""","""157""","""76""","""81""","""4""","""0""","""4""","""17""","""7""","""10""","""23""","""10""","""13""","""10""",...,"""127""","""72""",48,"""*""","""*""","""*""","""*""","""*""","""*""","""199""","""4.23""","""0.90""","""46""","""0""","""5""","""41""","""0""","""*""","""44""","""46""","""0""","""46""","""0""","""46""","""46""","""0""","""46""","""0""","""43""","""46""","""46""","""45""","""28""","""21""","""45""","""26""","""15"""


In [14]:
# pl.col with a dtype selects every column of that type;
# .width is the number of columns in the result (i.e. how many Utf8 columns exist).
polars_df.select(pl.col(pl.Utf8)).width

192

In [15]:
# pl.exclude with a dtype: drop every Utf8 column, leaving only the non-string ones.
polars_df.select(pl.exclude(pl.Utf8)).head()

ENTIDAD,MUN,LOC,MZA,POBTOT,VIVTOT
i64,i64,i64,i64,i64,i64
9,0,0,0,8851080,2745180
9,2,0,0,414711,132135
9,2,1,0,414711,132135
9,2,1,0,3424,879
9,2,1,1,202,48


## `Expression` contexts

Una `expression` no se puede usar en cualquier lugar; necesita un context:
- selection: `select`
- groupby aggregation: `groupby`
- hstack/add columns: `with_columns`

In [16]:
# Deliberate error: an expression cannot be applied by "calling" the DataFrame;
# it has to be passed to a context such as select or with_columns.
# The TypeError is caught and printed so this demonstration does not abort a
# Restart & Run All of the notebook.
try:
    polars_df(pl.exclude(pl.Int64))
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'DataFrame' object is not callable

In [17]:
# with_columns context: each expression in the list produces one column.
polars_df.with_columns([
    # New constant column holding the literal 1.
    pl.lit(1).alias("mi_numero"),
    # No alias, so the result keeps the name POBTOT and replaces that column.
    pl.col("POBTOT")/1000,
    # Likewise replaces NOM_ENT with its upper-cased version.
    pl.col("NOM_ENT").str.to_uppercase()]
).select(["mi_numero","NOM_ENT","POBTOT"]).head()

mi_numero,NOM_ENT,POBTOT
i32,str,f64
1,"""DISTRITO FEDER...",8851.08
1,"""DISTRITO FEDER...",414.711
1,"""DISTRITO FEDER...",414.711
1,"""DISTRITO FEDER...",3.424
1,"""DISTRITO FEDER...",0.202


In [18]:
# Deliberate error: ["POBTOT"] is a plain Python list, not a Polars expression,
# so dividing it by 1000 fails before with_columns ever runs. Use pl.col("POBTOT")
# to build an expression instead.
# The TypeError is caught and printed so this demonstration does not abort a
# Restart & Run All of the notebook.
try:
    polars_df.with_columns(
        ["POBTOT"]/1000
    ).select(["POBTOT"]).head()
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unsupported operand type(s) for /: 'list' and 'int'

## `Lazy mode`
El modo default de Polars es `Eager`, es decir, trata de ejecutar el código inmediatamente.<br>
Si queremos aprovechar Polars a su máximo potencial ⚡ , debemos usar `Lazy Evaluation`<br>
`scan_csv` usa Lazy Evaluation y en conjunto con `collect` hacemos el mejor uso del API de Polars 

In [19]:
# Convert the latin1-encoded INEGI CSV into a Parquet file used by the lazy-mode cells.

# Make sure the output directory exists so write_parquet does not fail on a fresh checkout.
Path("../output").mkdir(parents=True, exist_ok=True)

# Decode the file as latin1 and hand UTF-8 bytes to read_csv.
with open('../data/inegi.csv', 'r', encoding='latin1') as fh:
    inegi_df = pl.read_csv(fh.read().encode('utf8'))

# Write once the CSV handle is closed; the source file is no longer needed here.
inegi_df.write_parquet(
    "../output/inegi.parquet",
    compression="zstd",
    row_group_size=100_000)

Usando `lazy evaluation`

In [20]:
# scan_csv builds a lazy query over the CSV; collect() executes it and returns
# the materialised result, which rebinds inegi_df.
# The file was opened as latin1 in the previous cell; with 'utf8-lossy' the bytes
# that are not valid UTF-8 show up as the replacement character (see "San Jos�"
# in the output below).
inegi_df = pl.scan_csv('../data/inegi.csv',encoding='utf8-lossy').collect()
inegi_df.head()

id,clee,nom_estab,raz_social,codigo_act,nombre_act,per_ocu,tipo_vial,nom_vial,tipo_v_e_1,nom_v_e_1,tipo_v_e_2,nom_v_e_2,tipo_v_e_3,nom_v_e_3,numero_ext,letra_ext,edificio,edificio_e,numero_int,letra_int,tipo_asent,nomb_asent,tipoCenCom,nom_CenCom,num_local,cod_postal,cve_ent,entidad,cve_mun,municipio,cve_loc,localidad,ageb,manzana,telefono,correoelec,www,tipoUniEco,latitud,longitud,fecha_alta
i64,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,i64,str,str,str,str,str,str,i64,i64,str,i64,str,i64,str,str,i64,i64,str,str,str,f64,f64,str
6174829,"""20169114119001...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,,,,0.0,,"""LOCALIDAD""","""BUENOS AIRES""",,,,68407,20,"""Oaxaca""",169,"""San Jos� Indep...",2,"""Buenos Aires""","""0030""",5,,,,"""Fijo""",18.255652,-96.614697,"""2010-07"""
6174806,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa Mar�a Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6174723,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa Mar�a Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6174722,"""20309114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""",0.0,,,,0.0,,"""COLONIA""","""BUENA VISTA""",,,,68450,20,"""Oaxaca""",309,"""San Pedro Ixca...",16,"""Colonia Buena ...","""0014""",800,,,,"""Fijo""",18.136119,-96.520094,"""2010-07"""
6174801,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa Mar�a Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""


Polars usa la estructura de datos `LazyFrame` cuando está en modo lazy

In [21]:
# Rebinds inegi_df: from here on it is a lazy scan of the Parquet file written
# earlier, not the eager result collected in the previous cell.
inegi_df = pl.scan_parquet("../output/inegi.parquet")
inegi_df

In [22]:
# Returns the optimized query plan as a string; here it is just the Parquet scan
# projecting all 42 columns.
# NOTE(review): newer Polars releases appear to expose this as LazyFrame.explain() -- confirm
# against the installed version.
inegi_df.describe_optimized_plan()

'  PARQUET SCAN ../output/inegi.parquet\n  PROJECT */42 COLUMNS\n'

`fetch` nos permite extraer datos de manera eficiente sin tener que llevar todos los datos a memoria

In [23]:
# Run the lazy query on a small sample: the result below has 10 rows.
inegi_df.fetch(10)

id,clee,nom_estab,raz_social,codigo_act,nombre_act,per_ocu,tipo_vial,nom_vial,tipo_v_e_1,nom_v_e_1,tipo_v_e_2,nom_v_e_2,tipo_v_e_3,nom_v_e_3,numero_ext,letra_ext,edificio,edificio_e,numero_int,letra_int,tipo_asent,nomb_asent,tipoCenCom,nom_CenCom,num_local,cod_postal,cve_ent,entidad,cve_mun,municipio,cve_loc,localidad,ageb,manzana,telefono,correoelec,www,tipoUniEco,latitud,longitud,fecha_alta
i64,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,i64,str,str,str,str,str,str,i64,i64,str,i64,str,i64,str,str,i64,i64,str,str,str,f64,f64,str
6174829,"""20169114119001...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,,,,0.0,,"""LOCALIDAD""","""BUENOS AIRES""",,,,68407,20,"""Oaxaca""",169,"""San José Indep...",2,"""Buenos Aires""","""0030""",5,,,,"""Fijo""",18.255652,-96.614697,"""2010-07"""
6174806,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa María Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6174723,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa María Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6174722,"""20309114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""",0.0,,,,0.0,,"""COLONIA""","""BUENA VISTA""",,,,68450,20,"""Oaxaca""",309,"""San Pedro Ixca...",16,"""Colonia Buena ...","""0014""",800,,,,"""Fijo""",18.136119,-96.520094,"""2010-07"""
6174801,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa María Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6176111,"""20169114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,,,,,,"""LOCALIDAD""","""CERRO LAGUNA""",,,,68407,20,"""Oaxaca""",169,"""San José Indep...",5,"""El Tepeyac""","""0030""",800,,,,"""Fijo""",18.229444,-96.623056,"""2010-07"""
6176109,"""20169114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,,,,0.0,,"""LOCALIDAD""","""RIO LODO""",,,,68407,20,"""Oaxaca""",406,"""Santa María Ch...",60,"""Río Lodo""","""0022""",800,,,,"""Fijo""",18.302727,-96.678663,"""2010-07"""
6176116,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa María Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6176114,"""20169114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,,,,,,"""LOCALIDAD""","""CERRO LAGUNA""",,,,68407,20,"""Oaxaca""",169,"""San José Indep...",11,"""Cerro Laguna""","""0030""",800,,,,"""Fijo""",18.258726,-96.660089,"""2010-07"""
6176135,"""20169114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,,,,,,"""LOCALIDAD""","""CERRO CLARIN""",,,,68407,20,"""Oaxaca""",169,"""San José Indep...",3,"""Cerro Clarín""","""0030""",800,,,,"""Fijo""",18.28,-96.659,"""2010-07"""


Mediante `collect` accedemos a todos los datos

In [24]:
# collect() executes the lazy query and materialises the full result;
# head() is then applied to that materialised frame to show its first rows.
inegi_df.collect().head()

id,clee,nom_estab,raz_social,codigo_act,nombre_act,per_ocu,tipo_vial,nom_vial,tipo_v_e_1,nom_v_e_1,tipo_v_e_2,nom_v_e_2,tipo_v_e_3,nom_v_e_3,numero_ext,letra_ext,edificio,edificio_e,numero_int,letra_int,tipo_asent,nomb_asent,tipoCenCom,nom_CenCom,num_local,cod_postal,cve_ent,entidad,cve_mun,municipio,cve_loc,localidad,ageb,manzana,telefono,correoelec,www,tipoUniEco,latitud,longitud,fecha_alta
i64,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,i64,str,str,str,str,str,str,i64,i64,str,i64,str,i64,str,str,i64,i64,str,str,str,f64,f64,str
6174829,"""20169114119001...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,,,,0.0,,"""LOCALIDAD""","""BUENOS AIRES""",,,,68407,20,"""Oaxaca""",169,"""San José Indep...",2,"""Buenos Aires""","""0030""",5,,,,"""Fijo""",18.255652,-96.614697,"""2010-07"""
6174806,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa María Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6174723,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa María Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""
6174722,"""20309114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""","""OTRO (ESPECIFI...","""NINGUNO""",0.0,,,,0.0,,"""COLONIA""","""BUENA VISTA""",,,,68450,20,"""Oaxaca""",309,"""San Pedro Ixca...",16,"""Colonia Buena ...","""0014""",800,,,,"""Fijo""",18.136119,-96.520094,"""2010-07"""
6174801,"""20406114119000...","""PESCA DE CRUST...",,114119,"""Pesca y captur...","""0 a 5 personas...","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""","""CALLE""","""NINGUNO""",0.0,"""SN""",,,,,"""LOCALIDAD""","""LOMA ALTA""",,,,68511,20,"""Oaxaca""",406,"""Santa María Ch...",74,"""Loma Alta""","""0022""",800,,,,"""Fijo""",18.313758,-96.647255,"""2010-07"""


In [25]:
# inegi_df is still the lazy scan here and collect() is not called, so no rows
# are shown. The result of head() is not assigned, so inegi_df itself is unchanged.
inegi_df.head()

In [26]:
# Same plan string as before: the head() in the previous cell was not stored,
# so inegi_df is still the plain Parquet scan.
inegi_df.describe_optimized_plan()

'  PARQUET SCAN ../output/inegi.parquet\n  PROJECT */42 COLUMNS\n'

In [27]:
# NOTE(review): this rebinds polars_df, which held the census DataFrame in the
# cells above, to a lazy scan of the INEGI Parquet file. The census data is no
# longer reachable through this name afterwards -- confirm this is intended
# before relying on polars_df in later cells or in the exercises.
polars_df = pl.scan_parquet("../output/inegi.parquet")
polars_df

## Ejercicios

1. Del dataset de censo:<br>
    1. Obtén sólo las columnas de tipo `String`
    2. Filtra el dataframe para obtener las filas con población total mayor a 500 habitantes
    3. Crea una nueva columna que se llame ANIO_CENSO y que tenga un valor constante de 2019 (numérico)
    4. Lee el CSV en modo lazy y realiza los dos pasos anteriores ( 2 y 3)<br><br>

2. Del dataset de inegi:<br>
    1. Lee el CSV en modo lazy y selecciona únicamente las primeras dos columnas
    2. Obtén el plan optimizado
    3. Ahora agrega un filtro por cve_ent con valor de 20. Obtén nuevamente el plan optimizado.