In [1]:
!pip install polars



**Importing Polars**

In [2]:
import polars as pl

**Creating DataFrames**

In [3]:
#Creating DataFrame
df = pl.DataFrame({'id':   [1, 2, 3, 4, 5],
                   'unom': [1001, 1002, 1003, 1004, 1005],
                   'house_street': ['Central','Central','Central','North', 'West'],
                   'nom': [10, 11, 12, 5, 7]})

**Meta Operations**

In [4]:
# Get column names
df.columns

['id', 'unom', 'house_street', 'nom']

In [5]:
# Get dtypes
df.dtypes

[Int64, Int64, String, Int64]

In [6]:
# Get shape
df.shape

(5, 4)

In [7]:
# Memory usage
df.estimated_size()

150

In [8]:
# Display DataFrame
print(df)

shape: (5, 4)
┌─────┬──────┬──────────────┬─────┐
│ id  ┆ unom ┆ house_street ┆ nom │
│ --- ┆ ---  ┆ ---          ┆ --- │
│ i64 ┆ i64  ┆ str          ┆ i64 │
╞═════╪══════╪══════════════╪═════╡
│ 1   ┆ 1001 ┆ Central      ┆ 10  │
│ 2   ┆ 1002 ┆ Central      ┆ 11  │
│ 3   ┆ 1003 ┆ Central      ┆ 12  │
│ 4   ┆ 1004 ┆ North        ┆ 5   │
│ 5   ┆ 1005 ┆ West         ┆ 7   │
└─────┴──────┴──────────────┴─────┘


In [9]:
# Get DataFrame info
df.schema

Schema([('id', Int64),
        ('unom', Int64),
        ('house_street', String),
        ('nom', Int64)])

In [10]:
# Select columns
df.select(['id', 'unom'])

id,unom
i64,i64
1,1001
2,1002
3,1003
4,1004
5,1005


In [11]:
# Filter rows
df.filter(pl.col('house_street') == 'Central')

id,unom,house_street,nom
i64,i64,str,i64
1,1001,"""Central""",10
2,1002,"""Central""",11
3,1003,"""Central""",12


In [12]:
# Sort DataFrame
df.sort('nom',descending=False)

id,unom,house_street,nom
i64,i64,str,i64
4,1004,"""North""",5
5,1005,"""West""",7
1,1001,"""Central""",10
2,1002,"""Central""",11
3,1003,"""Central""",12


In [13]:
# Add new column
df = df.with_columns(pl.lit(9).alias('district_id'))
df.head(3)

id,unom,house_street,nom,district_id
i64,i64,str,i64,i32
1,1001,"""Central""",10,9
2,1002,"""Central""",11,9
3,1003,"""Central""",12,9


In [14]:
# Rename columns
df = df.rename({'district_id': 'district'})
df.head(3)

id,unom,house_street,nom,district
i64,i64,str,i64,i32
1,1001,"""Central""",10,9
2,1002,"""Central""",11,9
3,1003,"""Central""",12,9


In [15]:
# Drop columns
df = df.drop(['district'])
df.head(3)

id,unom,house_street,nom
i64,i64,str,i64
1,1001,"""Central""",10
2,1002,"""Central""",11
3,1003,"""Central""",12


In [16]:
# Group by and aggregate
df.group_by('house_street').agg(pl.count('unom'))

house_street,unom
str,u32
"""Central""",3
"""West""",1
"""North""",1


**Data Manipulation**

In [17]:
# Apply function to column
df = df.with_columns(pl.col('nom').map_elements(lambda x: x + 1).alias('nom_correction'))
df.head()

Expr.map_elements is significantly slower than the native expressions API.
Only use if you absolutely CANNOT implement your logic otherwise.
Replace this expression...
  - pl.col("nom").map_elements(lambda x: ...)
with this one instead:
  + pl.col("nom") + 1

  df = df.with_columns(pl.col('nom').map_elements(lambda x: x + 1).alias('nom_correction'))
  df = df.with_columns(pl.col('nom').map_elements(lambda x: x + 1).alias('nom_correction'))


id,unom,house_street,nom,nom_correction
i64,i64,str,i64,i64
1,1001,"""Central""",10,11
2,1002,"""Central""",11,12
3,1003,"""Central""",12,13
4,1004,"""North""",5,6
5,1005,"""West""",7,8


In [18]:
# Custom function
def custom_func(x):
    return x + 2

# Apply custom function
df.with_columns(pl.col('nom').map_batches(custom_func).alias('nom_correction_2'))

id,unom,house_street,nom,nom_correction,nom_correction_2
i64,i64,str,i64,i64,i64
1,1001,"""Central""",10,11,12
2,1002,"""Central""",11,12,13
3,1003,"""Central""",12,13,14
4,1004,"""North""",5,6,7
5,1005,"""West""",7,8,9


In [19]:
# Replace values
df = df.with_columns(pl.col('nom_correction').replace({13: 15, 6: 7}))
df.head()

id,unom,house_street,nom,nom_correction
i64,i64,str,i64,i64
1,1001,"""Central""",10,11
2,1002,"""Central""",11,12
3,1003,"""Central""",12,15
4,1004,"""North""",5,7
5,1005,"""West""",7,8


**String Operations**

In [20]:
# Convert to uppercase
df.with_columns(pl.col('house_street').str.to_uppercase())

id,unom,house_street,nom,nom_correction
i64,i64,str,i64,i64
1,1001,"""CENTRAL""",10,11
2,1002,"""CENTRAL""",11,12
3,1003,"""CENTRAL""",12,15
4,1004,"""NORTH""",5,7
5,1005,"""WEST""",7,8


In [22]:
street_list = ['Central', 'North']
df.filter(pl.col('house_street').str.contains_any(street_list))

id,unom,house_street,nom,nom_correction
i64,i64,str,i64,i64
1,1001,"""Central""",10,11
2,1002,"""Central""",11,12
3,1003,"""Central""",12,15
4,1004,"""North""",5,7


In [23]:
# String replace
df.with_columns(pl.col('house_street').str.replace('North', 'South'))

id,unom,house_street,nom,nom_correction
i64,i64,str,i64,i64
1,1001,"""Central""",10,11
2,1002,"""Central""",11,12
3,1003,"""Central""",12,15
4,1004,"""South""",5,7
5,1005,"""West""",7,8


In [24]:
# String length
df.with_columns(pl.col('house_street').str.len_chars().alias('name_street_length'))

id,unom,house_street,nom,nom_correction,name_street_length
i64,i64,str,i64,i64,u32
1,1001,"""Central""",10,11,7
2,1002,"""Central""",11,12,7
3,1003,"""Central""",12,15,7
4,1004,"""North""",5,7,5
5,1005,"""West""",7,8,4


**Window Functions**

In [25]:
# Cumulative sum
df.with_columns(pl.col('id').cum_sum().over('house_street').alias('id_cum'))

id,unom,house_street,nom,nom_correction,id_cum
i64,i64,str,i64,i64,i64
1,1001,"""Central""",10,11,1
2,1002,"""Central""",11,12,3
3,1003,"""Central""",12,15,6
4,1004,"""North""",5,7,4
5,1005,"""West""",7,8,5


**Advanced Filtering**

In [26]:
# Multiple conditions
df.filter((pl.col('id') > 2) & (pl.col('nom') < 7))

id,unom,house_street,nom,nom_correction
i64,i64,str,i64,i64
4,1004,"""North""",5,7


In [27]:
# Is in list
df.filter(pl.col('unom').is_in([1001, 1002, 1003]))

id,unom,house_street,nom,nom_correction
i64,i64,str,i64,i64
1,1001,"""Central""",10,11
2,1002,"""Central""",11,12
3,1003,"""Central""",12,15


**Advanced Aggregations**

In [28]:
# Multiple aggregations
df.group_by('house_street').agg([
    pl.sum('id').alias('id_sum'),
    pl.mean('unom').alias('unom_mean'),
    pl.n_unique('nom').alias('nom_unique_count')
])

house_street,id_sum,unom_mean,nom_unique_count
str,i64,f64,u32
"""North""",4,1004.0,1
"""West""",5,1005.0,1
"""Central""",6,1002.0,3


**Reshaping Data**

In [29]:
# Concatenate string columns
df.with_columns(pl.concat_str(['id', 'unom'], separator='-').alias('new_code'))

id,unom,house_street,nom,nom_correction,new_code
i64,i64,str,i64,i64,str
1,1001,"""Central""",10,11,"""1-1001"""
2,1002,"""Central""",11,12,"""2-1002"""
3,1003,"""Central""",12,15,"""3-1003"""
4,1004,"""North""",5,7,"""4-1004"""
5,1005,"""West""",7,8,"""5-1005"""


**Conditional Expressions**

In [30]:
# When-Then-Otherwise
df.with_columns(
    pl.when(pl.col('id') > 4)
    .then(pl.lit('yes'))
    .when(pl.col('id') < 2)
    .then(pl.lit('no'))
    .otherwise(pl.lit('-'))
    .alias('house_category')
)

id,unom,house_street,nom,nom_correction,house_category
i64,i64,str,i64,i64,str
1,1001,"""Central""",10,11,"""no"""
2,1002,"""Central""",11,12,"""-"""
3,1003,"""Central""",12,15,"""-"""
4,1004,"""North""",5,7,"""-"""
5,1005,"""West""",7,8,"""yes"""
