# [Python Polars: A Lightning-Fast DataFrame Library](https://realpython.com/polars-python/)

## [DataFrames, Expressions, and Contexts](https://realpython.com/polars-python/#dataframes-expressions-and-contexts)

In [1]:
import numpy as np
import polars as pl

num_rows = 5000
rng = np.random.default_rng(seed=7)

buildings_data = {
     "sqft": rng.exponential(scale=1000, size=num_rows),
     "year": rng.integers(low=1995, high=2023, size=num_rows),
     "building_type": rng.choice(["A", "B", "C"], size=num_rows),
 }
buildings = pl.DataFrame(buildings_data)
buildings

sqft,year,building_type
f64,i64,str
707.529256,1996,"""C"""
1025.203348,2020,"""C"""
568.548657,2012,"""A"""
895.109864,2000,"""A"""
206.532754,2011,"""A"""
3383.637351,2018,"""B"""
9.753627,2007,"""A"""
2809.215763,2004,"""B"""
575.332756,2021,"""A"""
300.534013,2003,"""A"""


In [2]:
buildings.schema

{'sqft': Float64, 'year': Int64, 'building_type': Utf8}

In [3]:
buildings.head()

sqft,year,building_type
f64,i64,str
707.529256,1996,"""C"""
1025.203348,2020,"""C"""
568.548657,2012,"""A"""
895.109864,2000,"""A"""
206.532754,2011,"""A"""


In [4]:
buildings.describe()

describe,sqft,year,building_type
str,f64,f64,str
"""count""",5000.0,5000.0,"""5000"""
"""null_count""",0.0,0.0,"""0"""
"""mean""",994.094456,2008.5258,
"""std""",1016.641569,8.062353,
"""min""",1.133256,1995.0,"""A"""
"""max""",9307.793917,2022.0,"""C"""
"""median""",669.370932,2009.0,


In [5]:
buildings.select("sqft")

sqft
f64
707.529256
1025.203348
568.548657
895.109864
206.532754
3383.637351
9.753627
2809.215763
575.332756
300.534013


In [6]:
buildings.select(pl.col("sqft"))

sqft
f64
707.529256
1025.203348
568.548657
895.109864
206.532754
3383.637351
9.753627
2809.215763
575.332756
300.534013


In [7]:
buildings.select(pl.col("sqft").sort() / 1000)

sqft
f64
0.001133
0.001152
0.001429
0.001439
0.001505
0.001597
0.001747
0.00195
0.002005
0.002024


In [9]:
after_2015 = buildings.filter(pl.col("year") > 2015)
after_2015.shape

(1230, 3)

In [10]:
after_2015.select(pl.col("year").min())

year
i64
2016


In [12]:
buildings.groupby("building_type").agg(
    [
        pl.mean("sqft").alias("mean_sqft"),
        pl.max("sqft").alias("max_sqft"),
        pl.median("year").alias("median_year"),
        pl.count(),
    ]
)

building_type,mean_sqft,max_sqft,median_year,count
str,f64,f64,f64,u32
"""C""",999.854722,9307.793917,2009.0,1692
"""B""",992.754444,8313.942444,2009.0,1655
"""A""",989.539918,7247.538867,2009.0,1653


## [The Lazy API](https://realpython.com/polars-python/#the-lazy-api)

In [19]:
!pip install polars -U



In [20]:
num_rows = 5000
rng = np.random.default_rng(seed=7)

buildings = {
     "sqft": rng.exponential(scale=1000, size=num_rows),
     "price": rng.exponential(scale=100_000, size=num_rows),
     "year": rng.integers(low=1995, high=2023, size=num_rows),
     "building_type": rng.choice(["A", "B", "C"], size=num_rows),
  }
buildings_lazy = pl.LazyFrame(buildings)
buildings_lazy

TypeError: LazyFrame() takes no arguments

In [None]:
data = {"a": [1, 2], "b": [3, 4]}
lf = pl.LazyFrame(data=data)
lf.collect()

TypeError: LazyFrame() takes no arguments