# Chapter 3: Moving from Pandas to Polars

In [1]:
import polars as pl
# Print the installed Polars version (and its optional dependencies) so that
# readers can compare their environment with the one used for the book.
pl.show_versions()  # The book is built with Polars version 1.0.0

## Animals

In [3]:
# Show the raw contents of the CSV file that is loaded later in this chapter.
%cat data/animals.csv

## Similarities to Recognize

## Appearances to Appreciate

In [6]:
import pandas as pd
import polars as pl

### Differences in Code

In [8]:
# Load the same CSV file with both libraries. The keyword arguments are spelled
# out to highlight the naming differences: sep/header (pandas) versus
# separator/has_header (Polars).
animals_pd = pd.read_csv("data/animals.csv", sep=",", header=0)
animals_pl = pl.read_csv("data/animals.csv", separator=",", has_header=True)

# The "=" specifier in the f-string prints the expression text followed by its value.
print(f"{type(animals_pd) = }")
print(f"{type(animals_pl) = }")

### Differences in Display

In [10]:
# Display the pandas DataFrame using its notebook representation.
animals_pd

In [11]:
# Display the Polars DataFrame using its notebook representation.
animals_pl

In [12]:
# Bracket indexing with a single column name returns a pandas Series.
animals_pd["animal"]

In [13]:
# get_column() returns the named column as a Polars Series.
animals_pl.get_column("animal")

In [14]:
# drop() returns a new frame, so the result is assigned back to animals_pd.
# NOTE: this cell is not re-runnable; a second run raises KeyError because the
# columns are already gone.
animals_pd = animals_pd.drop(columns=["habitat", "diet", "features"])
animals_pd

In [15]:
# Polars accepts the column names as positional arguments and also returns a
# new frame. NOTE: like the pandas cell, running this twice is expected to fail
# because the columns no longer exist.
animals_pl = animals_pl.drop("habitat", "diet", "features")
animals_pl

## Concepts to Unlearn

### Index

In [18]:
# No index column was specified when reading the CSV, so pandas created a
# default RangeIndex.
animals_pd.index

In [19]:
# Grouping by two columns moves the group keys into the index, producing a
# MultiIndex of (class, status). The double brackets around "weight" keep the
# result a DataFrame rather than a Series.
animals_agg_pd = animals_pd.groupby(["class", "status"])[["weight"]].mean()
animals_agg_pd

In [20]:
# Inspect the MultiIndex created by the groupby above.
animals_agg_pd.index

In [21]:
# reset_index() turns the index levels back into ordinary columns and returns
# a new frame; animals_agg_pd itself is left unchanged.
animals_agg_pd.reset_index()

In [22]:
# Polars has no index: the group keys remain regular columns in the result.
# The order of the groups is not guaranteed unless maintain_order=True is passed.
animals_pl.group_by(["class", "status"]).agg(pl.col("weight").mean())

### Axes

In [24]:
# DataFrame.drop() defaults to axis=0, so pandas looks for a *row* labelled
# "weight" and raises a KeyError. The error is the point of this example, so it
# is caught and printed; that way the notebook still survives
# "Restart Kernel -> Run All".
try:
    animals_pd.drop("weight")
except KeyError as error:
    print(f"KeyError: {error}")

In [25]:
# axis=1 makes drop() look among the column labels instead of the row labels.
# The result is a new frame; animals_pd still contains "weight".
animals_pd.drop("weight", axis=1)

### Indexing and Slicing

In [27]:
# Brackets with a list of names select columns.
animals_pd[["animal", "class"]]

In [28]:
# Brackets with a boolean Series filter rows.
animals_pd[animals_pd["status"] == "endangered"]

In [29]:
# Brackets with a slice select rows by position (here the first three).
animals_pd[:3]

In [30]:
# Brackets on the left-hand side overwrite a column in place.
# NOTE: this mutates animals_pd and is not idempotent; every re-run of the cell
# multiplies "weight" by 1000 again.
animals_pd["weight"] = animals_pd["weight"] * 1000

In [31]:
# Polars counterparts of the four pandas bracket operations shown above.
# Each operation has its own explicit method and returns a new DataFrame;
# animals_pl itself is left untouched. print() is used because only the last
# expression of a cell would otherwise be displayed.
print(animals_pl.select("animal", "class"))  # select columns
print(animals_pl.filter(pl.col("status") == "endangered"))  # filter rows
print(animals_pl.head(3))  # first three rows
print(animals_pl.with_columns(pl.col("weight") * 1000))  # overwrite a column

### Eagerness

In [33]:
# Build a lazy query: scan_csv() returns a LazyFrame, so nothing is read or
# computed here. The filter is written after the aggregation; compare the two
# query-plan graphs in the following cells to see what the optimizer does with it.
lazy_query = (
    pl.scan_csv("data/animals.csv")
    .group_by("class")
    .agg(pl.col("weight").mean())
    .filter(pl.col("class") == "mammal")
)

In [34]:
# Draw the query plan exactly as written, with optimizations disabled.
# NOTE(review): rendering the graph requires Graphviz to be installed.
lazy_query.show_graph(optimized=False)

In [35]:
# Draw the query plan after optimization (optimized=True is the default).
lazy_query.show_graph()

In [36]:
# collect() executes the lazy query and returns the result as a DataFrame.
lazy_query.collect()

### Relaxedness

## Syntax to Forget

### Common Operations Side By Side

#### Removing Duplicate Values

In [41]:
# Keep one row per distinct "class"; by default pandas keeps the first occurrence.
animals_pd.drop_duplicates(subset="class")

In [42]:
# Keep one row per distinct "class". With the defaults, Polars does not
# guarantee which row is kept nor the order of the result (see the keep= and
# maintain_order= parameters).
animals_pl.unique(subset="class")

#### Removing Missing Values

In [44]:
# Remove the rows in which "weight" is missing (NaN); returns a new frame.
animals_pd.dropna(subset="weight")

In [45]:
# Remove the rows in which "weight" is null; returns a new frame.
animals_pl.drop_nulls(subset="weight")

#### Sorting Rows

In [47]:
# Sort from heaviest to lightest; pandas expresses the direction with ascending=False.
animals_pd.sort_values("weight", ascending=False)

In [48]:
# Sort from heaviest to lightest; Polars expresses the direction with descending=True.
animals_pl.sort("weight", descending=True)

#### Casting Existing Column

In [50]:
# Cast "lifespan" to float via a column-to-dtype mapping; a new frame is
# returned and animals_pd is left unchanged.
animals_pd.astype({"lifespan": float})

In [51]:
# Cast "lifespan" to Float64 via a column-to-dtype mapping; a new frame is
# returned and animals_pl is left unchanged.
animals_pl.cast({"lifespan": pl.Float64})

#### Aggregating Rows

In [53]:
# Mean weight per (class, status) group; the group keys end up in the index.
animals_pd.groupby(["class", "status"]).agg({"weight": "mean"})

In [54]:
# Mean weight per (class, status) group; the group keys stay regular columns.
animals_pl.group_by("class", "status").agg(pl.mean("weight"))

## Takeaways