# Chapter 3: Moving from Pandas to Polars

In [None]:
import polars as pl
# Report the installed Polars version so results can be compared with the book's.
pl.show_versions()  # The book is built with Polars version 1.13.1

## Animals

In [None]:
# IPython line magic (not plain Python): print the raw CSV before parsing it.
%cat data/animals.csv

## Similarities to Recognize

## Appearances to Appreciate

In [None]:
import pandas as pd
import polars as pl

### Differences in Code

In [None]:
# Read the same file with both libraries. The keyword names differ:
# pandas uses `sep`/`header`, Polars uses `separator`/`has_header`.
animals_pd = pd.read_csv("data/animals.csv", sep=",", header=0)
animals_pl = pl.read_csv("data/animals.csv", separator=",", has_header=True)

# The `=` specifier in an f-string prints the expression text followed by its value.
print(f"{type(animals_pd) = }")
print(f"{type(animals_pl) = }")

### Differences in Display

In [None]:
# Last expression of the cell: the notebook renders the pandas DataFrame.
animals_pd

In [None]:
# Last expression of the cell: the notebook renders the Polars DataFrame.
animals_pl

In [None]:
# Bracket indexing with a single column name yields a pandas Series.
animals_pd["animal"]

In [None]:
# Polars counterpart: fetch a single column as a Polars Series.
animals_pl.get_column("animal")

In [None]:
# pandas needs the `columns=` keyword to drop columns; the result is
# reassigned so the narrower frame is used from here on.
animals_pd = animals_pd.drop(columns=["habitat", "diet", "features"])
animals_pd

In [None]:
# Polars takes the column names to drop as plain positional arguments;
# the result is reassigned so the narrower frame is used from here on.
animals_pl = animals_pl.drop("habitat", "diet", "features")
animals_pl

## Concepts to Unlearn

### Index

In [None]:
# Inspect the row index pandas attached to the frame (read_csv was called
# without index_col, so this is the automatically generated one).
animals_pd.index

In [None]:
# Mean weight per (class, status). The double brackets around "weight" keep
# the result a DataFrame; the group keys end up in the index, not in columns.
animals_agg_pd = animals_pd.groupby(["class", "status"])[["weight"]].mean()
animals_agg_pd

In [None]:
# Grouping by two keys produces a two-level (Multi)Index on the result.
animals_agg_pd.index

In [None]:
# Move the index levels back into ordinary columns. The result is only
# displayed, not assigned, so animals_agg_pd itself is left unchanged.
animals_agg_pd.reset_index()

In [None]:
# Polars equivalent: the group keys stay regular columns, so there is no index to reset.
# NOTE(review): group order is not guaranteed unless maintain_order=True is passed.
animals_pl.group_by(["class", "status"]).agg(pl.col("weight").mean())

### Axes

In [None]:
# This raises a KeyError: without `axis=1` (or `columns=`), pandas looks for
# a row label "weight" in the index instead of a column.
# animals_pd.drop("weight")

In [None]:
# axis=1 tells pandas that "weight" is a column label. The result is only
# displayed, so animals_pd still has its "weight" column afterwards.
animals_pd.drop("weight", axis=1)

### Indexing and Slicing

In [None]:
# Brackets with a list of names: select columns.
animals_pd[["animal", "class"]]

In [None]:
# Brackets with a boolean mask: select rows (same syntax, different axis).
animals_pd[animals_pd["status"] == "endangered"]

In [None]:
# Brackets with a slice: select rows by position (here the first three).
animals_pd[:3]

In [None]:
# Brackets on the left-hand side: overwrite the column in place (mutates animals_pd).
animals_pd["weight"] = animals_pd["weight"] * 1000

In [None]:
# Show the column after the in-place multiplication.
animals_pd["weight"]

In [None]:
# Pitfall on purpose: this looks like it sorts the column, but pandas aligns
# the sorted Series back on the index during assignment, so every value
# returns to its original row and the column order does not change.
animals_pd["weight"] = animals_pd["weight"].sort_values()

In [None]:
# Show the column again to check what the sort-and-assign above actually did.
animals_pd["weight"]

### Eagerness

In [None]:
# Build a lazy query: scan_csv() only describes the work; no data is read
# until collect() is called on the result.
# NOTE(review): the filter is written after the aggregation, presumably so the
# optimizer's rewrite is visible when the two query plans are compared.
lazy_query = (
    pl.scan_csv("data/animals.csv")
    .group_by("class")
    .agg(pl.col("weight").mean())
    .filter(pl.col("class") == "mammal")
)

In [None]:
# Plot the query plan exactly as written, before any optimization.
# NOTE(review): show_graph() needs Graphviz to be installed — confirm in the setup notes.
lazy_query.show_graph(optimized=False)

In [None]:
# Plot the plan again with the default settings, to compare against the
# explicitly unoptimized plan.
lazy_query.show_graph()

In [None]:
# Execute the lazy query and materialize the result as a DataFrame.
lazy_query.collect()

### Relaxedness

## Syntax to Forget

### Common Operations Side By Side

#### Removing Duplicate Values

In [None]:
# Keep one row per distinct "class" (pandas keeps the first occurrence by default).
animals_pd.drop_duplicates(subset="class")

In [None]:
# Keep the first row seen for each class and preserve the original row order,
# matching pandas' drop_duplicates(). Polars' defaults (keep="any",
# maintain_order=False) may return an arbitrary row per class, in any order.
animals_pl.unique(subset="class", keep="first", maintain_order=True)

#### Removing Missing Values

In [None]:
# Drop the rows whose "weight" is missing; other columns are not checked.
animals_pd.dropna(subset="weight")

In [None]:
# Drop the rows whose "weight" is null; other columns are not checked.
# NOTE(review): Polars treats null and floating-point NaN as different things;
# drop_nulls() removes only nulls.
animals_pl.drop_nulls(subset="weight")

#### Sorting Rows

In [None]:
# Heaviest first: pandas expresses the direction as ascending=False.
animals_pd.sort_values("weight", ascending=False)

In [None]:
# Heaviest first: Polars expresses the direction as descending=True.
# NOTE(review): if "weight" has missing values, pandas places them last by
# default while Polars places nulls first unless nulls_last=True — confirm
# the two outputs are meant to differ there.
animals_pl.sort("weight", descending=True)

#### Casting Existing Column

In [None]:
# Return a copy of the frame with "lifespan" converted to float;
# the result is only displayed, so animals_pd itself is not modified.
animals_pd.assign(lifespan=animals_pd["lifespan"].astype(float))

In [None]:
# Polars equivalent: with_columns() replaces "lifespan" with a Float64 version
# in the returned frame; the result is only displayed, not assigned.
animals_pl.with_columns(pl.col("lifespan").cast(pl.Float64))

#### Aggregating Rows

In [None]:
# Mean weight per (class, status); the group keys become the result's index.
animals_pd.groupby(["class", "status"])[["weight"]].mean()

In [None]:
# Mean weight per (class, status); keys are passed positionally here and
# remain regular columns in the result.
animals_pl.group_by("class", "status").agg(pl.col("weight").mean())

## To and From Pandas

In [None]:
# Build a Polars DataFrame straight from the pandas one. This rebinds
# animals_pl, so it now carries whatever was done to animals_pd earlier
# (dropped columns, weights multiplied by 1000).
# NOTE(review): pl.from_pandas() is the dedicated conversion function — consider mentioning it.
animals_pl = pl.DataFrame(animals_pd)

animals_pl

In [None]:
# Convert the Polars DataFrame back into a pandas DataFrame.
# NOTE(review): to_pandas() relies on pyarrow being installed — confirm in the setup notes.
there_and_back_again_df = animals_pl.to_pandas()

there_and_back_again_df

## Takeaways