# Chapter 15: Reshaping

In [1]:
import polars as pl
# Report the installed Polars version and related dependency versions so that
# readers can compare their environment with the one used for the book.
pl.show_versions()  # The book is built with Polars version 1.0.0

## Wide Versus Long DataFrames

In [3]:
import polars as pl

# Wide layout: one row per student, one column per subject.
df = pl.DataFrame(
    {
        "student": ["Alice", "Bob", "Charlie"],
        "math": [85, 78, 92],
        "science": [90, 82, 85],
        "history": [88, 80, 87],
    }
)
df

In [4]:
# Long layout: one row per (student, subject) pair, with the grade in its own column.
df = pl.DataFrame(
    {
        "student": [
            "Alice", "Alice", "Alice",
            "Bob", "Bob", "Bob",
            "Charlie", "Charlie", "Charlie",
        ],
        "subject": [
            "Math", "Science", "History",
            "Math", "Science", "History",
            "Math", "Science", "History",
        ],
        "grade": [
            85, 90, 88,
            78, 82, 80,
            92, 85, 87,
        ],
    }
)
df

## Pivot to Wider DataFrame

In [6]:
import polars as pl

# Long-format input for the pivot examples: three subjects for each of three students.
df = pl.DataFrame(
    {
        "student": [
            "Alice", "Alice", "Alice",
            "Bob", "Bob", "Bob",
            "Charlie", "Charlie", "Charlie",
        ],
        "subject": [
            "Math", "Science", "History",
            "Math", "Science", "History",
            "Math", "Science", "History",
        ],
        "grade": [
            85, 90, 88,
            78, 82, 80,
            92, 85, 87,
        ],
    }
)

df

In [7]:
# Spread each distinct "subject" value into its own column, keeping one row per
# student and filling the cells with "grade".
# `on` names the column whose values become the new headers; it replaces the
# `columns` keyword, which is deprecated as of Polars 1.0.0.
df.pivot(on="subject", index="student", values="grade")

In [8]:
# Long-format frame in which every (student, subject) pair occurs three times,
# so pivoting it requires an aggregation.
df = pl.DataFrame(
    {
        "student": [
            "Alice", "Alice", "Alice", "Alice", "Alice", "Alice",
            "Bob", "Bob", "Bob", "Bob", "Bob", "Bob",
        ],
        "subject": [
            "Math", "Math", "Math", "Science", "Science", "Science",
            "Math", "Math", "Math", "Science", "Science", "Science",
        ],
        "grade": [
            85, 88, 85, 60, 66, 63,
            51, 79, 62, 82, 85, 82,
        ],
    }
)

df

In [9]:
# Pivot with an aggregation: when several grades share the same student and
# subject, collapse them into a single cell by taking their mean.
# `on` replaces the `columns` keyword, which is deprecated as of Polars 1.0.0.
df.pivot(
    on="subject",
    index="student",
    values="grade",
    aggregate_function="mean",
)

In [10]:
# Pivot with a custom aggregation expression: pl.element() stands for the values
# that land in one cell, and the expression reduces them to their spread
# (maximum minus minimum).
# `on` replaces the `columns` keyword, which is deprecated as of Polars 1.0.0.
df.pivot(
    on="subject",
    index="student",
    values="grade",
    aggregate_function=pl.element().max() - pl.element().min(),
)

## Melt to Longer DataFrame

In [12]:
# Wide-format input for the wide-to-long examples: one column per subject.
df = pl.DataFrame(
    {
        "student": ["Alice", "Bob", "Charlie"],
        "math": [85, 78, 92],
        "science": [90, 82, 85],
        "history": [88, 80, 87],
    }
)
df

In [13]:
# Reshape wide to long: the subject columns are stacked into a "subject"/"grade"
# pair of columns, with "student" repeated as the identifier.
# DataFrame.melt is deprecated as of Polars 1.0.0; unpivot is its replacement,
# with `id_vars` renamed to `index` and `value_vars` renamed to `on`.
df.unpivot(
    on=["math", "science", "history"],
    index=["student"],
    variable_name="subject",
    value_name="grade",
)

In [14]:
# Wide-format frame with several identifier columns (student, class, age,
# semester) next to the three subject columns.
df = pl.DataFrame(
    {
        "student": [
            "Alice", "Bob", "Charlie",
            "Alice", "Bob", "Charlie",
        ],
        "class": [
            "Math101", "Math101", "Math101",
            "Math102", "Math102", "Math102",
        ],
        "age": [20, 21, 22, 20, 21, 22],
        "semester": [
            "Fall", "Fall", "Fall",
            "Spring", "Spring", "Spring",
        ],
        "math": [85, 78, 92, 88, 79, 95],
        "science": [90, 82, 85, 92, 81, 87],
        "history": [88, 80, 87, 85, 82, 89],
    }
)
df

In [15]:
# Reshape wide to long while carrying four identifier columns along; only the
# three subject columns are stacked into "subject"/"grade".
# DataFrame.melt is deprecated as of Polars 1.0.0; unpivot is its replacement,
# with `id_vars` renamed to `index` and `value_vars` renamed to `on`.
df.unpivot(
    on=["math", "science", "history"],
    index=["student", "class", "age", "semester"],
    variable_name="subject",
    value_name="grade",
)

## Transposing

In [17]:
# Input for the transpose example: three students (rows) by four columns.
df = pl.DataFrame(
    {
        "student": ["Alice", "Bob", "Charlie"],
        "math": [85, 78, 92],
        "science": [90, 82, 85],
        "history": [88, 80, 87],
    }
)
df

In [18]:
# Flip rows and columns. The original column names are kept in a leading
# "original_headers" column, and each original row becomes a "report_<n>" column.
# The transposed frame has one value column per original *row*, so the names are
# generated from df.height rather than from the number of original columns.
df.transpose(
    include_header=True,
    header_name="original_headers",
    column_names=(f"report_{count}" for count in range(1, df.height + 1)),
)

## Exploding

In [20]:
# Each student carries a list of three math grades in a single cell.
df = pl.DataFrame(
    {
        "student": ["Alice", "Bob", "Charlie"],
        "math": [
            [85, 90, 88],
            [78, 82, 80],
            [92, 85, 87],
        ],
    }
)
df

In [21]:
# Give every element of each "math" list its own row; the other column values
# are repeated for each element.
df.explode("math")

In [22]:
# Three list columns per student; note that the list lengths are not the same
# in every row.
df = pl.DataFrame(
    {
        "student": ["Alice", "Bob", "Charlie"],
        "math": [
            [85, 90, 88],
            [78, 82, 80],
            [92, 85, 87],
        ],
        "science": [
            [85, 90, 88],
            [78, 82],
            [92, 85, 87],
        ],
        "history": [
            [85, 90, 88],
            [78, 82],
            [92, 85, 87],
        ],
    }
)
df

In [23]:
# Explode all three list columns in a single call.
# NOTE(review): in the frame defined above, Bob's "math" list has three elements
# while his "science" and "history" lists have two. Columns exploded together
# presumably need matching element counts per row, so this call likely raises;
# confirm that the error is the intended demonstration.
df.explode("math", "science", "history")

In [24]:
# Two list columns whose lengths disagree in the first row (two items vs. one).
# NOTE(review): exploding both together presumably raises because the element
# counts differ; confirm that the error is the intended output of this cell.
df = pl.DataFrame(
    {
        "id": [1, 2],
        "value1": [["a", "b"], ["c"]],
        "value2": [["a"], ["b"]],
    }
)
df.explode("value1", "value2")

In [25]:
# "nested_value" mixes a flat list with a list of lists; strict=False is passed
# so that construction tolerates the inconsistent nesting.
df = pl.DataFrame(
    {
        "id": [1, 2],
        "nested_value": [
            ["a", "b"],
            [["c"], ["d", "e"]],
        ],
    },
    strict=False,
)
df

In [26]:
# A single explode removes one level of list nesting from "nested_value".
df.explode("nested_value")

In [27]:
# Chain two explode calls to remove two levels of list nesting from "nested_value".
df.explode("nested_value").explode("nested_value")

## Partition into Multiple DataFrames

In [29]:
# Six orders spread over three regions; used by the partitioning examples.
df = pl.DataFrame(
    {
        "OrderID": [1, 2, 3, 4, 5, 6],
        "Product": ["A", "B", "A", "C", "B", "A"],
        "Quantity": [10, 5, 8, 7, 3, 12],
        "Region": [
            "North", "South", "North",
            "West", "South", "West",
        ],
    }
)

In [30]:
# Split df into a list of DataFrames, one per distinct "Region" value.
df.partition_by("Region")

In [31]:
# Same split, but with the "Region" key column left out of each resulting DataFrame.
df.partition_by("Region", include_key=False)

In [32]:
# Return the partitions as a dictionary instead of a list, so that each one can
# be looked up by its key.
dfs = df.partition_by(["Region"], as_dict=True)
dfs

In [33]:
# The lookup key is a tuple of the partition-key values, hence the one-element tuple.
dfs[("North",)]

## Conclusion