# Sorting
## Basic sorting

In [5]:
import pandas as pd

# Toy orders table shared by the sorting examples: one list per column,
# all five entries long, so position i across the lists forms row i.
data = {
    "order_id": [201, 202, 203, 204, 205],
    "customer": ["Alice", "Brian", "Carlos", "Diana", "Eva"],
    "order_amount": [250.0, 75.5, 120.0, 450.2, 89.9],
    "items_purchased": [5, 1, 2, 8, 2],
}

# Dict keys become column labels; rows receive the default 0..4 integer index.
df = pd.DataFrame.from_dict(data)

print(df)

   order_id customer  order_amount  items_purchased
0       201    Alice         250.0                5
1       202    Brian          75.5                1
2       203   Carlos         120.0                2
3       204    Diana         450.2                8
4       205      Eva          89.9                2


Sort by Order Amount (Descending — `sort_values` is ascending by default, so we pass `ascending=False`)

In [6]:
# Largest order_amount first: ascending=False flips the default ascending order.
# sort_values returns a new frame, so df itself keeps its original row order.
df_sorted = df.sort_values(by="order_amount", ascending=False)
print(df_sorted)

   order_id customer  order_amount  items_purchased
3       204    Diana         450.2                8
0       201    Alice         250.0                5
2       203   Carlos         120.0                2
4       205      Eva          89.9                2
1       202    Brian          75.5                1


## Sorting by Multiple Columns

The first sorting key dominates; the second key only breaks ties:

- Sort first by items_purchased (descending),
- then by order_amount (ascending).

Observe that if you change `ascending` from `[False, False]` to `[False, True]`, the order of Eva and Carlos changes, because they have the same number of items purchased but different order amounts.

In [9]:
# Primary key:   items_purchased, largest first.
# Secondary key: order_amount, smallest first -- it only decides the order of
#                rows that tie on items_purchased.
# The two lists are matched up position by position.
df_sorted = df.sort_values(["items_purchased", "order_amount"], ascending=[False, True])

print(df_sorted)

   order_id customer  order_amount  items_purchased
3       204    Diana         450.2                8
0       201    Alice         250.0                5
4       205      Eva          89.9                2
2       203   Carlos         120.0                2
1       202    Brian          75.5                1


## Sorting by Index

In [10]:
# Order rows by their index labels, smallest label first (the default direction,
# spelled out here for contrast with the descending variant).
df_sorted_index = df.sort_index(ascending=True)
print(df_sorted_index)

   order_id customer  order_amount  items_purchased
0       201    Alice         250.0                5
1       202    Brian          75.5                1
2       203   Carlos         120.0                2
3       204    Diana         450.2                8
4       205      Eva          89.9                2


In [11]:
# Same index sort, but with the largest row label first.
df_sorted_index = df.sort_index(axis=0, ascending=False)
print(df_sorted_index)

   order_id customer  order_amount  items_purchased
4       205      Eva          89.9                2
3       204    Diana         450.2                8
2       203   Carlos         120.0                2
1       202    Brian          75.5                1
0       201    Alice         250.0                5


## Sorting Categorical Data

In [12]:
# Four movies with a text rating; the ratings are plain strings at this point,
# so pandas has no notion of which rating is "better".
movies = {
    "title": ["Movie A", "Movie B", "Movie C", "Movie D"],
    "rating_category": ["Good", "Excellent", "Average", "Good"],
}

df_movies = pd.DataFrame(data=movies)

print(df_movies)

     title rating_category
0  Movie A            Good
1  Movie B       Excellent
2  Movie C         Average
3  Movie D            Good


If we sort normally, the ratings are ordered alphabetically (Average → Excellent → Good):

In [14]:
# Sort on the rating column as it currently stands and show the result.
print(df_movies.sort_values(by="rating_category"))

     title rating_category
2  Movie C         Average
1  Movie B       Excellent
0  Movie A            Good
3  Movie D            Good


### Custom Order Sorting (Very Important Concept)
- Categorical ordering matters
- Alphabetical ≠ logical order

In [15]:
# Logical ranking, lowest to highest; list position defines the sort order.
category_order = ["Average", "Good", "Excellent"]

# Convert the column to an ordered categorical so comparisons and sorting
# follow category_order instead of the spelling of the labels.
# Note: this overwrites the column on df_movies in place.
df_movies["rating_category"] = df_movies["rating_category"].astype(
    pd.CategoricalDtype(categories=category_order, ordered=True)
)

print(df_movies.sort_values(by="rating_category"))

     title rating_category
2  Movie C         Average
0  Movie A            Good
3  Movie D            Good
1  Movie B       Excellent
