# Pandas - Sorting

[Reference: Corey Schafer's video tutorial on YouTube](https://www.youtube.com/watch?v=T11QYVfZoD0&ab_channel=CoreySchafer)

In [1]:
import pandas as pd

In [2]:
# Sample records generated with Gemini 2.5 Pro.
# Each inner list is one row: first name, last name, email.
DUMMY_COLUMNS = ["First Name", "Last Name", "Email"]
DUMMY_DATA = [
    ["Jennifer", "Miller", "jennifer.miller@gmail.com"],
    ["Peter", "Wilson", "peter.wilson@yahoo.com"],
    ["Linda", "Johnson", "linda.johnson@icloud.com"],
    ["Robert", "Chen", "robert.chen@proton.me"],
    ["Robert", "Smith", "robert.smith@outlook.com"],
    ["William", "Taylor", "william.taylor@yahoo.com"],
    ["Jennifer", "Chen", "jennifer.chen@gmail.com"],
    ["Peter", "Wilson", "peter.wilson@yahoo.com"],
    ["Susan", "Brown", "susan.brown@proton.me"],
    ["Robert", "Jones", "robert.jones@outlook.com"],
]

# Build the demo frame and preview its first five rows.
df = pd.DataFrame(data=DUMMY_DATA, columns=DUMMY_COLUMNS)
df.head(n=5)

Unnamed: 0,First Name,Last Name,Email
0,Jennifer,Miller,jennifer.miller@gmail.com
1,Peter,Wilson,peter.wilson@yahoo.com
2,Linda,Johnson,linda.johnson@icloud.com
3,Robert,Chen,robert.chen@proton.me
4,Robert,Smith,robert.smith@outlook.com


In [3]:
# Sort by a single column; passing a list to `by` sorts on several levels.
# Optional keyword arguments, left at their defaults here:
#   inplace=True     -> sort `df` itself instead of returning a new frame
#   ascending=False  -> descending order (the default is True)
df.sort_values(by="Last Name").head(n=5)

Unnamed: 0,First Name,Last Name,Email
8,Susan,Brown,susan.brown@proton.me
3,Robert,Chen,robert.chen@proton.me
6,Jennifer,Chen,jennifer.chen@gmail.com
2,Linda,Johnson,linda.johnson@icloud.com
9,Robert,Jones,robert.jones@outlook.com


In [4]:
# Two-level sort: "Last Name" ascending, then "First Name" descending to
# order the rows that share a last name.
# `df` is rebound to the sorted result rather than sorted with
# `inplace=True`: the call stays chainable and `df` still holds the
# sorted frame afterwards.
df = df.sort_values(
    by=['Last Name', 'First Name'],
    ascending=[True, False],
)
df.head()

Unnamed: 0,First Name,Last Name,Email
8,Susan,Brown,susan.brown@proton.me
3,Robert,Chen,robert.chen@proton.me
6,Jennifer,Chen,jennifer.chen@gmail.com
2,Linda,Johnson,linda.johnson@icloud.com
9,Robert,Jones,robert.jones@outlook.com


In [5]:
# Order the rows by their index labels (ascending) and preview the first five.
df.sort_index(ascending=True).head(n=5)

Unnamed: 0,First Name,Last Name,Email
0,Jennifer,Miller,jennifer.miller@gmail.com
1,Peter,Wilson,peter.wilson@yahoo.com
2,Linda,Johnson,linda.johnson@icloud.com
3,Robert,Chen,robert.chen@proton.me
4,Robert,Smith,robert.smith@outlook.com


In [6]:
# Sort one column on its own: select it first, then sort the resulting Series.
df["Last Name"].sort_values(ascending=True).head(n=5)

8      Brown
3       Chen
6       Chen
2    Johnson
9      Jones
Name: Last Name, dtype: object

使用其他Dataset

In [7]:
# Load the presidents' heights CSV; from here on `df` refers to this frame.
df = pd.read_csv("../dataset/president_heights.csv")
df.head(n=5)

Unnamed: 0,order,name,height(cm)
0,1,George Washington,189
1,2,John Adams,170
2,3,Thomas Jefferson,189
3,4,James Madison,163
4,5,James Monroe,183


In [8]:
# Sort the presidents by height, shortest first.
# `df` is rebound to the sorted result rather than sorted with
# `inplace=True`: the call stays chainable and `df` still holds the
# sorted frame afterwards.
df = df.sort_values(
    by=['height(cm)'],
    ascending=True,
)
df

Unnamed: 0,order,name,height(cm)
3,4,James Madison,163
7,8,Martin Van Buren,168
21,23,Benjamin Harrison,168
1,2,John Adams,170
22,25,William McKinley,170
5,6,John Quincy Adams,171
8,9,William Henry Harrison,173
10,11,James K. Polk,173
11,12,Zachary Taylor,173
17,18,Ulysses S. Grant,173


In [9]:
# Grab the ten rows with the largest "height(cm)" values.
df.nlargest(n=10, columns="height(cm)")

Unnamed: 0,order,name,height(cm)
15,16,Abraham Lincoln,193
33,36,Lyndon B. Johnson,193
2,3,Thomas Jefferson,189
0,1,George Washington,189
29,32,Franklin D. Roosevelt,188
38,41,George H. W. Bush,188
39,42,Bill Clinton,188
37,40,Ronald Reagan,185
41,44,Barack Obama,185
6,7,Andrew Jackson,185
