# pandas.DataFrame.sort_values
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html

In [1]:
import numpy as np
import pandas as pd

### 1-1. Example 1: Sort

In [2]:
# Example frame for the sort_values demos: 'col1' holds a repeated value ('A')
# and one missing entry, 'col4' mixes upper- and lower-case letters.
df = pd.DataFrame(dict(
    col1=['A', 'A', 'B', np.nan, 'D', 'C'],
    col2=[2, 1, 9, 8, 7, 4],
    col3=[0, 1, 9, 4, 2, 3],
    col4=['a', 'B', 'c', 'D', 'e', 'F'],
))
df

Unnamed: 0,col1,col2,col3,col4
0,A,2,0,a
1,A,1,1,B
2,B,9,9,c
3,,8,4,D
4,D,7,2,e
5,C,4,3,F


In [3]:
# Ascending sort on a single column; the missing value ends up last.
df.sort_values('col1')

Unnamed: 0,col1,col2,col3,col4
0,A,2,0,a
1,A,1,1,B
2,B,9,9,c
5,C,4,3,F
4,D,7,2,e
3,,8,4,D


In [4]:
# Sort on 'col1' first, then use 'col2' to order rows that share a 'col1' value.
df.sort_values(['col1', 'col2'])

Unnamed: 0,col1,col2,col3,col4
1,A,1,1,B
0,A,2,0,a
2,B,9,9,c
5,C,4,3,F
4,D,7,2,e
3,,8,4,D


### 1-2. Example 2: Sort Descending

In [5]:
# Descending sort on 'col1'.
df.sort_values('col1', ascending=False)

Unnamed: 0,col1,col2,col3,col4
4,D,7,2,e
5,C,4,3,F
2,B,9,9,c
0,A,2,0,a
1,A,1,1,B
3,,8,4,D


### 1-3. Example 3: Putting NAs first

In [6]:
# Descending sort on 'col1', with the missing value moved to the top.
df.sort_values('col1', ascending=False, na_position='first')

Unnamed: 0,col1,col2,col3,col4
3,,8,4,D
4,D,7,2,e
5,C,4,3,F
2,B,9,9,c
0,A,2,0,a
1,A,1,1,B


### 1-4. Example 4: Sorting with a key function

In [7]:
# The key lowercases 'col4' before comparison, so the ordering ignores case.
df.sort_values('col4', key=lambda letters: letters.str.lower())

Unnamed: 0,col1,col2,col3,col4
0,A,2,0,a
1,A,1,1,B
2,B,9,9,c
3,,8,4,D
4,D,7,2,e
5,C,4,3,F


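#### Note: the result looks unchanged because `key=lambda col: col.str.lower()` lowercases `col4` before comparing, and `a, b, c, d, e, f` is already in ascending order. Without `key`, uppercase letters sort before lowercase ones (`B, D, F, a, c, e`).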

### 1-5. Example 5: Natural sort with the key argument

In [8]:
# Frame whose 'time' strings start with numbers of different widths, so a
# plain string sort and a numeric-aware sort give different orders.
df = pd.DataFrame(dict(
    time=['0hr', '128hr', '72hr', '48hr', '96hr'],
    value=[10, 20, 30, 40, 50],
))
df

Unnamed: 0,time,value
0,0hr,10
1,128hr,20
2,72hr,30
3,48hr,40
4,96hr,50


In [9]:
# natsort is a third-party package; the recorded run of this cell failed with
# ModuleNotFoundError because it was not installed in the kernel's environment.
from natsort import index_natsorted
# The key ignores its argument `x` and works on df["time"] directly.
# NOTE(review): np.argsort over index_natsorted(...) presumably converts the
# natural-sort order into a per-row rank - confirm against the natsort docs.
df.sort_values(
   by="time",
   key=lambda x: np.argsort(index_natsorted(df["time"]))
)

ModuleNotFoundError: No module named 'natsort'

## 2. 找出 Pandas 數組行中的第二大元素排序
https://blog.csdn.net/chuan403082010/article/details/79439530

In [10]:
import numpy as np
import pandas as pd

In [11]:
# Small integer frame, written row by row, used to look up the second-largest
# value of each column.
df = pd.DataFrame(
    [
        [4, 20, 25],
        [5, 10, 20],
        [3, 40, 5],
        [1, 50, 15],
        [2, 30, 10],
    ],
    columns=['a', 'b', 'c'],
)
df

Unnamed: 0,a,b,c
0,4,20,25
1,5,10,20
2,3,40,5
3,1,50,15
4,2,30,10


In [12]:
def sort_value(column):
    """Return the second entry of ``column`` when ordered largest-first.

    Ties are not collapsed: if the maximum occurs twice, the maximum itself
    is returned. Indexing position 1 raises ``IndexError`` when the column
    has fewer than two entries.
    """
    return column.sort_values(ascending=False).iloc[1]

def second_largest(df):
    """Apply ``sort_value`` to every column of ``df``.

    Returns whatever ``DataFrame.apply`` builds from the per-column results
    (one entry per column label).
    """
    per_column = df.apply(sort_value, axis=0)
    return per_column

In [13]:
# Second-largest entry of each column of the frame defined above.
second_largest(df)

a     4
b    40
c    20
dtype: int64

## 3. 非常全面的 Pandas 入門教程
https://www.mdeditor.tw/pl/2tRj/zh-tw

In [14]:
# 5x3 frame of random integers drawn from [1, 15); no seed is set, so the
# values differ from run to run.
df = pd.DataFrame(np.random.randint(1, 15, size=(5, 3)), columns=['a', 'b', 'c'])
df

Unnamed: 0,a,b,c
0,1,11,6
1,13,2,6
2,10,3,6
3,12,6,2
4,2,12,2


In [15]:
# Number of largest values to locate. The original cell assigned n = 5 but
# then hard-coded 3 in the slice; n now drives the slice and is set to the
# value the recorded output reflects.
n = 3
# argsort() lists row positions in ascending order of 'a'; reversing puts the
# position of the largest value first. The VALUES of the result are the row
# positions; its index labels are only left over from the reversal.
df['a'].argsort()[::-1].iloc[:n]

4    1
3    3
2    2
Name: a, dtype: int64

In [16]:
# 3x3 frame of random integers drawn from [1, 100); unseeded, so the values
# change on every run.
df = pd.DataFrame(np.random.randint(1, 100, size=(3, 3)))
df

Unnamed: 0,0,1,2
0,47,47,72
1,82,78,16
2,38,47,97


In [17]:
# Second-largest distinct value of every row: sort ascending, drop repeats,
# then take the next-to-last entry.
df.apply(lambda row: row.sort_values().unique()[-2], axis='columns')

0    47
1    78
2    47
dtype: int64

In [18]:
# Second-largest distinct value of every column: sort ascending, drop repeats,
# then take the next-to-last entry.
df.apply(lambda col: col.sort_values().unique()[-2], axis='index')

0    47
1    47
2    72
dtype: int64

## 4. Test by Myself

In [19]:
import numpy as np
import pandas as pd

In [20]:
# Test frame: 'a', 'b' and 'c' hold distinct values, 'd' contains ties
# (two 2s and two 6s).
df = pd.DataFrame(dict(
    a=[4, 5, 3, 1, 2],
    b=[20, 10, 40, 50, 30],
    c=[25, 20, 5, 15, 10],
    d=[3, 6, 6, 2, 2],
))
df

Unnamed: 0,a,b,c,d
0,4,20,25,3
1,5,10,20,6
2,3,40,5,6
3,1,50,15,2
4,2,30,10,2


In [21]:
# Rows ordered by ascending 'a'; the original index labels travel with the rows.
df.sort_values('a')

Unnamed: 0,a,b,c,d
3,1,50,15,2
4,2,30,10,2
2,3,40,5,6
0,4,20,25,3
1,5,10,20,6


In [22]:
# Index labels in the order produced by sorting on 'a'.
df.sort_values('a').index

Int64Index([3, 4, 2, 0, 1], dtype='int64')

In [23]:
# Print the index labels of the two rows with the smallest 'a'.
# The frame is sorted once and the first two labels are unpacked into print(),
# rather than running the same sort a second time for the second label.
print(*df.sort_values(by=['a']).index[:2])

3 4


In [24]:
# Rows ordered by ascending 'b'.
df.sort_values('b')

Unnamed: 0,a,b,c,d
1,5,10,20,6
0,4,20,25,3
4,2,30,10,2
2,3,40,5,6
3,1,50,15,2


In [25]:
# Index labels in the order produced by sorting on 'b'.
df.sort_values('b').index

Int64Index([1, 0, 4, 2, 3], dtype='int64')

In [26]:
# Rows ordered by ascending 'c'.
df.sort_values('c')

Unnamed: 0,a,b,c,d
2,3,40,5,6
4,2,30,10,2
3,1,50,15,2
1,5,10,20,6
0,4,20,25,3


In [27]:
# Rows ordered by ascending 'd', a column that contains tied values.
df.sort_values('d')

Unnamed: 0,a,b,c,d
3,1,50,15,2
4,2,30,10,2
0,4,20,25,3
1,5,10,20,6
2,3,40,5,6


In [28]:
# Index labels in the order produced by sorting on 'd'.
df.sort_values('d').index

Int64Index([3, 4, 0, 1, 2], dtype='int64')

### 4-1. ascending

In [29]:
# Rows ordered from the largest 'a' to the smallest.
df.sort_values('a', ascending=False)

Unnamed: 0,a,b,c,d
1,5,10,20,6
0,4,20,25,3
2,3,40,5,6
4,2,30,10,2
3,1,50,15,2


In [30]:
# For comparison with the descending sort: without `ascending`, the order is
# smallest 'a' first.
df.sort_values('a')

Unnamed: 0,a,b,c,d
3,1,50,15,2
4,2,30,10,2
2,3,40,5,6
0,4,20,25,3
1,5,10,20,6


In [31]:
# Index labels in the order produced by the descending sort on 'a'.
df.sort_values('a', ascending=False).index

Int64Index([1, 0, 2, 4, 3], dtype='int64')

In [33]:
# Print the index labels of the two rows with the largest 'a'.
# The descending sort runs once and its first two labels are unpacked into
# print(), rather than repeating the sort for the second label.
print(*df.sort_values(by=['a'], ascending=False).index[:2])

1 0
