# 정렬(sort)
- 데이터 정렬
- 기준
    - 인덱스 -> sort_index()
    - 값 -> sort_values()

In [22]:
# module import
import pandas as pd
import numpy as np

# Demo frame used throughout the notebook: an integer column, a string
# column, and a float column in which two of the five ages are missing.
df = pd.DataFrame(
    dict(
        value=[1, 2, 3, 4, 5],
        name=["jin", "iceman", "speed", "maberick", "bob"],
        age=[12, np.nan, 30, 27, np.nan],
    )
)

# Print the per-column dtype and non-null count summary.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   value   5 non-null      int64  
 1   name    5 non-null      object 
 2   age     3 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 248.0+ bytes


In [23]:
# Show the row index and the column index, one per line.
print(df.index)
print(df.columns)

RangeIndex(start=0, stop=5, step=1)
Index(['value', 'name', 'age'], dtype='object')


In [24]:
# sort_index orders rows by their index labels. ascending defaults to True,
# so pass False to list the rows from the largest label down to the smallest.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,value,name,age
4,5,bob,
3,4,maberick,27.0
2,3,speed,30.0
1,2,iceman,
0,1,jin,12.0


In [25]:
# Sort by column values: age first, with name breaking ties.
# Rows whose age is NaN end up at the bottom of the result.
df.sort_values(["age", "name"])

Unnamed: 0,value,name,age
0,1,jin,12.0
3,4,maberick,27.0
2,3,speed,30.0
4,5,bob,
1,2,iceman,


In [26]:
# The name stored at row label 0, paired with its character count.
df.loc[0, "name"], len(df.loc[0, "name"])

('jin', 3)

In [27]:
# Length of every name, as a plain Python list.
list(map(len, df["name"]))

[3, 6, 5, 8, 3]

In [28]:
# Add a helper column holding each name's length, then order the rows by it.
# .str.len() is pandas' vectorized string-length accessor; unlike .apply(len)
# it yields NaN for a missing name instead of raising.
# Note: this adds name_len to df itself, and later cells rely on that column.
df['name_len'] = df['name'].str.len()
df.sort_values(by='name_len')

Unnamed: 0,value,name,age,name_len
0,1,jin,12.0,3
4,5,bob,,3
2,3,speed,30.0,5
1,2,iceman,,6
3,4,maberick,27.0,8


In [29]:
# Same ordering without a helper column: `key` receives the whole "name"
# column and returns the values to sort by -- here, each name's length.
df.sort_values("name", key=lambda names: names.str.len())

Unnamed: 0,value,name,age,name_len
0,1,jin,12.0,3
4,5,bob,,3
2,3,speed,30.0,5
1,2,iceman,,6
3,4,maberick,27.0,8


## 컬럼명 정렬

In [30]:
# Alphabetically ordered list of df's column labels.
# sorted() iterates the column Index directly and returns a new list,
# so no separate copy followed by an in-place sort is needed.
colList = sorted(df.columns)
colList

['age', 'name', 'name_len', 'value']

In [31]:
# Select every row, with the columns arranged in the order given by colList.
df2 = df.loc[:, colList]
df2

Unnamed: 0,age,name,name_len,value
0,12.0,jin,3,1
1,,iceman,6,2
2,30.0,speed,5,3
3,27.0,maberick,8,4
4,,bob,3,5


In [32]:
# Reverse the row order: the highest index label comes first.
df3 = df2.sort_index(axis=0, ascending=False)
df3

Unnamed: 0,age,name,name_len,value
4,,bob,3,5
3,27.0,maberick,8,4
2,30.0,speed,5,3
1,,iceman,6,2
0,12.0,jin,3,1


In [33]:
# Transpose: the column names become the row labels and the old row labels
# become the columns.
df4 = df3.transpose()
df4

Unnamed: 0,4,3,2,1,0
age,,27.0,30.0,,12.0
name,bob,maberick,speed,iceman,jin
name_len,3,8,5,6,3
value,5,4,3,2,1


In [34]:
# Move the row labels (the former column names) into a regular "index" column.
# Built from df3 rather than from df4 itself so that re-running this cell
# always yields the same frame instead of stacking up another index column.
df4 = df3.T.reset_index()

In [35]:
# Order the rows by the text held in the "index" column.
df4 = df4.sort_values("index")

In [36]:
# Renumber the rows 0..n-1 after the sort, discarding the old row labels.
# "index" is already the first column, so this single call has the same
# effect as moving it into the row index and straight back out again.
df4 = df4.reset_index(drop=True)
df4

Unnamed: 0,index,4,3,2,1,0
0,age,,27.0,30.0,,12.0
1,name,bob,maberick,speed,iceman,jin
2,name_len,3,8,5,6,3
3,value,5,4,3,2,1


In [37]:
# Display the frame without the "index" column. The result is not assigned,
# so df4 itself is left unchanged.
df4.drop(columns="index")

Unnamed: 0,4,3,2,1,0
0,,27.0,30.0,,12.0
1,bob,maberick,speed,iceman,jin
2,3,8,5,6,3
3,5,4,3,2,1
