# Sorting DataFrames with `.sort_values()` 

### Import Pandas

In [1]:
import pandas as pd

### Read CSV

In [2]:
# Load the MLS salary table from the CSV file next to this notebook.
salaries = pd.read_csv(filepath_or_buffer="mls_salaries.csv")

### Head

In [3]:
# Preview the first three rows to see what we're working with.
salaries.head(n=3)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,ATL,Almiron,Miguel,M,1912500.0,2297000.0
1,ATL,Ambrose,Mikey,D,65625.0,65625.0
2,ATL,Asad,Yamil,M,150000.0,150000.0


### Sort values

In [4]:
# Order the rows by first_name, then preview the top three.
salaries.sort_values(by="first_name").head(n=3)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
177,HOU,DeLaGarza,AJ,D,250000.0,252500.0
128,DAL,Guillen,Aaron,D,54075.0,54075.0
430,PHI,Jones,Aaron,D,53004.0,53004.0


In [5]:
# Same key as the previous cell, but in reverse (Z -> A) order.
salaries.sort_values(by="first_name", ascending=False)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
372,NYRB,Lewis,Zeiko,M,75000.0,93750.00
471,POR,Valentin,Zarek,D,110000.0,110000.00
83,CLB,Steffen,Zack,,105000.0,105000.00
314,NE,Herivaux,Zachary,M,65625.0,65625.00
516,SEA,Mathers,Zach,M,53004.0,53004.00
...,...,...,...,...,...,...
46,CHI,Juninho,,M,700008.0,716674.67
265,MNUFC,Ibson,,M,200004.0,210337.40
401,ORL,Kaka,,M,6660000.0,7167500.00
429,PHI,Ilsinho,,M,470000.0,518333.33


In [6]:
# Numeric sort: lowest base_salary first.
salaries.sort_values(by="base_salary")

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
523,SEA,Tolo,Nouhou,D,52999.92,52999.92
348,NYCFC,Okoli,Sean,F,52999.92,52999.92
19,ATL,Oblitey Otoo,Jeffrey,,53000.00,53000.00
458,POR,Farfan,Marco,D,53000.00,53000.00
413,ORL,Stajduhar,Mason,GK,53000.00,53000.00
...,...,...,...,...,...,...
568,TOR,Giovinco,Sebastian,F,5600000.00,7115555.67
355,NYCFC,Villa,David,F,5610000.00,5610000.00
560,TOR,Bradley,Michael,M,6000000.00,6500000.00
401,ORL,Kaka,,M,6660000.00,7167500.00


In [7]:
# Numeric sort: highest base_salary first.
salaries.sort_values(by="base_salary", ascending=False)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
401,ORL,Kaka,,M,6660000.00,7167500.00
560,TOR,Bradley,Michael,M,6000000.00,6500000.00
355,NYCFC,Villa,David,F,5610000.00,5610000.00
568,TOR,Giovinco,Sebastian,F,5600000.00,7115555.67
349,NYCFC,Pirlo,Andrea,M,5600000.00,5915690.00
...,...,...,...,...,...,...
186,HOU,Lucatero,Christian,M,53000.00,53250.00
458,POR,Farfan,Marco,D,53000.00,53000.00
523,SEA,Tolo,Nouhou,D,52999.92,52999.92
348,NYCFC,Okoli,Sean,F,52999.92,52999.92


### How is NaN sorted?
NaN is sorted last by default. To change that, use `na_position="first"`.

In [8]:
# Put rows with a missing base_salary at the top instead of the bottom.
salaries.sort_values(by="base_salary", na_position="first")

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
613,,,,,,
523,SEA,Tolo,Nouhou,D,52999.92,52999.92
348,NYCFC,Okoli,Sean,F,52999.92,52999.92
19,ATL,Oblitey Otoo,Jeffrey,,53000.00,53000.00
458,POR,Farfan,Marco,D,53000.00,53000.00
...,...,...,...,...,...,...
349,NYCFC,Pirlo,Andrea,M,5600000.00,5915690.00
568,TOR,Giovinco,Sebastian,F,5600000.00,7115555.67
355,NYCFC,Villa,David,F,5610000.00,5610000.00
560,TOR,Bradley,Michael,M,6000000.00,6500000.00


### Sort by `club` then `base_salary`

In [9]:
# Two sort keys: club is the primary key, base_salary breaks ties within a club.
salaries.sort_values(by=["club", "base_salary"])

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
19,ATL,Oblitey Otoo,Jeffrey,,53000.00,53000.00
28,ATL,Walkes,Anton,D,53004.00,53004.00
29,ATL,Wheeler-Omiunu,Andrew,M,53004.00,53004.00
4,ATL,Carleton,Andrew,F,65000.00,77400.00
30,ATL,Williams,Romario,F,65000.00,65000.00
...,...,...,...,...,...,...
603,VAN,Reyna,Yordy,M-F,440000.04,533700.04
607,VAN,Shea,Brek,M-D,625000.00,670000.00
595,VAN,Laba,Matias,M,725000.00,885500.00
599,VAN,Montero,Fredy,F,1400000.04,1800000.04


### Sort by `club` then `base_salary`, descending

In [10]:
# Same two keys; a single ascending=False reverses the order of both.
salaries.sort_values(by=["club", "base_salary"], ascending=False)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
599,VAN,Montero,Fredy,F,1400000.04,1800000.04
595,VAN,Laba,Matias,M,725000.00,885500.00
607,VAN,Shea,Brek,M-D,625000.00,670000.00
603,VAN,Reyna,Yordy,M-F,440000.04,533700.04
601,VAN,Ousted,David,GK,360000.00,378933.33
...,...,...,...,...,...,...
30,ATL,Williams,Romario,F,65000.00,65000.00
28,ATL,Walkes,Anton,D,53004.00,53004.00
29,ATL,Wheeler-Omiunu,Andrew,M,53004.00,53004.00
19,ATL,Oblitey Otoo,Jeffrey,,53000.00,53000.00


### Sort by `club` then `base_salary`, independently sorted

In [11]:
# Pass one bool per key to sort each independently:
# club descending, base_salary ascending.
salaries.sort_values(
    by=["club", "base_salary"],
    ascending=[False, True],
)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
606,VAN,Seiler,Cole,D,54075.0,54075.0
585,VAN,Davies,Alphonso,M,65000.0,65000.0
596,VAN,Levis,Brett,D,65000.0,67500.0
597,VAN,McKendry,Ben,M,65000.0,65000.0
591,VAN,Greig,Kyle,F,65004.0,66849.0
...,...,...,...,...,...,...
27,ATL,Villalba,Hector,F,663000.0,770750.0
5,ATL,Carmona,Carlos,M,675000.0,725000.0
16,ATL,Martinez,Josef,F,924000.0,1041310.0
0,ATL,Almiron,Miguel,M,1912500.0,2297000.0


### Change the sorting of NaN values with `na_position=`

In [12]:
# Mixed sort directions, with rows holding NaN moved to the top.
salaries.sort_values(
    by=["club", "base_salary"],
    ascending=[False, True],
    na_position="first",
)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
613,,,,,,
606,VAN,Seiler,Cole,D,54075.0,54075.0
585,VAN,Davies,Alphonso,M,65000.0,65000.0
596,VAN,Levis,Brett,D,65000.0,67500.0
597,VAN,McKendry,Ben,M,65000.0,65000.0
...,...,...,...,...,...,...
17,ATL,McCann,Chris,M,540000.0,568000.0
27,ATL,Villalba,Hector,F,663000.0,770750.0
5,ATL,Carmona,Carlos,M,675000.0,725000.0
16,ATL,Martinez,Josef,F,924000.0,1041310.0


As you can see, the NaN values are at the beginning ⬆️

In [13]:
# Mixed sort directions, with rows holding NaN kept at the bottom
# (spelled out explicitly here for comparison with the previous cell).
salaries.sort_values(
    by=["club", "base_salary"],
    ascending=[False, True],
    na_position="last",
)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
606,VAN,Seiler,Cole,D,54075.0,54075.0
585,VAN,Davies,Alphonso,M,65000.0,65000.0
596,VAN,Levis,Brett,D,65000.0,67500.0
597,VAN,McKendry,Ben,M,65000.0,65000.0
591,VAN,Greig,Kyle,F,65004.0,66849.0
...,...,...,...,...,...,...
27,ATL,Villalba,Hector,F,663000.0,770750.0
5,ATL,Carmona,Carlos,M,675000.0,725000.0
16,ATL,Martinez,Josef,F,924000.0,1041310.0
0,ATL,Almiron,Miguel,M,1912500.0,2297000.0


As you can see, the NaN values are at the end (last) ⬆️