# Sorting DataFrames
### You can sort a DataFrame by its indices using the `.sort_index()` method.
* This sorts rows (axis=0) by default, but you can specify axis=1 to sort the columns

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the retail sales CSV into a DataFrame and display it.
retail_csv_path = "../DataFrames/retail_2016_2017.csv"
retail_df = pd.read_csv(retail_csv_path)
retail_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0
1,1945945,2016-01-01,1,BABY CARE,0.000,0
2,1945946,2016-01-01,1,BEAUTY,0.000,0
3,1945947,2016-01-01,1,BEVERAGES,0.000,0
4,1945948,2016-01-01,1,BOOKS,0.000,0
...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8


In [3]:
# Build a small sample DataFrame: keep only the rows belonging to the three listed
# product families, then draw 5 rows at random (random_state is fixed so the same
# rows are drawn on every run).
families = ["BEVERAGES", "DELI", "DAIRY"]
condition = retail_df["family"].isin(families)

sample_df = retail_df.loc[condition].sample(n=5, random_state=2021)

sample_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
74292,2020236,2016-02-11,43,DELI,212.0,2
13506,1959450,2016-01-08,38,DELI,131.545,43
882588,2828532,2017-05-11,23,BEVERAGES,1194.0,22
445008,2390952,2016-09-06,45,BEVERAGES,8339.0,19
495966,2441910,2016-10-05,25,DELI,0.0,0


In [7]:
# Sort the sample DataFrame by its row index (axis=0, the default axis) in descending order;
# without ascending=False the rows would come back in ascending index order.
sample_df.sort_index(axis=0, ascending=False)

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
882588,2828532,2017-05-11,23,BEVERAGES,1194.0,22
495966,2441910,2016-10-05,25,DELI,0.0,0
445008,2390952,2016-09-06,45,BEVERAGES,8339.0,19
74292,2020236,2016-02-11,43,DELI,212.0,2
13506,1959450,2016-01-08,38,DELI,131.545,43


In [8]:
# Sort the columns (axis=1) into ascending order by column label and rebind sample_df to the
# result. sort_index(axis=1, inplace=True) would leave sample_df in the same state by mutating
# it in place; reassignment is preferred because it keeps method chaining available and makes
# the change of state explicit. Only the column order changes -- the cell values do not.
sample_df = sample_df.sort_index(axis=1)
sample_df  # columns are now in alphabetical order; sorting rows is far more common than sorting columns

Unnamed: 0,date,family,id,onpromotion,sales,store_nbr
74292,2016-02-11,DELI,2020236,2,212.0,43
13506,2016-01-08,DELI,1959450,43,131.545,38
882588,2017-05-11,BEVERAGES,2828532,22,1194.0,23
445008,2016-09-06,BEVERAGES,2390952,19,8339.0,45
495966,2016-10-05,DELI,2441910,0,0.0,25


In [9]:
# Order the sample rows by the values in the store_nbr column (ascending is the default).
sample_df.sort_values(by="store_nbr")

Unnamed: 0,date,family,id,onpromotion,sales,store_nbr
882588,2017-05-11,BEVERAGES,2828532,22,1194.0,23
495966,2016-10-05,DELI,2441910,0,0.0,25
13506,2016-01-08,DELI,1959450,43,131.545,38
74292,2016-02-11,DELI,2020236,2,212.0,43
445008,2016-09-06,BEVERAGES,2390952,19,8339.0,45


In [12]:
# Sort the sample DataFrame on two keys: family in ascending order first,
# then sales in descending order within each family.
# The ascending list is matched position-by-position with the list of sort columns.

sample_df.sort_values(by=["family", "sales"], ascending=[True, False])

Unnamed: 0,date,family,id,onpromotion,sales,store_nbr
445008,2016-09-06,BEVERAGES,2390952,19,8339.0,45
882588,2017-05-11,BEVERAGES,2828532,22,1194.0,23
74292,2016-02-11,DELI,2020236,2,212.0,43
13506,2016-01-08,DELI,1959450,43,131.545,38
495966,2016-10-05,DELI,2441910,0,0.0,25


### More examples

In [15]:
# Load the oil price CSV into a DataFrame and display it.
oil_csv_path = "../DataFrames/oil.csv"
oil = pd.read_csv(oil_csv_path)
oil

Unnamed: 0,date,dcoilwtico
0,2013-01-01,
1,2013-01-02,93.14
2,2013-01-03,92.97
3,2013-01-04,93.12
4,2013-01-07,93.20
...,...,...
1213,2017-08-25,47.65
1214,2017-08-28,46.40
1215,2017-08-29,46.46
1216,2017-08-30,45.96


In [17]:
# Sort the columns (axis=1) by label in descending order, so dcoilwtico comes before date.
oil.sort_index(axis=1, ascending=False)

Unnamed: 0,dcoilwtico,date
0,,2013-01-01
1,93.14,2013-01-02
2,92.97,2013-01-03
3,93.12,2013-01-04
4,93.20,2013-01-07
...,...,...
1213,47.65,2017-08-25
1214,46.40,2017-08-28
1215,46.46,2017-08-29
1216,45.96,2017-08-30


In [33]:
# Add a month column holding the calendar month number taken from each date.
# pd.to_datetime is the dedicated parser for date strings; the result of a unit-specific
# cast such as astype("datetime64[s]") has differed between pandas versions, so parsing
# explicitly is the more portable choice. The extracted month numbers are the same.
oil["month"] = pd.to_datetime(oil["date"]).dt.month
oil

Unnamed: 0,date,dcoilwtico,month
0,2013-01-01,,1
1,2013-01-02,93.14,1
2,2013-01-03,92.97,1
3,2013-01-04,93.12,1
4,2013-01-07,93.20,1
...,...,...,...
1213,2017-08-25,47.65,8
1214,2017-08-28,46.40,8
1215,2017-08-29,46.46,8
1216,2017-08-30,45.96,8


In [35]:
# Sort by oil price from highest to lowest. NaNs are placed at the end by default
# (na_position="last") whether the sort is ascending or descending; pass
# na_position="first" to put them at the top instead.
oil.sort_values(by="dcoilwtico", ascending=False)

Unnamed: 0,date,dcoilwtico,month
178,2013-09-06,110.62,9
171,2013-08-28,110.17,8
179,2013-09-09,109.62,9
170,2013-08-27,109.11,8
182,2013-09-12,108.72,9
...,...,...,...
1079,2017-02-20,,2
1118,2017-04-14,,4
1149,2017-05-29,,5
1174,2017-07-03,,7


In [39]:
# Sort by month in ascending order, then by oil price in descending order within each month.
oil.sort_values(by=["month", "dcoilwtico"], ascending=[True, False])

Unnamed: 0,date,dcoilwtico,month
282,2014-01-30,98.25,1
21,2013-01-30,97.98,1
22,2013-01-31,97.65,1
20,2013-01-29,97.62,1
283,2014-01-31,97.55,1
...,...,...,...
774,2015-12-21,34.55,12
256,2013-12-25,,12
517,2014-12-25,,12
778,2015-12-25,,12


In [2]:


# Small demonstration of how assignment binds names to values.
x, y = 3, 4

# y is rebound to the value x currently refers to (3); the earlier 4 is discarded.
y = x

# Rebinding x afterwards does not affect y, which still refers to 3.
x = 7

print(y)

3
