# **DataFrame Basics III**

## **Sorting dataframes**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.age.sort_values()

In [None]:
# Returns a new DataFrame ordered by 'age' (ascending is the default).
# `inplace` is not set, so `titanic` itself keeps its current row order.
titanic.sort_values(by = 'age')

In [None]:
titanic

In [None]:
# With inplace=True the rows of `titanic` itself are reordered by 'age'
# and the call returns None, so nothing is displayed for this cell.
titanic.sort_values(by = 'age', inplace = True)

In [None]:
titanic.head()

In [None]:
# Put the rows back into index-label order (ascending), modifying
# `titanic` in place; this reverses the reordering by 'age' done earlier.
titanic.sort_index(inplace = True)

In [None]:
titanic.head()

In [None]:
titanic.sort_values(by = ['age', 'pclass'], inplace = True)

In [None]:
titanic.head(20)

In [None]:
titanic.sort_index(inplace = True)

In [None]:
# Sort on three keys, each with its own direction. The `ascending` list is
# matched position-by-position with `by`: 'age' and 'pclass' ascending,
# 'sex' descending. The frame is modified in place.
titanic.sort_values(by = ['age', 'pclass', 'sex'], ascending = [True, True, False], inplace = True)

In [None]:
# Raise the display limit so frames of up to 900 rows are printed in full
# instead of being truncated.
pd.options.display.max_rows = 900

In [None]:
titanic

In [None]:
titanic.sort_index()

In [None]:
# Lower the display limit to 10 rows so larger frames are printed in
# truncated form again.
pd.options.display.max_rows = 10

In [None]:
titanic

## **Ranking DataFrames**

In [None]:
import pandas as pd

In [None]:
# Example daily sales figures, indexed by abbreviated weekday name.
# "Sat" repeats the value of "Mon" (15), which gives the ranking cells a tie
# to resolve.
sales = pd.Series(
    [15, 32, 45, 21, 55, 15, 0],
    index=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
)

In [None]:
sales

In [None]:
sales.sort_values(ascending = False)

In [None]:
# ascending=False makes the largest value rank 1. With method='max' every
# member of a tied group receives the highest rank number of that group.
sales.rank(method = 'max', ascending = False)

In [None]:
# method='first' breaks ties by order of appearance, so every rank is unique.
# sort_values() then lists the entries from rank 1 downwards.
sales.rank(method = 'first', ascending = False).sort_values()

In [None]:
# method='average' gives each member of a tied group the mean of the ranks
# the group occupies, so ranks can be fractional.
sales.rank(method = 'average', ascending = False).sort_values()

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.fare.sort_values(ascending = False)

In [None]:
titanic.fare.rank(ascending = False)

In [None]:
# Rank fares from smallest (rank 1) upwards, breaking ties by order of
# appearance, then list the resulting ranks from highest to lowest.
titanic.fare.rank(ascending = True, method = 'first').sort_values(ascending = False)

In [None]:
# Largest fare gets rank 1 (ascending=False); method='min' gives tied fares
# the lowest rank number of their group.
fare_rank = titanic.fare.rank(ascending = False, method = 'min')

In [None]:
# Add the ranks as a new column named 'fare_rank' at column position 7
# (positions are 0-based). The frame is modified in place.
# NOTE: DataFrame.insert raises ValueError when the column already exists,
# so re-running this cell without reloading `titanic` fails.
titanic.insert(column = 'fare_rank', value = fare_rank, loc = 7)

In [None]:
titanic.head()

In [None]:
titanic.sort_values(by = 'fare_rank')

## **nunique() and nlargest() / nsmallest() with DataFrames**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.tail()

#### **nunique()**

In [None]:
titanic.age.unique()

In [None]:
# Number of distinct values in each column (axis=0). Missing values are not
# counted because dropna defaults to True.
titanic.nunique(axis = 0)

#### **nlargest()**

In [None]:
# The five rows with the largest 'age' values, largest first.
titanic.nlargest(columns = 'age', n = 5)

In [None]:
titanic.sort_values(by = 'age', ascending = False).head()

#### **nsmallest()**

In [None]:
titanic.nsmallest(n = 5, columns = 'age')

In [None]:
titanic.sort_values(by = 'age').head()

In [None]:
# idxmin() yields the index label of the smallest non-missing age (the first
# one if several rows share it); .loc then looks up the row with that label.
titanic.loc[titanic.age.idxmin()]

## **Summary statistics and accumulations**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.count()

In [None]:
titanic.max(numeric_only = True)

In [None]:
titanic.min(numeric_only = True)

In [None]:
titanic.mean(numeric_only = True)

In [None]:
titanic.sum(numeric_only = True)

In [None]:
# Running total of 'age' in the current row order. Missing ages are skipped
# (skipna defaults to True): they stay NaN in the result and the total
# carries on after them.
titanic.age.cumsum()

In [None]:
titanic.corr(numeric_only = True)

In [None]:
# Correlation coefficient between two individual columns; Series.corr uses
# the Pearson method unless told otherwise.
titanic.survived.corr(titanic.fare)

## **The agg() method**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.agg('mean', numeric_only = True) # works but it is not recommended

In [None]:
titanic.select_dtypes('number').agg('mean')

In [None]:
titanic.select_dtypes('number').agg(['mean', 'sum', 'max'])

In [None]:
# A dict maps each column name to the aggregations wanted for it. In the
# result, combinations that were not requested for a column are NaN.
titanic.select_dtypes('number').agg({'age': ['min', 'max'], 'fare': ['mean', 'sum']})

In [None]:
def dummy_func(n):
    """Return *n* doubled, i.e. ``2 * n``."""
    doubled = 2 * n
    return doubled

In [None]:
# NOTE(review): the dict literal repeats the 'fare' key. Python keeps only the
# last value given for a repeated key, so ['mean', 'sum'] is discarded and
# 'fare' is paired with dummy_func alone. If both were intended, they need to
# be combined under a single 'fare' entry.
titanic.select_dtypes('number').agg({'age': ['min', 'max'], 'fare': ['mean', 'sum'], 'fare': dummy_func})

In [None]:
# transform() applies dummy_func to the numeric columns and returns a result
# of the same length as the input; here every numeric value is doubled.
titanic.select_dtypes('number').transform(dummy_func)

## **User-defined functions with apply(), map() and applymap()**

In [None]:
import pandas as pd

In [None]:
sales = pd.read_csv('sales.csv', index_col = 0)

In [None]:
sales

In [None]:
sales.info()

In [None]:
sales.describe()

In [None]:
sales.min(axis = 0)

In [None]:
sales.min(axis = 1)

In [None]:
def range(series):
    """Return the spread of *series*: its maximum minus its minimum.

    Note: this name shadows Python's built-in ``range`` wherever the
    definition is in scope.
    """
    highest = series.max()
    lowest = series.min()
    return highest - lowest

In [None]:
# axis=0 passes each column to `range` as a Series, producing one
# max-minus-min value per column.
sales.apply(func = range, axis = 0)

In [None]:
# axis=1 passes each row to `range` as a Series, producing one
# max-minus-min value per row.
sales.apply(func = range, axis = 1)

In [None]:
# Same row-wise max-minus-min computation, written as an inline lambda
# instead of a named function.
sales.apply(func = lambda series: series.max() - series.min(), axis = 1)

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.head()

In [None]:
# Take element 0 of every value in the 'Athlete' column, which is the first
# character when the value is a string.
# NOTE(review): presumably every entry is a non-empty string; a missing value
# would not support x[0] - confirm against the data.
summer.Athlete.apply(lambda x: x[0])

In [None]:
# Element-wise mapping: missing values become 0, every other value is
# multiplied by 5.
sales.map(lambda x: 0 if pd.isna(x) else x * 5)

In [None]:
# Adds 42 to every element.
# NOTE: DataFrame.applymap is deprecated as of pandas 2.1 in favour of
# DataFrame.map, which the neighbouring cells already use; on such versions
# this call emits a FutureWarning.
sales.applymap(func = lambda x: x + 42)

In [None]:
sales.map(func = lambda x: x + 42)

## **Hierarchical Indexing (MultiIndex)**

In [8]:
import pandas as pd

In [9]:
titanic = pd.read_csv('titanic.csv')

In [10]:
# First 50 rows, all columns, taken as an independent copy so that the
# in-place operations applied to titanic_slice later do not touch `titanic`.
titanic_slice = titanic.iloc[:50, :].copy()

In [11]:
titanic_slice

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,
8,1,3,female,27.0,0,2,11.1333,S,
9,1,2,female,14.0,1,0,30.0708,C,


In [12]:
titanic_slice.set_index('pclass')

Unnamed: 0_level_0,survived,sex,age,sibsp,parch,fare,embarked,deck
pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,0,male,22.0,1,0,7.25,S,
1,1,female,38.0,1,0,71.2833,C,C
3,1,female,26.0,0,0,7.925,S,
1,1,female,35.0,1,0,53.1,S,C
3,0,male,35.0,0,0,8.05,S,
3,0,male,,0,0,8.4583,Q,
1,0,male,54.0,0,0,51.8625,S,E
3,0,male,2.0,3,1,21.075,S,
3,1,female,27.0,0,2,11.1333,S,
2,1,female,14.0,1,0,30.0708,C,


In [13]:
# Build a two-level MultiIndex: 'pclass' is the outer level, 'sex' the inner.
# Both columns move out of the column set into the index, and the frame is
# changed in place.
titanic_slice.set_index(keys = ['pclass', 'sex'], inplace = True)

In [14]:
titanic_slice.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [15]:
titanic_slice.sort_index(ascending = False)

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,male,0,22.0,1,0,7.25,S,
3,male,0,35.0,0,0,8.05,S,
3,male,0,,0,0,8.4583,Q,
3,male,0,2.0,3,1,21.075,S,
3,male,0,20.0,0,0,8.05,S,
3,male,0,39.0,1,5,31.275,S,
3,male,0,2.0,4,1,29.125,Q,
3,male,0,,0,0,7.225,C,
3,male,0,,0,0,7.8958,S,
3,male,1,,0,0,7.2292,C,


In [16]:
# One sort direction per index level: outer level 'pclass' ascending, inner
# level 'sex' descending (so 'male' comes before 'female' within each class).
titanic_slice.sort_index(ascending = [True, False], inplace = True)

In [17]:
titanic_slice

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B


In [18]:
titanic_slice.swaplevel()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
male,1,0,54.0,0,0,51.8625,S,E
male,1,1,28.0,0,0,35.5,S,A
male,1,0,19.0,3,2,263.0,S,C
male,1,0,40.0,0,0,27.7208,C,
male,1,0,28.0,1,0,82.1708,C,
male,1,0,42.0,1,0,52.0,S,
female,1,1,38.0,1,0,71.2833,C,C
female,1,1,35.0,1,0,53.1,S,C
female,1,1,58.0,0,0,26.55,S,C
female,1,1,,1,0,146.5208,C,B


In [19]:
titanic_slice

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B


In [20]:
# swaplevel() returns a new frame with the two index levels exchanged and
# leaves the original untouched, so the result is assigned back to keep it.
# Only the level order changes; the rows stay in their current order.
titanic_slice = titanic_slice.swaplevel()

In [21]:
titanic_slice

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
male,1,0,54.0,0,0,51.8625,S,E
male,1,1,28.0,0,0,35.5,S,A
male,1,0,19.0,3,2,263.0,S,C
male,1,0,40.0,0,0,27.7208,C,
male,1,0,28.0,1,0,82.1708,C,
male,1,0,42.0,1,0,52.0,S,
female,1,1,38.0,1,0,71.2833,C,C
female,1,1,35.0,1,0,53.1,S,C
female,1,1,58.0,0,0,26.55,S,C
female,1,1,,1,0,146.5208,C,B


In [22]:
# Turn the index levels back into ordinary columns and replace the index
# with a default integer index, modifying the frame in place.
titanic_slice.reset_index(inplace = True)

In [23]:
titanic_slice

Unnamed: 0,sex,pclass,survived,age,sibsp,parch,fare,embarked,deck
0,male,1,0,54.0,0,0,51.8625,S,E
1,male,1,1,28.0,0,0,35.5,S,A
2,male,1,0,19.0,3,2,263.0,S,C
3,male,1,0,40.0,0,0,27.7208,C,
4,male,1,0,28.0,1,0,82.1708,C,
5,male,1,0,42.0,1,0,52.0,S,
6,female,1,1,38.0,1,0,71.2833,C,C
7,female,1,1,35.0,1,0,53.1,S,C
8,female,1,1,58.0,0,0,26.55,S,C
9,female,1,1,,1,0,146.5208,C,B


In [24]:
# Rebuild the ('pclass', 'sex') MultiIndex and sort it ascending on both
# levels. The label-based slices on the MultiIndex further down rely on the
# index being sorted.
titanic_slice = titanic_slice.set_index(['pclass', 'sex']).sort_index(ascending = True)

In [25]:
titanic_slice

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [26]:
titanic_slice.loc[1]

Unnamed: 0_level_0,survived,age,sibsp,parch,fare,embarked,deck
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
female,1,38.0,1,0,71.2833,C,C
female,1,35.0,1,0,53.1,S,C
female,1,58.0,0,0,26.55,S,C
female,1,,1,0,146.5208,C,B
male,0,54.0,0,0,51.8625,S,E
male,1,28.0,0,0,35.5,S,A
male,0,19.0,3,2,263.0,S,C
male,0,40.0,0,0,27.7208,C,
male,0,28.0,1,0,82.1708,C,
male,0,42.0,1,0,52.0,S,


In [28]:
titanic_slice.loc[[1, 2]]

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [30]:
titanic_slice.loc[:2]

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [31]:
titanic_slice.loc[1, 'female']

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B


In [34]:
# The tuple supplies one key per index level (pclass 1, sex 'female') and
# selects the rows; 'age' then selects the column, giving a Series.
titanic_slice.loc[(1, 'female'), 'age']

pclass  sex   
1       female    38.0
        female    35.0
        female    58.0
        female     NaN
Name: age, dtype: float64

In [35]:
# A list inside the row tuple selects several labels on that level: pclass 1
# or 2, combined with sex 'female'. The second list picks two columns.
titanic_slice.loc[([1, 2], 'female'), ['age', 'fare']]

Unnamed: 0_level_0,Unnamed: 1_level_0,age,fare
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1
1,female,38.0,71.2833
1,female,35.0,53.1
1,female,58.0,26.55
1,female,,146.5208
2,female,14.0,30.0708
2,female,55.0,16.0
2,female,27.0,21.0
2,female,3.0,41.5792


In [41]:
# slice(None) means "every label" on the outer level (pclass).
# slice('female') is slice(None, 'female'): every label on the inner level up
# to and including 'female'. The trailing ':' keeps all columns.
titanic_slice.loc[(slice(None), slice('female')), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
2,female,1,14.0,1,0,30.0708,C,
2,female,1,55.0,0,0,16.0,S,
2,female,0,27.0,1,0,21.0,S,
2,female,1,3.0,1,2,41.5792,C,
3,female,1,26.0,0,0,7.925,S,
3,female,1,27.0,0,2,11.1333,S,
