# **DataFrame Basics III**

## **Sorting dataframes**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.age.sort_values()

In [None]:
titanic.sort_values(by = 'age')

In [None]:
titanic

In [None]:
titanic.sort_values(by = 'age', inplace = True)

In [None]:
titanic.head()

In [None]:
titanic.sort_index(inplace = True)

In [None]:
titanic.head()

In [None]:
titanic.sort_values(by = ['age', 'pclass'], inplace = True)

In [None]:
titanic.head(20)

In [None]:
titanic.sort_index(inplace = True)

In [None]:
titanic.sort_values(by = ['age', 'pclass', 'sex'], ascending = [True, True, False], inplace = True)

In [None]:
pd.options.display.max_rows = 900

In [None]:
titanic

In [None]:
titanic.sort_index()

In [None]:
pd.options.display.max_rows = 10

In [None]:
titanic

## **Ranking DataFrames**

In [None]:
import pandas as pd

In [None]:
# Daily sales indexed by weekday abbreviation. Mon and Sat share the value 15,
# which gives the rank() calls in the following cells a tie to resolve.
sales = pd.Series(
    [15, 32, 45, 21, 55, 15, 0],
    index = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
)

In [None]:
sales

In [None]:
sales.sort_values(ascending = False)

In [None]:
sales.rank(method = 'max', ascending = False)

In [None]:
sales.rank(method = 'first', ascending = False).sort_values()

In [None]:
sales.rank(method = 'average', ascending = False).sort_values()

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.fare.sort_values(ascending = False)

In [None]:
titanic.fare.rank(ascending = False)

In [None]:
titanic.fare.rank(ascending = True, method = 'first').sort_values(ascending = False)

In [None]:
fare_rank = titanic.fare.rank(ascending = False, method = 'min')

In [None]:
# insert() modifies titanic in place (it returns None) and raises ValueError
# when a 'fare_rank' column already exists, so reload titanic before
# re-running this cell.
titanic.insert(loc = 7, column = 'fare_rank', value = fare_rank)

In [None]:
titanic.head()

In [None]:
titanic.sort_values(by = 'fare_rank')

## **nunique() and nlargest() / nsmallest() with DataFrames**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.tail()

#### **nunique()**

In [None]:
titanic.age.unique()

In [None]:
titanic.nunique(axis = 0)

#### **nlargest()**

In [None]:
titanic.nlargest(columns = 'age', n = 5)

In [None]:
titanic.sort_values(by = 'age', ascending = False).head()

#### **nsmallest()**

In [None]:
titanic.nsmallest(n = 5, columns = 'age')

In [None]:
titanic.sort_values(by = 'age').head()

In [None]:
titanic.loc[titanic.age.idxmin()]

## **Summary statistics and accumulations**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.count()

In [None]:
titanic.max(numeric_only = True)

In [None]:
titanic.min(numeric_only = True)

In [None]:
titanic.mean(numeric_only = True)

In [None]:
titanic.sum(numeric_only = True)

In [None]:
titanic.age.cumsum()

In [None]:
titanic.corr(numeric_only = True)

In [None]:
titanic.survived.corr(titanic.fare)

## **The agg() method**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
# Extra keyword arguments given to agg() are simply forwarded to the aggregation
# function, so numeric_only ends up as an argument of 'mean'. Selecting the numeric
# columns first with select_dtypes('number'), as the next cell does, is more explicit.
titanic.agg('mean', numeric_only = True) # works but it is not recommended

In [None]:
titanic.select_dtypes('number').agg('mean')

In [None]:
titanic.select_dtypes('number').agg(['mean', 'sum', 'max'])

In [None]:
titanic.select_dtypes('number').agg({'age': ['min', 'max'], 'fare': ['mean', 'sum']})

In [None]:
def dummy_func(n):
    """Return ``n`` multiplied by two.

    Works for anything that supports multiplication by an int; the cells
    that follow pass it to agg() and transform() as a custom function.
    """
    factor = 2
    return factor * n

In [None]:
# Each column may appear only once in the dict: a repeated key in a dict literal
# silently overrides the earlier one, so 'fare' is mapped to dummy_func alone.
titanic.select_dtypes('number').agg({'age': ['min', 'max'], 'fare': dummy_func})

In [None]:
titanic.select_dtypes('number').transform(dummy_func)

## **User-defined functions with apply(), map() and applymap()**

In [None]:
import pandas as pd

In [None]:
sales = pd.read_csv('sales.csv', index_col = 0)

In [None]:
sales

In [None]:
sales.info()

In [None]:
sales.describe()

In [None]:
sales.min(axis = 0)

In [None]:
sales.min(axis = 1)

In [None]:
def range(series):
    """Return the spread of ``series``: its maximum minus its minimum.

    NOTE(review): this name shadows Python's built-in ``range`` for the rest
    of the session. It is kept because the apply() cells that follow pass
    the function by this name; rename it together with those calls.
    """
    highest = series.max()
    lowest = series.min()
    return highest - lowest

In [None]:
sales.apply(func = range, axis = 0)

In [None]:
sales.apply(func = range, axis = 1)

In [None]:
sales.apply(func = lambda series: series.max() - series.min(), axis = 1)

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.head()

In [None]:
summer.Athlete.apply(lambda x: x[0])

In [None]:
sales.map(lambda x: 0 if pd.isna(x) else x * 5)

In [None]:
# Legacy spelling, shown for reference: DataFrame.applymap has been deprecated
# since pandas 2.1 in favour of DataFrame.map (used in the next cell), so recent
# versions emit a FutureWarning here and newer releases may not provide it at all.
sales.applymap(func = lambda x: x + 42)

In [None]:
sales.map(func = lambda x: x + 42)