# Pandas Advanced Functions

In [None]:
# imports

import numpy as np
import pandas as pd

# Show every column when a wide DataFrame is displayed (None = no limit)
pd.set_option('display.max_columns', None)

### DataFrame.pivot_table()

https://pandas.pydata.org/docs/reference/api/pandas.pivot_table.html

In [None]:
# In case you forgot: load the vehicles dataset and summarise its columns

vehicles_csv = './datasets/vehicles.csv'
df = pd.read_csv(vehicles_csv, low_memory=False)
df.info()

In [None]:
# Peek at the first five rows
df.head(n=5)

In [None]:
# How many rows fall into each vehicle class?
vehicle_class = df['Vehicle Class']
vehicle_class.value_counts()

In [None]:
# Introducing the Pivot Table!!!

# With no `values`, pivot_table() tries to average every remaining column.
# pandas >= 2.0 raises a TypeError on non-numeric columns instead of silently
# dropping them, so pass only the numeric columns explicitly.
numeric_cols = df.select_dtypes(include='number').columns.tolist()
pivot_table = df.pivot_table(index=['Vehicle Class'], values=numeric_cols)
pivot_table

---

In [None]:
# Multicolumn pivot table

# Aggregate only the numeric columns (pandas >= 2.0 raises on non-numeric ones),
# leaving out 'Cylinders' because it is the pivot key for the column axis.
value_cols_simple = (df.select_dtypes(include='number')
                       .columns
                       .difference(['Cylinders'])
                       .tolist())
pt_multicol_simple = df.pivot_table(index=['Vehicle Class'],
                                    columns=['Cylinders'],
                                    values=value_cols_simple)

pt_multicol_simple

In [None]:
# Each column label of the pivot table is a tuple, one entry per column level
first_column = pt_multicol_simple.columns[0]
print(first_column)
#print(pt_multicol_simple.columns[0][0])
#print(pt_multicol_simple.columns[0][1])
#print(pt_multicol_simple.columns.get_level_values(0))
#print(pt_multicol_simple.columns.get_level_values(1))

In [None]:
# Slicing a multicolumn pivot table

# Take the top-level label of the first column and select everything under it
first_column = pt_multicol_simple.columns[0]
top_level_label = first_column[0]
pt_multicol_simple_co2 = pt_multicol_simple[top_level_label]
pt_multicol_simple_co2

In [None]:
# Indexing a multicolumn pivot table

# Select the column by its full (value, Cylinders) label, then take the 4th row
# by position. A bare [3] on a label-indexed Series relies on the deprecated
# positional fallback of Series.__getitem__; .iloc makes the intent explicit.
pt_multicol_simple[('CO2 Emission Grams/Mile', 4.0)].iloc[3]

In [None]:
# Infinite columns...generally not a good idea...

# Aggregate only the numeric columns (pandas >= 2.0 raises on non-numeric ones),
# leaving out the two columns used as pivot keys for the column axis.
value_cols_complex = (df.select_dtypes(include='number')
                        .columns
                        .difference(['Cylinders', 'Engine Displacement'])
                        .tolist())
pt_multicol_complex = df.pivot_table(index=['Vehicle Class', 'Model'],
                                     columns=['Cylinders', 'Engine Displacement'],
                                     values=value_cols_complex)
pt_multicol_complex

---

In [None]:
# Selecting the values to pivot: only 'Combined MPG' is aggregated per class

group_keys = ['Vehicle Class']
value_cols = ['Combined MPG']
pt_value = df.pivot_table(values=value_cols, index=group_keys)
pt_value

---

In [None]:
# Selecting the aggregation to perform: maximum instead of the default mean

pt_value_agg = df.pivot_table(values=['Combined MPG'], index=['Vehicle Class'], aggfunc='max')
pt_value_agg

In [None]:
# You can perform many aggregations: one block of columns per function

aggregations = ['mean', 'max']
pt_value_agg = df.pivot_table(values=['Combined MPG'],
                              index=['Vehicle Class'],
                              aggfunc=aggregations)
pt_value_agg

In [None]:
# You can fill null values

# Aggregate only the numeric columns (pandas >= 2.0 raises on non-numeric ones),
# leaving out 'Cylinders' because it is the pivot key for the column axis.
value_cols_fill = (df.select_dtypes(include='number')
                     .columns
                     .difference(['Cylinders'])
                     .tolist())
# fill_value replaces the cells that have no data for a (class, cylinders) pair
pt_col_fill = df.pivot_table(index=['Vehicle Class'],
                             columns=['Cylinders'],
                             values=value_cols_fill,
                             fill_value='ola ke ase')

pt_col_fill

---

### DataFrame.set_index()
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html

### DataFrame.reindex()
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reindex.html

### DataFrame.reset_index()
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html


In [None]:
# Which index is each frame using?

current_index = df.index
print(current_index)
#print(pivot_table.index)
#print(pt_multicol_simple.index)
#print(pt_multicol_complex.index)
#print(pivot_table.index.values)

In [None]:
# Select new index

# set_index() returns a new frame; rebinding `df` instead of using inplace=True
# keeps the call chainable and makes the state change explicit.
df = df.set_index(['Make'])
#df = df.set_index(['Make', 'Model'])
df.head()

In [None]:
# Set new index

# reindex() aligns on labels: it raises if the current index has duplicate
# labels, and integer labels looked up in a non-integer index would only yield
# all-NaN rows. Move any named index level (e.g. 'Make') back into a column
# first, so the even integer labels below line up with the positional index.
if any(level_name is not None for level_name in df.index.names):
    df = df.reset_index()

new_index = list(range(0, len(df) * 2, 2))
df = df.reindex(new_index)  # labels with no matching row become all-NaN rows
df.head()

In [None]:
# Reset index

# reset_index() returns a new frame with the old index moved into a column;
# rebinding `df` instead of using inplace=True keeps the call chainable.
df = df.reset_index()
df.head()

---

### pd.eval()
https://pandas.pydata.org/docs/reference/api/pandas.eval.html

In [None]:
# Seeded generator so df1 holds the same values on every Restart & Run All
rng_df1 = np.random.default_rng(1)
df1 = pd.DataFrame(rng_df1.standard_normal((10, 3)), columns=['a', 'b', 'c'])
df1.head()

In [None]:
# Seeded generator (different seed from df1) so df2 is reproducible but distinct
rng_df2 = np.random.default_rng(2)
df2 = pd.DataFrame(rng_df2.standard_normal((10, 3)), columns=['a', 'b', 'c'])
df2.head()

In [None]:
# One DataFrame: the expression is evaluated against df1's own columns

is_b_positive = df1.eval('b > 0')
is_b_positive

In [None]:
# Between 2 DataFrames: names in the expression refer to variables in scope

comparison_expr = 'df1 < df2'
pd.eval(comparison_expr)

---

### DataFrame.query()
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html

In [None]:
# query() keeps only the rows for which the expression is True
positive_b_rows = df1.query('b > 0')
positive_b_rows

---

### DataFrame.lookup()

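https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.lookup.html

> **Note:** `DataFrame.lookup()` was deprecated in pandas 1.2 and removed in pandas 2.0, so the link above may not resolve in the current documentation. On recent versions, do the same label-based lookup with NumPy indexing (e.g. `df.to_numpy()[row_positions, df.columns.get_indexer(labels)]`).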

In [None]:
# Seeded generator so the example values are identical on every Restart & Run All
rng_seasons = np.random.default_rng(3)
seasons = pd.DataFrame(rng_seasons.random((10, 4)), columns=['winter','spring','summer','autumn'])
seasons

In [None]:
lookup_list = ['summer','winter','spring','summer','autumn','winter','winter','spring','summer','summer']

# DataFrame.lookup() was deprecated in pandas 1.2 and removed in 2.0.
# Equivalent: for each row, pick the value from the column named at the same
# position in lookup_list, using positional NumPy indexing.
col_positions = seasons.columns.get_indexer(lookup_list)
if (col_positions < 0).any():
    # get_indexer() marks unknown labels with -1, which would silently select
    # the last column; fail loudly instead, as lookup() did.
    raise KeyError('lookup_list contains labels that are not columns of seasons')
row_positions = np.arange(len(seasons))
seasons.to_numpy()[row_positions, col_positions]

---

### pd.melt()

https://pandas.pydata.org/docs/reference/api/pandas.melt.html

In [None]:
# Unpivot data: one row per (original row, season column) pair

melt_options = {
    'value_vars': seasons.columns,
    'var_name': 'season',
    'value_name': '% percentage',
}
unpivoted = seasons.melt(**melt_options)
unpivoted

---

### pd.cut()

https://pandas.pydata.org/docs/reference/api/pandas.cut.html

In [None]:
# Bin the values into 3 equal-width intervals (column name typo fixed: 'grouped')
unpivoted['grouped_seasons'] = pd.cut(unpivoted['% percentage'], 3)
print(unpivoted['grouped_seasons'].unique())
unpivoted

---

### Do you know any other fancy method?