In [None]:
import numpy as np
import pandas as pd

#### Hierarchical Indexing
Hierarchical indexing is an important feature of pandas that enables you to have multiple (two or more) index levels on an axis.

In [None]:
# Seed NumPy's global generator so the random values below are identical on
# every Restart & Run All (the particular seed value is arbitrary).
np.random.seed(12345)

# Nine values labelled by a two-level index: the first list supplies the
# outer labels, the second list the inner labels.
data = pd.Series(
    np.random.randn(9),
    index=[
        ['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 3, 1, 2, 2, 3],
    ],
)
data

In [None]:
# Inspect the index object built from the two label lists passed to the Series constructor.
data.index

In [None]:
# Partial indexing: pick every entry whose outer-level label is 'b'.
data.loc['b']

In [None]:
# Label slice on the outer level; both endpoints ('b' and 'c') are included.
data.loc['b':'c']

In [None]:
# Select several outer-level labels at once by passing a list to .loc.
data.loc[['b', 'd']]

In [None]:
# Select on the inner level: ':' keeps every outer label, 2 picks the inner-level label.
data.loc[:, 2]

In [None]:
# Pivot the inner index level into columns, producing a DataFrame;
# (outer, inner) pairs that are absent from the index show up as NaN.
data.unstack()

In [None]:
# Round trip: unstack into a DataFrame, then stack the columns back into an inner index level.
data.unstack().stack()

In [None]:
# A 4x3 integer frame whose rows and columns each carry a two-level MultiIndex.
frame = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=pd.MultiIndex.from_arrays(
        [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
    ),
    columns=pd.MultiIndex.from_arrays(
        [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]
    ),
)

frame

In [None]:
# Give the two row levels and the two column levels names, then show the frame.
frame.index = frame.index.set_names(['key1', 'key2'])
frame.columns = frame.columns.set_names(['state', 'color'])
frame

In [None]:
# Partial column selection: picks the columns grouped under the outer label 'Ohio'.
frame['Ohio']

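#### Reordering and Sorting Levels
At times you need to rearrange the order of the levels on an axis, or sort the data by the values in one particular level; `swaplevel` and `sort_index` do this. (It’s also not unusual to want to use one or more columns from a DataFrame as the row index; alternatively, you may wish to move the row index into the DataFrame’s columns. That is covered in the “Indexing with a DataFrame’s columns” section further down.)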

In [None]:
# Swap the two row-index levels by name; the result is only displayed, `frame` is not reassigned.
frame.swaplevel('key1', 'key2')

In [None]:
# Sort the rows by the second row-index level (level=1).
frame.sort_index(level=1)

In [None]:
# Swap the row levels by position, then sort on what is now the outermost level.
frame.swaplevel(0, 1).sort_index(level=0)

#### Summary Statistics by Level

In [None]:
# Sum the rows that share a 'key2' label.
# The `level=` argument of DataFrame.sum was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the index level is the supported spelling and
# gives the same table here.
frame.groupby(level='key2').sum()

In [None]:
# Sum the columns that share a 'color' label.
# DataFrame.sum(level=..., axis=1) was removed in pandas 2.0 and
# groupby(axis=1) is deprecated as well, so group the transposed frame on
# the 'color' level and transpose the result back.
frame.T.groupby(level='color').sum().T

#### Indexing with a DataFrame’s columns

In [None]:
# Flat frame with two numeric columns ('a', 'b') and two columns ('c', 'd')
# that together can serve as a two-level key.
frame = pd.DataFrame(dict(
    a=range(7),
    b=range(7, 0, -1),
    c=['one', 'one', 'one', 'two', 'two', 'two', 'two'],
    d=[0, 1, 2, 0, 1, 2, 3],
))

frame

In [None]:
# Move columns 'c' and 'd' into a two-level row index; the result is bound to
# frame2, so `frame` keeps its original columns.
frame2 = frame.set_index(['c', 'd'])
frame2

In [None]:
# Turn the index levels of frame2 back into ordinary columns.
frame2.reset_index()

#### Combining and Merging Datasets

In [None]:
# Left table: the 'key' column contains repeated labels.
df1 = pd.DataFrame(dict(
    key=['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    data1=range(7),
))

# Right table: one row per 'key' label.
df2 = pd.DataFrame(dict(
    key=['a', 'b', 'd'],
    data2=range(3),
))

df1

In [None]:
# Show the right-hand table.
df2

#### DataFrame.merge(right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)
Merge DataFrame objects by performing a database-style join operation by columns or indexes.

If joining columns on columns, the DataFrame indexes will be ignored. Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on.

In [None]:
# No `on=` is given, so the join uses the column name the two frames share ('key').
df1.merge(df2)

In [None]:
# Same join with the key column named explicitly.
pd.merge(df1, df2, on='key')

In [None]:
# Same tables as before, except the key column has a different name on each side.
df3 = pd.DataFrame(dict(
    lkey=['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    data1=range(7),
))

df4 = pd.DataFrame(dict(
    rkey=['a', 'b', 'd'],
    data2=range(3),
))

# With differing key names, each side's key column is named separately.
df3.merge(df4, left_on='lkey', right_on='rkey')

#### how : {‘left’, ‘right’, ‘outer’, ‘inner’}, default ‘inner’

left: use only keys from left frame, similar to a SQL left outer join; preserve key order

right: use only keys from right frame, similar to a SQL right outer join; preserve key order

outer: use union of keys from both frames, similar to a SQL full outer join; sort keys lexicographically

inner: use intersection of keys from both frames, similar to a SQL inner join; preserve the order of the left keys

In [None]:
# how='outer' keeps keys found in either frame; rows without a match on the
# other side get NaN in that side's columns.
pd.merge(df3, df4, left_on='lkey', right_on='rkey', how='outer')

#### Merging on Index

In [None]:
# left1 carries its join key in an ordinary column ...
left1 = pd.DataFrame(dict(
    key=['a', 'b', 'a', 'a', 'b', 'c'],
    value=range(6),
))

# ... while right1 carries it in the row index.
right1 = pd.DataFrame(
    data={'group_val': [3.5, 7]},
    index=['a', 'b'],
)

left1

In [None]:
# Show the table whose join key lives in its index.
right1

In [None]:
# Match left1's 'key' column against right1's index (right_index=True).
# The default merge method is to intersect the join keys (how='inner').
pd.merge(left1, right1, left_on='key', right_index=True)

In [None]:
# Same column-to-index join, but how='outer' keeps the union of the keys.
pd.merge(left1, right1, left_on='key', right_index=True, how='outer')

In [None]:
# The same outer join written with the DataFrame.merge method instead of pd.merge.
left1.merge(right1, left_on='key', right_index=True, how='outer')

#### DataFrame.join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False)
Join columns with another DataFrame either on the index or on a key column. Efficiently join multiple DataFrame objects by index at once by passing a list.

DataFrame’s join method performs a left join on the join keys, exactly preserving the left frame’s row index. 

In [None]:
# on='key' matches left1's 'key' column against right1's index; no `how` is
# passed, so join() uses its default.
left1.join(right1, on='key')

#### Concatenating Along an Axis

In [None]:
# The integers 0..11 laid out as 3 rows by 4 columns.
arr = np.arange(12).reshape(3, 4)
arr

In [None]:
# Join two copies of arr along axis 1, i.e. side by side.
np.concatenate([arr, arr], axis=1)

#### pandas.concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, sort=None, copy=True)
Concatenate pandas objects along a particular axis with optional set logic along the other axes.

Can also add a layer of hierarchical indexing on the concatenation axis, which may be useful if the labels are the same (or overlapping) on the passed axis number.

In [None]:
# Three Series whose index labels do not overlap.
s1 = pd.Series(data=[0, 1], index=['a', 'b'])
s2 = pd.Series(data=[2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series(data=[5, 6], index=['f', 'g'])

# Glue them end to end along the index (axis 0 is concat's default).
pd.concat([s1, s2, s3], axis=0)

In [None]:
# axis=1 puts each Series in its own column, aligned on the union of the
# index labels; sort=True sorts that union.
pd.concat([s2, s1, s3], axis=1, sort=True)

In [None]:
# s4 holds s1's entries followed by s3's, so it shares the labels 'a' and 'b' with s1.
s4 = pd.concat([s1, s3])
s4

In [None]:
# join='inner' keeps only the index labels present in both s1 and s4.
pd.concat([s1, s4], axis=1, sort=True, join='inner')

#### When the row index does not contain any relevant data

In [None]:
# Seed NumPy's global generator so both random frames are identical on every
# Restart & Run All (the particular seed value is arbitrary).
np.random.seed(12345)

# Two frames with the same kind of data but a different number of rows and a
# different set/order of columns; their default integer row labels carry no
# information.
df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])

df1

In [None]:
# Show the second frame (two rows; columns 'b', 'd', 'a').
df2

In [None]:
# Stack the rows of df1 and df2. ignore_index=True discards the original row
# labels and numbers the result afresh; sort=True sorts the column labels.
# df2 has no 'c' column, so its rows get NaN there.
pd.concat([df1, df2], ignore_index=True, sort=True)