In [3]:
import pandas as pd
import numpy as np

# reindex

```python
df.reindex(
    labels=None,
    index=None,
    columns=None,
    axis=None,
    method=None,
    copy=True,
    level=None,
    fill_value=nan,
    limit=None,
    tolerance=None,
)
Docstring:
Conform DataFrame to new index with optional filling logic, placing
NA/NaN in locations having no value in the previous index. A new object
is produced unless the new index is equivalent to the current one and
``copy=False``.

Parameters
----------
labels : array-like, optional
    New labels / index to conform the axis specified by 'axis' to.
index, columns : array-like, optional
    New labels / index to conform to, should be specified using
    keywords. Preferably an Index object to avoid duplicating data
axis : int or str, optional
    Axis to target. Can be either the axis name ('index', 'columns')
    or number (0, 1).
method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
    Method to use for filling holes in reindexed DataFrame.
    Please note: this is only applicable to DataFrames/Series with a
    monotonically increasing/decreasing index.

    * None (default): don't fill gaps
    * pad / ffill: propagate last valid observation forward to next
      valid
    * backfill / bfill: use next valid observation to fill gap
    * nearest: use nearest valid observations to fill gap

copy : bool, default True
    Return a new object, even if the passed indexes are the same.
level : int or name
    Broadcast across a level, matching Index values on the
    passed MultiIndex level.
fill_value : scalar, default np.NaN
    Value to use for missing values. Defaults to NaN, but can be any
    "compatible" value.
limit : int, default None
    Maximum number of consecutive elements to forward or backward fill.
tolerance : optional
    Maximum distance between original and new labels for inexact
    matches. The values of the index at the matching locations must
    satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

    Tolerance may be a scalar value, which applies the same tolerance
    to all values, or list-like, which applies variable tolerance per
    element. List-like includes list, tuple, array, Series, and must be
    the same size as the index and its dtype must exactly match the
    index's type.

    .. versionadded:: 0.21.0 (list-like tolerance)

Returns
-------
DataFrame with changed index.
```

<code>df.reindex(index = ['I', 'II'], columns = ['a', 'b', 'c'])</code><br>
This works almost the same as <code>df.loc[['I', 'II'], ['a', 'b', 'c']]</code>,<br>
but if we select a row label that does not exist in the DataFrame, <code>df.loc</code> raises an error (a <code>KeyError</code> in pandas ≥ 1.0),<br>
whereas <code>reindex</code> simply returns a row filled with NaN values<br>
(likewise, selecting an unavailable column returns a column filled with NaN).<br>

In [5]:
# Seed NumPy's legacy global RNG so the random scores below are reproducible.
np.random.seed(101)

# 3x3 table of random integers drawn from [3, 10), one column per brand.
scores = pd.DataFrame(
    np.random.randint(3, 10, (3, 3)),
    columns=['Google', 'Apple', 'Samsung'],
)
scores

Unnamed: 0,Google,Apple,Samsung
0,6,4,9
1,8,6,4
2,8,3,7


In [7]:
# Row labels 3 and 4 and the 'Xiaomi' column do not exist in `scores`;
# reindex fills them with NaN instead of raising.
matching = scores.reindex(
    index=[2, 1, 3, 4],
    columns=['Google', 'Apple', 'Xiaomi'],
)
matching

Unnamed: 0,Google,Apple,Xiaomi
2,8.0,3.0,
1,8.0,6.0,
3,,,
4,,,


In [26]:
# One record per player: [clan, level, average damage].
data = [
    ['VN Champions', 30, 55971],
    ['Dirilis', 35, 58730],
    ['TF CUSTOMIZERS', 32, 75321],
    ['Daemones', 34, 345321],
    ['VN Champions', 33, 62132],
]

# Player names are used as the row labels.
TF = pd.DataFrame(
    data,
    index=['VN Pikachu', 'ARMAGEDON', 'Rek151', 'Monkey', 'Morino Nanako'],
    columns=['clan', 'level', 'average damage'],
)
TF

Unnamed: 0,clan,level,average damage
VN Pikachu,VN Champions,30,55971
ARMAGEDON,Dirilis,35,58730
Rek151,TF CUSTOMIZERS,32,75321
Monkey,Daemones,34,345321
Morino Nanako,VN Champions,33,62132


In [32]:
# Labels to request from TF; some of them intentionally do not exist there.
dirty_players = [
    'ARMAGEDON',
    'xXx-Hadi-xXx',  # not a row label of TF
    'Rek151',
]
features = [
    'clan',
    'average damage',
    'average experience',  # not a column of TF
]

In [33]:
# Unknown row/column labels come back as NaN rather than raising.
TF.reindex(index=dirty_players, columns=features)

Unnamed: 0,clan,average damage,average experience
ARMAGEDON,Dirilis,58730.0,
xXx-Hadi-xXx,,,
Rek151,TF CUSTOMIZERS,75321.0,


<hr>

In [60]:
# Browser names act as the row labels.
index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
df = pd.DataFrame(
    {
        'http_status': [200, 200, 404, 404, 301],
        'response_time': [0.04, 0.02, 0.07, 0.08, 1.0],
    },
    index=index,
)
df

Unnamed: 0,http_status,response_time
Firefox,200,0.04
Chrome,200,0.02
Safari,404,0.07
IE10,404,0.08
Konqueror,301,1.0


In [62]:
# Reindex the rows; labels missing from `df` are filled with -1 instead of NaN.
new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 'Chrome']
df.reindex(index=new_index, fill_value=-1)

Unnamed: 0,http_status,response_time
Safari,404,0.07
Iceweasel,-1,-1.0
Comodo Dragon,-1,-1.0
IE10,404,0.08
Chrome,200,0.02


In [63]:
# Reindex the columns; 'user_agent' is not in `df`, so it comes back as NaN.
new_columns = ['http_status', 'user_agent']
df.reindex(columns=new_columns)

Unnamed: 0,http_status,user_agent
Firefox,200,
Chrome,200,
Safari,404,
IE10,404,
Konqueror,301,


# reindex_like

```python
df.reindex_like(other, method=None, copy=True, limit=None, tolerance=None)
```

Conform a DataFrame so that its row labels and column labels (and their order) are the same
as those of the other DataFrame; labels missing from the original are filled with NaN.

In [9]:
# Column labels of the placebo table; defined once and reused below instead of
# repeating the literal list.
levels = ['Weak', 'Strong']

# 3x2 table of random integers drawn from [3, 10), one row per person.
placebo = pd.DataFrame(
    np.random.randint(3, 10, (3, 2)),
    index=['Alex', 'John', 'Linda'],
    columns=levels,
)
placebo

Unnamed: 0,Weak,Strong
Alex,3,8
John,7,7
Linda,3,9


In [10]:
# Same row and column labels as the placebo table, but listed in a different order.
treatment = pd.DataFrame(
    np.random.randint(3, 10, (3, 2)),
    index=['Linda', 'Alex', 'John'],
    columns=['Strong', 'Weak'],
)
treatment

Unnamed: 0,Strong,Weak
Linda,8,7
Alex,8,3
John,4,6


In [11]:
# Reorder treatment's rows and columns to match placebo's labels.
treatment.reindex_like(placebo)


Unnamed: 0,Weak,Strong
Alex,3,8
John,6,4
Linda,7,8


<hr>

Second example

In [65]:

# Four consecutive daily observations with three measurement columns.
df1 = pd.DataFrame(
    [
        [24.3, 75.7, 'high'],
        [31, 87.8, 'high'],
        [22, 71.6, 'medium'],
        [35, 95, 'medium'],
    ],
    columns=['temp_celsius', 'temp_fahrenheit', 'windspeed'],
    index=pd.date_range(start='2014-02-12', end='2014-02-15', freq='D'),
)
df1

Unnamed: 0,temp_celsius,temp_fahrenheit,windspeed
2014-02-12,24.3,75.7,high
2014-02-13,31.0,87.8,high
2014-02-14,22.0,71.6,medium
2014-02-15,35.0,95.0,medium


In [67]:
# Fewer dates (2014-02-14 is absent) and no 'temp_fahrenheit' column compared with df1.
df2 = pd.DataFrame(
    [
        [28, 'low'],
        [30, 'low'],
        [35.1, 'medium'],
    ],
    columns=['temp_celsius', 'windspeed'],
    index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', '2014-02-15']),
)
df2

Unnamed: 0,temp_celsius,windspeed
2014-02-12,28.0,low
2014-02-13,30.0,low
2014-02-15,35.1,medium


In [69]:
# Equivalent to: df2.reindex(index=df1.index, columns=df1.columns)
df2.reindex_like(df1)

Unnamed: 0,temp_celsius,temp_fahrenheit,windspeed
2014-02-12,28.0,,low
2014-02-13,30.0,,low
2014-02-14,,,
2014-02-15,35.1,,medium


In [70]:
# Explicit form of reindex_like: conform df2 to df1's row and column labels.
df2.reindex(index=df1.index, columns=df1.columns)

Unnamed: 0,temp_celsius,temp_fahrenheit,windspeed
2014-02-12,28.0,,low
2014-02-13,30.0,,low
2014-02-14,,,
2014-02-15,35.1,,medium


# rename

<b><code>DataFrame.rename()</code></b>

```python
df.rename(
    mapper=None,
    index=None,
    columns=None,
    axis=None,
    copy=True,
    inplace=False,
    level=None,
)
```

```python
Alter axes labels.

Function / dict values must be unique (1-to-1). Labels not contained in
a dict / Series will be left as-is. Extra labels listed don't throw an
error.

See the :ref:`user guide <basics.rename>` for more.

Parameters
----------
mapper, index, columns : dict-like or function, optional
    dict-like or functions transformations to apply to
    that axis' values. Use either ``mapper`` and ``axis`` to
    specify the axis to target with ``mapper``, or ``index`` and
    ``columns``.
axis : int or str, optional
    Axis to target with ``mapper``. Can be either the axis name
    ('index', 'columns') or number (0, 1). The default is 'index'.
copy : boolean, default True
    Also copy underlying data
inplace : boolean, default False
    Whether to modify the DataFrame in place rather than returning a new
    one. If True then value of copy is ignored.
level : int or level name, default None
    In case of a MultiIndex, only rename labels in the specified
    level.
```

In [73]:
# Small two-row frame with long row labels, used for the rename examples.
df = pd.DataFrame(
    [[1, 2], [3, 4]],
    index=['Nguyen Huu Trung', 'Nguyen Huu Kien'],
    columns=['Ranking', 'Rating'],
)
df

Unnamed: 0,Ranking,Rating
Nguyen Huu Trung,1,2
Nguyen Huu Kien,3,4


In [74]:
# Rename both axes at once using {old label: new label} dictionaries.
df.rename(
    columns={'Ranking': 'Rank', 'Rating': 'Rate'},
    index={'Nguyen Huu Trung': 'Trung', 'Nguyen Huu Kien': 'Kien'},
)

Unnamed: 0,Rank,Rate
Trung,1,2
Kien,3,4


In [75]:
# Rename the row labels with a function that is applied to every label.
df.rename(index=lambda v: f'xXx-{v}-xXx')

Unnamed: 0,Ranking,Rating
xXx-Nguyen Huu Trung-xXx,1,2
xXx-Nguyen Huu Kien-xXx,3,4


In [79]:
# A Series works as a dict-like mapper: its index holds the old labels and
# its values hold the new labels.
target = pd.Series(
    ['VN Pikachu', 'VN Raichu'],
    index=['Nguyen Huu Trung', 'Nguyen Huu Kien'],
)
target

Nguyen Huu Trung    VN Pikachu
Nguyen Huu Kien      VN Raichu
dtype: object

In [80]:
# inplace=True modifies `df` itself, so display `df` afterwards to see the result.
df.rename(index=target, inplace=True)
df

Unnamed: 0,Ranking,Rating
VN Pikachu,1,2
VN Raichu,3,4


# reset_index

In [82]:
# Show the players table again as the starting point for the reset_index examples.
TF

Unnamed: 0,clan,level,average damage
VN Pikachu,VN Champions,30,55971
ARMAGEDON,Dirilis,35,58730
Rek151,TF CUSTOMIZERS,32,75321
Monkey,Daemones,34,345321
Morino Nanako,VN Champions,33,62132


```python
DataFrame.reset_index(
    level=None,
    drop=False,
    inplace=False,
    col_level=0,
    col_fill='',
)
```

In [84]:
# Move the row labels into a new 'index' column and use a default integer index.
TF.reset_index()

Unnamed: 0,index,clan,level,average damage
0,VN Pikachu,VN Champions,30,55971
1,ARMAGEDON,Dirilis,35,58730
2,Rek151,TF CUSTOMIZERS,32,75321
3,Monkey,Daemones,34,345321
4,Morino Nanako,VN Champions,33,62132


In [85]:
# drop=True discards the old row labels instead of keeping them as a column.
TF.reset_index(drop=True)

Unnamed: 0,clan,level,average damage
0,VN Champions,30,55971
1,Dirilis,35,58730
2,TF CUSTOMIZERS,32,75321
3,Daemones,34,345321
4,VN Champions,33,62132


<hr>

Reset with MultiIndex

In [89]:
# Two-level row index: (class, name).
index = pd.MultiIndex.from_tuples(
    [
        ('bird', 'falcon'),
        ('bird', 'parrot'),
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
    ],
    names=['class', 'name'],
)

# Two-level column index (unnamed levels).
columns = pd.MultiIndex.from_tuples([('speed', 'max'), ('species', 'type')])

species = pd.DataFrame(
    [
        (389.0, 'fly'),
        (24.0, 'fly'),
        (80.5, 'run'),
        (np.nan, 'jump'),
    ],
    index=index,
    columns=columns,
)
species

Unnamed: 0_level_0,Unnamed: 1_level_0,speed,species
Unnamed: 0_level_1,Unnamed: 1_level_1,max,type
class,name,Unnamed: 2_level_2,Unnamed: 3_level_2
bird,falcon,389.0,fly
bird,parrot,24.0,fly
mammal,lion,80.5,run
mammal,monkey,,jump


In [91]:
# Move only the 'class' level out of the row index into a column; 'name' stays as the index.
species.reset_index(level='class')

Unnamed: 0_level_0,class,speed,species
Unnamed: 0_level_1,Unnamed: 1_level_1,max,type
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
falcon,bird,389.0,fly
parrot,bird,24.0,fly
lion,mammal,80.5,run
monkey,mammal,,jump


<code>col_level</code>: if the columns are a <code>MultiIndex</code>, <code>col_level</code> determines which column level the index label is inserted into

Put the label `class` in the first column level (alongside `speed`, `species`):

In [100]:
# col_level=0 places the 'class' label in the top level of the column MultiIndex.
species.reset_index(level='class', col_level=0)

Unnamed: 0_level_0,class,speed,species
Unnamed: 0_level_1,Unnamed: 1_level_1,max,type
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
falcon,bird,389.0,fly
parrot,bird,24.0,fly
lion,mammal,80.5,run
monkey,mammal,,jump


Put the label `class` in the second column level (alongside `max`, `type`):

In [101]:
# col_level=1 places the 'class' label in the second level of the column MultiIndex.
species.reset_index(level='class', col_level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,speed,species
Unnamed: 0_level_1,class,max,type
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
falcon,bird,389.0,fly
parrot,bird,24.0,fly
lion,mammal,80.5,run
monkey,mammal,,jump


In [92]:
# Drop the 'name' level from the row index entirely instead of turning it into a column.
species.reset_index(level='name', drop=True)

Unnamed: 0_level_0,speed,species
Unnamed: 0_level_1,max,type
class,Unnamed: 1_level_2,Unnamed: 2_level_2
bird,389.0,fly
bird,24.0,fly
mammal,80.5,run
mammal,,jump


# set_index

```python
DataFrame.set_index(
    keys,
    drop=True,
    append=False,
    inplace=False,
    verify_integrity=False,
)
```

Set one or more columns as the index.

In [116]:
# Turn the player-name index back into an ordinary column called 'name'.
data = (
    TF
    .reset_index()
    .rename(columns={'index': 'name'})
)
data

Unnamed: 0,name,clan,level,average damage
0,VN Pikachu,VN Champions,30,55971
1,ARMAGEDON,Dirilis,35,58730
2,Rek151,TF CUSTOMIZERS,32,75321
3,Monkey,Daemones,34,345321
4,Morino Nanako,VN Champions,33,62132


In [118]:
# Use a single column as the index; `data` itself is not modified here.
data.set_index(keys='name')

Unnamed: 0_level_0,clan,level,average damage
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
VN Pikachu,VN Champions,30,55971
ARMAGEDON,Dirilis,35,58730
Rek151,TF CUSTOMIZERS,32,75321
Monkey,Daemones,34,345321
Morino Nanako,VN Champions,33,62132


In [121]:
# Use several columns as a MultiIndex. With inplace=True `data` itself is
# modified, so re-running this cell fails: 'clan' and 'name' are no longer columns.
data.set_index(['clan', 'name'], inplace=True)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,level,average damage
clan,name,Unnamed: 2_level_1,Unnamed: 3_level_1
VN Champions,VN Pikachu,30,55971
Dirilis,ARMAGEDON,35,58730
TF CUSTOMIZERS,Rek151,32,75321
Daemones,Monkey,34,345321
VN Champions,Morino Nanako,33,62132
