# Reindexing

Reindexing changes the row labels and column labels for a DataFrame.

To reindex means to conform the data to match a given set of labels along a particular axis.

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Number of rows in the example DataFrame built below.
N = 20

In [11]:
# Build the example frame: a daily date column, an evenly spaced ramp and
# three random columns.  A seeded Generator is used so that Restart & Run All
# reproduces the same values on every run.
rng = np.random.default_rng(42)

df = pd.DataFrame({
    "A": pd.date_range(start="2016-01-01", periods=N, freq="D"),
    "x": np.linspace(0, N - 1, num=N),            # 0.0, 1.0, ..., N-1
    "y": rng.random(N),                           # uniform on [0, 1)
    "C": rng.choice(["Low", "Medium", "High"], size=N),
    "D": rng.normal(loc=100, scale=10, size=N),   # mean 100, std 10
})

In [12]:
# Preview the first 10 rows rather than rendering the whole frame.
df.head(10)

Unnamed: 0,A,x,y,C,D
0,2016-01-01,0.0,0.954905,High,105.967861
1,2016-01-02,1.0,0.683841,High,120.069833
2,2016-01-03,2.0,0.45513,Medium,107.188836
3,2016-01-04,3.0,0.450606,Medium,103.719434
4,2016-01-05,4.0,0.757775,Low,102.074033
5,2016-01-06,5.0,0.761819,High,89.002751
6,2016-01-07,6.0,0.575315,High,88.508318
7,2016-01-08,7.0,0.435351,Medium,92.907876
8,2016-01-09,8.0,0.014237,Low,111.210027
9,2016-01-10,9.0,0.456064,High,99.446029


`D` 열은 평균 100, 표준편차 10인 정규분포에서 뽑은 값이다. 정규분포에서는 약 68%의 값이 평균 $\pm 1$ 표준편차 범위(90에서 110) 안에 들어가고, 나머지 약 32%의 값은 이 범위를 벗어난다. 그래서 위 출력에서 보이는 120 근처의 값이나, 78 정도의 값도 도출될 수 있는 것이다.

In [13]:
# Keep only rows 0, 2 and 5 and the columns A, C and B.
# 'B' is not a column of df, so reindex() introduces it filled with NaN.
row_labels = [0, 2, 5]
column_labels = ['A', 'C', 'B']
df_reindexed = df.reindex(index=row_labels, columns=column_labels)

In [14]:
df_reindexed

Unnamed: 0,A,C,B
0,2016-01-01,High,
2,2016-01-03,Medium,
5,2016-01-06,High,


# Reindex to Align with Other Objects

In [15]:
import pandas as pd
import numpy as np

In [27]:
# Two frames with identical columns but different lengths (10 vs. 7 rows).
# Seeded so that the displayed values are reproducible on a fresh kernel.
rng = np.random.default_rng(0)
COLUMNS = ["col1", "col2", "col3"]

df1 = pd.DataFrame(rng.standard_normal((10, 3)), columns=COLUMNS)
df2 = pd.DataFrame(rng.standard_normal((7, 3)), columns=COLUMNS)

In [28]:
df1

Unnamed: 0,col1,col2,col3
0,0.910843,-0.23771,0.797412
1,1.062355,-0.181421,1.246505
2,-0.495741,1.126873,-0.903058
3,-1.17417,-1.706356,0.320435
4,0.824872,-0.037775,-0.379165
5,0.383134,1.505557,-0.593989
6,0.979732,-1.263265,-1.215306
7,-0.758765,0.113836,1.492569
8,-0.966004,0.49078,-0.996606
9,0.498009,1.158289,0.83316


In [18]:
df2

Unnamed: 0,col1,col2,col3
0,0.246039,0.122285,-0.381072
1,-0.387414,-0.878496,-0.479584
2,-0.038879,-0.45588,0.251671
3,0.195856,-0.686303,-0.977542
4,2.014761,1.559506,0.193647
5,-0.183643,1.528969,-1.038181
6,-0.209966,-0.211482,0.287373


In [21]:
# Work on an independent copy so that nothing done to df3 can leak back into df2
# (plain assignment would only bind a second name to the same object).
df3 = df2.copy()

In [25]:
# Derive df3 from df2 directly so the cell is idempotent: re-running it never
# trips over a 'col1' that an earlier run has already removed.
df3 = df2.drop(columns="col1")

In [26]:
df3

Unnamed: 0,col2,col3
0,0.122285,-0.381072
1,-0.878496,-0.479584
2,-0.45588,0.251671
3,-0.686303,-0.977542
4,1.559506,0.193647
5,1.528969,-1.038181
6,-0.211482,0.287373


In [29]:
# Conform df1 to df3's labels: only the rows and columns present in df3 are kept.
# This rebinds df1, so later cells see the reduced frame, not the original one.
df1 = df1.reindex_like(df3)

In [30]:
df1

Unnamed: 0,col2,col3
0,-0.23771,0.797412
1,-0.181421,1.246505
2,1.126873,-0.903058
3,-1.706356,0.320435
4,-0.037775,-0.379165
5,1.505557,-0.593989
6,-1.263265,-1.215306


In [19]:
# Conform df1 to df2's row and column labels.
# NOTE(review): df1 was rebound to a col2/col3-only frame a few cells earlier, so
# on a top-to-bottom run col1 would presumably come back as all-NaN; the output
# recorded below carries an older execution count -- re-run to confirm.
df1 = df1.reindex_like(df2)

In [20]:
df1

Unnamed: 0,col1,col2,col3
0,1.152795,1.556473,-0.315966
1,-1.02061,-0.492461,-0.113144
2,-0.583347,1.276641,1.00922
3,0.476605,-0.694254,-0.509444
4,-1.028313,0.317305,0.838202
5,-0.518441,-1.837497,0.573793
6,0.110713,0.145388,-0.357553


# Filling while Reindexing

`reindex()` takes an optional `method` parameter that controls how the values for newly introduced labels are filled. It accepts the following values:


* pad / ffill -- Fill values forward
* bfill / backfill -- Fill values backward
* nearest -- Fill from the nearest index values

In [31]:
import pandas as pd
import numpy as np

In [32]:
# A longer frame (6 rows) and a shorter one (2 rows) with the same columns,
# used to show how reindexing trims or pads rows.
# Seeded so that the displayed values are reproducible on a fresh kernel.
rng = np.random.default_rng(1)
COLUMNS = ["col1", "col2", "col3"]

df1 = pd.DataFrame(rng.standard_normal((6, 3)), columns=COLUMNS)
df2 = pd.DataFrame(rng.standard_normal((2, 3)), columns=COLUMNS)

In [33]:
df1

Unnamed: 0,col1,col2,col3
0,-0.107919,-2.559443,0.692226
1,-1.322441,1.538948,2.749281
2,0.643396,1.343335,-0.190361
3,-1.262248,-0.477421,0.007466
4,0.491754,1.015808,-2.106218
5,0.875382,-1.439265,-0.951158


In [34]:
df2

Unnamed: 0,col1,col2,col3
0,0.038969,-0.034377,0.389862
1,-0.469447,-0.17388,1.365734


In [35]:
# df2 has only rows 0 and 1, so df1 is trimmed to those two rows.
df1.reindex_like(df2)

Unnamed: 0,col1,col2,col3
0,-0.107919,-2.559443,0.692226
1,-1.322441,1.538948,2.749281


In [36]:
# df1 has rows 0-5; the rows df2 lacks (2-5) are introduced and filled with NaN.
df2.reindex_like(df1)

Unnamed: 0,col1,col2,col3
0,0.038969,-0.034377,0.389862
1,-0.469447,-0.17388,1.365734
2,,,
3,,,
4,,,
5,,,


In [37]:
# Rows 2-5 are missing from df2; method="ffill" propagates the last available
# row (index 1) forward into them.
# reindex() is called directly with df1's labels: it produces the same result
# and is the method whose `method` parameter this section describes.
# NOTE(review): the `method` keyword of reindex_like() is reported as deprecated
# in pandas 3.0 -- confirm against the installed version.
df2.reindex(index=df1.index, columns=df1.columns, method="ffill")

Unnamed: 0,col1,col2,col3
0,0.038969,-0.034377,0.389862
1,-0.469447,-0.17388,1.365734
2,-0.469447,-0.17388,1.365734
3,-0.469447,-0.17388,1.365734
4,-0.469447,-0.17388,1.365734
5,-0.469447,-0.17388,1.365734


In [54]:
# Read lines from standard input until the sentinel "0" (or the end of input)
# and collect every line that contains the substring "mo".
b = []

while True:
    try:
        a = input()
    except EOFError:
        # Input ran out before the sentinel was seen; stop instead of crashing.
        break

    if a == "0":
        break

    if "mo" in a:
        b.append(a)

# The number of matches is simply the number of collected lines.
count = len(b)
print(count)

# Echo the matching lines in the order they were entered.
for i in b:
    print(i)

2
mouse
monitor
