# DataFrame 이리저리 조작하기

In [2]:
import numpy as np
import pandas as pd

In [4]:
# Build the working frame: 6 rows x 4 columns of standard-normal samples.
# No labels are given, so rows and columns get default integer labels.
df = pd.DataFrame(data=np.random.randn(6, 4))
df

Unnamed: 0,0,1,2,3
0,-0.920283,1.105762,0.016225,1.121381
1,0.253941,-1.311256,0.194469,0.064023
2,-1.790089,-0.461627,1.214021,-0.05356
3,-0.223895,1.133097,0.31712,-0.60025
4,-1.511891,0.039647,0.704621,2.10596
5,-0.409408,-1.672791,-1.297271,0.921614


In [5]:
# Name the four columns A-D and replace the integer row labels with six
# consecutive calendar days starting on 2016-07-01.
df.columns = list("ABCD")
df.index = pd.date_range(start="20160701", periods=6)
df

Unnamed: 0,A,B,C,D
2016-07-01,-0.920283,1.105762,0.016225,1.121381
2016-07-02,0.253941,-1.311256,0.194469,0.064023
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356
2016-07-04,-0.223895,1.133097,0.31712,-0.60025
2016-07-05,-1.511891,0.039647,0.704621,2.10596
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614


In [7]:
# Display the row labels assigned above via pd.date_range.
df.index

DatetimeIndex(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
               '2016-07-05', '2016-07-06'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# Add a new column "F" with one value per row; the two np.nan entries
# supply the missing values used by the dropna / fillna examples below.
df["F"] = [1.0, np.nan, 3.5, 6.1, np.nan, 7.0]
df

Unnamed: 0,A,B,C,D,F
2016-07-01,-0.920283,1.105762,0.016225,1.121381,1.0
2016-07-02,0.253941,-1.311256,0.194469,0.064023,
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356,3.5
2016-07-04,-0.223895,1.133097,0.31712,-0.60025,6.1
2016-07-05,-1.511891,0.039647,0.704621,2.10596,
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614,7.0


## dropna 
 - how 인자를 통해 삭제 방법을 선택한다.
 - how="any"로 지정하면 값이 하나라도 NaN인 행을 삭제한다.

In [10]:
# Drop every row (axis=0) that has at least one NaN; returns a new frame.
df.dropna(axis=0, how="any")

Unnamed: 0,A,B,C,D,F
2016-07-01,-0.920283,1.105762,0.016225,1.121381,1.0
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356,3.5
2016-07-04,-0.223895,1.133097,0.31712,-0.60025,6.1
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614,7.0


 - how="all"
 - 행의 모든 값이 NaN일 경우에만 그 행을 제거한다.

In [12]:
# Drop only the rows (axis=0) in which every value is NaN; returns a new frame.
df.dropna(axis=0, how="all")

Unnamed: 0,A,B,C,D,F
2016-07-01,-0.920283,1.105762,0.016225,1.121381,1.0
2016-07-02,0.253941,-1.311256,0.194469,0.064023,
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356,3.5
2016-07-04,-0.223895,1.133097,0.31712,-0.60025,6.1
2016-07-05,-1.511891,0.039647,0.704621,2.10596,
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614,7.0


## fillna
 - NaN 값을 지정한 값으로 채운다.
 - 원본 DF가 바뀌는 것은 아니고, 값이 채워진 새 DataFrame을 반환한다.

In [16]:
# Show a copy of the frame with every NaN replaced by 5.0.
# The result is not assigned, so df itself keeps its NaN entries.
df.fillna(5.0)

Unnamed: 0,A,B,C,D,F
2016-07-01,-0.920283,1.105762,0.016225,1.121381,1.0
2016-07-02,0.253941,-1.311256,0.194469,0.064023,5.0
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356,3.5
2016-07-04,-0.223895,1.133097,0.31712,-0.60025,6.1
2016-07-05,-1.511891,0.039647,0.704621,2.10596,5.0
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614,7.0


In [18]:
# Boolean frame of the same shape as df: True where the value is missing.
pd.isnull(df)

Unnamed: 0,A,B,C,D,F
2016-07-01,False,False,False,False,False
2016-07-02,False,False,False,False,True
2016-07-03,False,False,False,False,False
2016-07-04,False,False,False,False,False
2016-07-05,False,False,False,False,True
2016-07-06,False,False,False,False,False


In [20]:
# Select the rows whose "F" value is missing.
# The mask is computed on the "F" column only, rather than building a null
# mask for the whole frame and then picking one column out of it.
df.loc[df["F"].isnull(), :]

Unnamed: 0,A,B,C,D,F
2016-07-02,0.253941,-1.311256,0.194469,0.064023,
2016-07-05,-1.511891,0.039647,0.704621,2.10596,


In [22]:
# Parse a YYYYMMDD string into a pandas Timestamp, so it can be used as a
# label against the DatetimeIndex in the drop examples below.
pd.to_datetime("20160701")

Timestamp('2016-07-01 00:00:00')

## DF.drop(index)
 - 여러 개를 삭제할 경우 drop([label1, label2])처럼 리스트로 넘긴다.

In [24]:
# Remove a single row by its index label (axis=0 targets rows).
df.drop(pd.to_datetime("20160701"), axis=0)

Unnamed: 0,A,B,C,D,F
2016-07-02,0.253941,-1.311256,0.194469,0.064023,
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356,3.5
2016-07-04,-0.223895,1.133097,0.31712,-0.60025,6.1
2016-07-05,-1.511891,0.039647,0.704621,2.10596,
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614,7.0


In [26]:
# Remove several rows at once by passing a collection of index labels;
# both date strings are parsed in a single to_datetime call.
df.drop(pd.to_datetime(["20160701", "20160704"]))

Unnamed: 0,A,B,C,D,F
2016-07-02,0.253941,-1.311256,0.194469,0.064023,
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356,3.5
2016-07-05,-1.511891,0.039647,0.704621,2.10596,
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614,7.0


In [28]:
# Remove column "F"; axis="columns" (equivalent to axis=1) targets columns
# instead of rows.
df.drop(labels="F", axis="columns")

Unnamed: 0,A,B,C,D
2016-07-01,-0.920283,1.105762,0.016225,1.121381
2016-07-02,0.253941,-1.311256,0.194469,0.064023
2016-07-03,-1.790089,-0.461627,1.214021,-0.05356
2016-07-04,-0.223895,1.133097,0.31712,-0.60025
2016-07-05,-1.511891,0.039647,0.704621,2.10596
2016-07-06,-0.409408,-1.672791,-1.297271,0.921614


In [29]:
# Remove columns "B" and "F" together by passing a list of column labels.
df.drop(labels=["B", "F"], axis="columns")

Unnamed: 0,A,C,D
2016-07-01,-0.920283,0.016225,1.121381
2016-07-02,0.253941,0.194469,0.064023
2016-07-03,-1.790089,1.214021,-0.05356
2016-07-04,-0.223895,0.31712,-0.60025
2016-07-05,-1.511891,0.704621,2.10596
2016-07-06,-0.409408,-1.297271,0.921614
