# DataFrame

In [229]:
import pandas as pd
import numpy as np

width = 77
df = pd.DataFrame(np.arange(10,22).reshape(3,4), index=["a","b","c"], columns=['A','B','C','D'])
df

Unnamed: 0,A,B,C,D
a,10,11,12,13
b,14,15,16,17
c,18,19,20,21


In [28]:
for column_name, column_series in df.items():
    print(type(column_name), column_name)
    for index_key, cell_value in column_series.items():
        print(index_key, cell_value)

<class 'str'> A
a 10
b 14
c 18
<class 'str'> B
a 11
b 15
c 19
<class 'str'> C
a 12
b 16
c 20
<class 'str'> D
a 13
b 17
c 21


## Selection

### Column

In [32]:
temp = df[df["B"] > 11]
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17


In [185]:
s = df["B"] > 11
print(type(s), '\n', s)
temp = df[s]
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 a    False
b     True
c     True
Name: B, dtype: bool
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
c  18  19  20  21


In [34]:
s = pd.Series([False,True,False], index=['a','b','c'])
print(type(s), '\n', s)

<class 'pandas.core.series.Series'> 
 a    False
b     True
c    False
dtype: bool


### Row

In [114]:
temp = df.loc[df.A > 15]
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
c  18  19  20  21


In [113]:
s = df.A > 15
print(type(s), '\n', s)
temp = df.loc[s]
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 a    False
b    False
c     True
Name: A, dtype: bool
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
c  18  19  20  21


In [118]:
s = pd.Series([True,True,False], index=['a','b','c'])
print(type(s), '\n', s)
temp = df.loc[s]
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 a     True
b     True
c    False
dtype: bool
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17


## indexing & Slicing

In [232]:
column_series = df["B"] # Series (column 기준)
print(type(column_series), '\n', column_series)
print("-" * width)
row_series = df[1:2] # DataFrame (row 기준 : slicing 사용시 default index 기준으로 처리)
print(type(row_series), '\n', row_series)

<class 'pandas.core.series.Series'> 
 a    11
b    15
c    19
Name: B, dtype: int64
-----------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17


In [231]:
column_series = df["B"][:'b'] # Series의 slicing (column 기준)
print(type(series2), '\n', series2)
print("-" * width)
series1 = df['B']
print(type(series1), '\n', series1)
series2 = series1[:'b']
print(type(series2), '\n', series2)

<class 'pandas.core.series.Series'> 
 a    11
b    15
Name: B, dtype: int64
-----------------------------------------------------------------------------
<class 'pandas.core.series.Series'> 
 a    11
b    15
c    19
Name: B, dtype: int64
<class 'pandas.core.series.Series'> 
 a    11
b    15
Name: B, dtype: int64


In [233]:
temp = df[1:3] # DataFrame (row 기준으로 slicing 처리 : default index)
print(type(temp), '\n', temp)
temp = df.iloc[1:3] # DataFrame (row 기준으로 slicing 처리 : default index)
print(type(temp), '\n', temp)
print("-" * width)
temp = df["b":"c"] # DataFrame (row 기준으로 slicing 처리 : user index)
print(type(temp), '\n', temp)
temp = df.loc[['b','c']] # DataFrame  (row 기준으로 slicing 처리 : user index)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
c  18  19  20  21
-----------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
c  18  19  20  21


### Column

In [80]:
temp = df["B"] # Series
print(type(temp), '\n', temp)
temp = df["B"][:] # Series
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 a    11
b    15
c    19
Name: B, dtype: int64
<class 'pandas.core.series.Series'> 
 a    11
b    15
c    19
Name: B, dtype: int64


In [47]:
temp = df[["B"]] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     B
a  11
b  15
c  19


In [48]:
temp = df[["B","D"]] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     B   D
a  11  13
b  15  17
c  19  21


### Row

In [81]:
temp = df.loc['a'] # Series
print(type(temp), '\n', temp)
temp = df.loc['a', :] # Series
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 A    10
B    11
C    12
D    13
Name: a, dtype: int64
<class 'pandas.core.series.Series'> 
 A    10
B    11
C    12
D    13
Name: a, dtype: int64


In [82]:
temp = df.loc['a', 'B':'C'] # Series
print(type(temp), '\n', temp)
temp = df.loc['a', ['B','C']] # Series
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 B    11
C    12
Name: a, dtype: int64
<class 'pandas.core.series.Series'> 
 B    11
C    12
Name: a, dtype: int64


In [98]:
temp = df.loc[['a']] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc[['a'], :] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc['a':'a'] # DataFrame
print(type(temp), '\n', temp)
temp = df.iloc[0:1] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13


In [84]:
temp = df.loc[['a'], 'B':'C'] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc[['a'], ['B','C']] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     B   C
a  11  12
<class 'pandas.core.frame.DataFrame'> 
     B   C
a  11  12


In [53]:
temp = df.loc[['a','c']] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
c  18  19  20  21


### Cell

In [87]:
print(type(df['A']['a']), df['A']['a']) # df['A','a'] : Error

<class 'numpy.int64'> 10


In [131]:
print(df.loc['a',"A"], df.loc['a']["A"])
print(df.iloc[0,0])
print(df.iloc[0][0]) # FutureWarning

10 10
10
10




### 중요

In [99]:
temp = df.loc[['a','b']]['A'] # Series
print(type(temp), '\n', temp)
temp = df.loc[['a','b'],'A'] # Series
print(type(temp), '\n', temp)
temp = df.loc[['a','b'],['A']] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 a    10
b    14
Name: A, dtype: int64
<class 'pandas.core.series.Series'> 
 a    10
b    14
Name: A, dtype: int64
<class 'pandas.core.frame.DataFrame'> 
     A
a  10
b  14


In [102]:
temp = df.loc['a'] # Series
print(type(temp), '\n', temp)
temp = df.loc[['a']] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc['a', :] # Series
print(type(temp), '\n', temp)
temp = df.loc[['a'], :] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.series.Series'> 
 A    10
B    11
C    12
D    13
Name: a, dtype: int64
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
<class 'pandas.core.series.Series'> 
 A    10
B    11
C    12
D    13
Name: a, dtype: int64
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13


In [129]:
temp = df.loc['b':] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc['b':,'A'] # Series
print(type(temp), '\n', temp)
temp = df.loc['b':]['A'] # Series
print(type(temp), '\n', temp)
temp = df.loc['b':][['A']] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc['b':,['A']] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc['b':,'A':'A'] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc['b':]['b':'b'] # DataFrame ???????
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 b    14
c    18
Name: A, dtype: int64
<class 'pandas.core.series.Series'> 
 b    14
c    18
Name: A, dtype: int64
<class 'pandas.core.frame.DataFrame'> 
     A
b  14
c  18
<class 'pandas.core.frame.DataFrame'> 
     A
b  14
c  18
<class 'pandas.core.frame.DataFrame'> 
     A
b  14
c  18
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17


In [136]:
temp = df['a':'b'] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc['a':'b'] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc[['a','b']] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc[['a','b']][['B','D']] # DataFrame
print(type(temp), '\n', temp)
temp = df.loc[['a','b'],['B','D']] # DataFrame
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     B   D
a  11  13
b  15  17
<class 'pandas.core.frame.DataFrame'> 
     B   D
a  11  13
b  15  17


In [112]:
temp = df[1:2]
print(type(temp), '\n', temp)
temp = df.iloc[1:2]
print(type(temp), '\n', temp)
temp = df['b':'b']
print(type(temp), '\n', temp)
temp = df.loc['b':'b']
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17


In [184]:
temp = df.iloc[0,1]
print(type(temp), '\n', temp)
temp = df.iloc[0:2,1:2]
print(type(temp), '\n', temp)
temp = df.iloc[0:2][1:2]
print(type(temp), '\n', temp)

<class 'numpy.int64'> 
 11
<class 'pandas.core.frame.DataFrame'> 
     B
a  11
b  15
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
b  14  15  16  17


#### index가 1부터 시작되는 정수인 경우 df[1:2] 결과는?

In [225]:
df1 = pd.DataFrame(np.arange(10,22).reshape(3,4), index=[1,3,2], columns=['A','B','C','D'])
print(type(df1), '\n', df1)
temp = df1[1:2] # 무조건 0부터 시작되는 default index 기준으로 처리
print(type(temp), '\n', temp)
temp = df1.iloc[1:2] # 무조건 0부터 시작되는 default index 기준으로 처리
print(type(temp), '\n', temp)
temp = df1.loc[1:2] # 생성시 주어진 index를 기준으로 처리
print(type(temp), '\n', temp)
print(type(df1.index), '\n', df1.index)
print()
df2 = pd.DataFrame(np.arange(10,22).reshape(3,4), columns=['A','B','C','D'])
print(type(df2), '\n', df2)
temp = df2[1:2]
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
1  10  11  12  13
3  14  15  16  17
2  18  19  20  21
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
3  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
3  14  15  16  17
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
1  10  11  12  13
3  14  15  16  17
2  18  19  20  21
<class 'pandas.core.indexes.base.Index'> 
 Index([1, 3, 2], dtype='int64')

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
0  10  11  12  13
1  14  15  16  17
2  18  19  20  21
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
1  14  15  16  17


## Function

### info()

In [177]:
temp = df.info()
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, a to c
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       3 non-null      int64
 1   B       3 non-null      int64
 2   C       3 non-null      int64
 3   D       3 non-null      int64
dtypes: int64(4)
memory usage: 228.0+ bytes
<class 'NoneType'> 
 None


### describe()

In [176]:
temp = df.describe()
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
           A     B     C     D
count   3.0   3.0   3.0   3.0
mean   14.0  15.0  16.0  17.0
std     4.0   4.0   4.0   4.0
min    10.0  11.0  12.0  13.0
25%    12.0  13.0  14.0  15.0
50%    14.0  15.0  16.0  17.0
75%    16.0  17.0  18.0  19.0
max    18.0  19.0  20.0  21.0


### value_counts()

In [171]:
print(type(df), '\n', df)
df1 = df.copy()
print(id(df1) == id(df)) # False (깊은복사)
print(id(df1['A']) == id(df['A'])) # False (깊은복사)
print(id(df1.loc['a']) == id(df.loc['a'])) # True (얕은복사)
#
df1.loc['d'] = [10,11,12,13] # 행 추가
print(type(df1), '\n', df1)
vc = df1.value_counts()
print(type(vc), '\n', vc)
vc = df['A'].value_counts()
print(type(vc), '\n', vc)
vc = df['A'].value_counts(normalize=True)
print(type(vc), '\n', vc)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
False
False
True
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
d  10  11  12  13
<class 'pandas.core.series.Series'> 
 A   B   C   D 
10  11  12  13    2
14  15  16  17    1
18  19  20  21    1
Name: count, dtype: int64
<class 'pandas.core.series.Series'> 
 A
10    1
14    1
18    1
Name: count, dtype: int64
<class 'pandas.core.series.Series'> 
 A
10    0.333333
14    0.333333
18    0.333333
Name: proportion, dtype: float64


### count(axis=0) : NaN은 포함시키지 않음

In [160]:
print(type(df), '\n', df)
temp = df.count() # axis=0
print(type(temp), '\n', temp)
temp = df.count(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    3
B    3
C    3
D    3
dtype: int64
<class 'pandas.core.series.Series'> 
 a    4
b    4
c    4
dtype: int64


### sum(axis=0) : 합계

In [155]:
print(type(df), '\n', df)
temp = df.sum() # axis=0
print(type(temp), '\n', temp)
temp = df.sum(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    42
B    45
C    48
D    51
dtype: int64
<class 'pandas.core.series.Series'> 
 a    46
b    62
c    78
dtype: int64


### mean(axis=0) : 평균

In [154]:
print(type(df), '\n', df)
temp = df.mean() # axis=0
print(type(temp), '\n', temp)
temp = df.mean(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    14.0
B    15.0
C    16.0
D    17.0
dtype: float64
<class 'pandas.core.series.Series'> 
 a    11.5
b    15.5
c    19.5
dtype: float64


### min(axis=0) : 최소

In [153]:
print(type(df), '\n', df)
temp = df.min() # axis=0
print(type(temp), '\n', temp)
temp = df.min(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    10
B    11
C    12
D    13
dtype: int64
<class 'pandas.core.series.Series'> 
 a    10
b    14
c    18
dtype: int64


### max(axis=0) : 최대

In [156]:
print(type(df), '\n', df)
temp = df.max() # axis=0
print(type(temp), '\n', temp)
temp = df.max(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    18
B    19
C    20
D    21
dtype: int64
<class 'pandas.core.series.Series'> 
 a    13
b    17
c    21
dtype: int64


### var(axis=0) : 분산

In [174]:
print(type(df), '\n', df)
temp = df.var() # axis=0
print(type(temp), '\n', temp)
temp = df.var(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    16.0
B    16.0
C    16.0
D    16.0
dtype: float64
<class 'pandas.core.series.Series'> 
 a    1.666667
b    1.666667
c    1.666667
dtype: float64


### std(axis=0) : 표준편차

In [172]:
print(type(df), '\n', df)
temp = df.std() # axis=0
print(type(temp), '\n', temp)
temp = df.std(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    4.0
B    4.0
C    4.0
D    4.0
dtype: float64
<class 'pandas.core.series.Series'> 
 a    1.290994
b    1.290994
c    1.290994
dtype: float64


### median(axis=0) : 중앙값

In [178]:
print(type(df), '\n', df)
temp = df.median() # axis=0
print(type(temp), '\n', temp)
temp = df.median(axis=1)
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    14.0
B    15.0
C    16.0
D    17.0
dtype: float64
<class 'pandas.core.series.Series'> 
 a    11.5
b    15.5
c    19.5
dtype: float64


### quantile(q, ...) : 백분위수

In [179]:
print(type(df), '\n', df)
temp = df.quantile(0.5) # 중앙값
print(type(temp), '\n', temp)

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.series.Series'> 
 A    14.0
B    15.0
C    16.0
D    17.0
Name: 0.5, dtype: float64


### sort_values(by=[], ascending=True)

In [248]:
print(type(df), '\n', df)
temp = df.sort_values(by='C')
print(type(temp), '\n', temp)
print(id(temp) == id(df)) # False
print(id(temp.iloc[0]) == id(df.iloc[0])) # True

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
a  10  11  12  13
b  14  15  16  17
c  18  19  20  21
False
True


In [247]:
print(type(temp), '\n', temp)
temp = df.sort_values(by=['C','B'], ascending=False)
print(type(temp), '\n', temp)
print(id(temp) == id(df)) # False
print(id(temp.iloc[0]) == id(df.iloc[0])) # True

<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
c  18  19  20  21
b  14  15  16  17
a  10  11  12  13
<class 'pandas.core.frame.DataFrame'> 
     A   B   C   D
c  18  19  20  21
b  14  15  16  17
a  10  11  12  13
False
True


### sort_index(ascending=True)

In [254]:
df1 = pd.DataFrame({'num_legs':[2,4,4,5], 'num_wings':[2,0,0,0]}, index=['falcon','cat','dog','ant'])
df1

Unnamed: 0,num_legs,num_wings
falcon,2,2
cat,4,0
dog,4,0
ant,5,0


In [255]:
temp = df1.sort_index()
print(type(temp), '\n', temp)
print(type(df1), '\n', df1)

<class 'pandas.core.frame.DataFrame'> 
         num_legs  num_wings
ant            5          0
cat            4          0
dog            4          0
falcon         2          2
<class 'pandas.core.frame.DataFrame'> 
         num_legs  num_wings
falcon         2          2
cat            4          0
dog            4          0
ant            5          0


In [257]:
temp = df1.sort_index(ascending=False)
print(type(temp), '\n', temp)
print(type(df1), '\n', df1)

<class 'pandas.core.frame.DataFrame'> 
         num_legs  num_wings
falcon         2          2
dog            4          0
cat            4          0
ant            5          0
<class 'pandas.core.frame.DataFrame'> 
         num_legs  num_wings
falcon         2          2
cat            4          0
dog            4          0
ant            5          0


### drop(inplace=False)

In [303]:
np.random.seed(1)
df2 = pd.DataFrame(np.random.randint(10,size=(4,8)))
df2

Unnamed: 0,0,1,2,3,4,5,6,7
0,5,8,9,5,0,0,1,7
1,6,9,2,4,5,2,4,2
2,4,7,7,9,1,7,0,6
3,9,9,7,6,9,1,0,1


In [291]:
df2.loc['max'] = df2.max()
df2.loc['min'] = df2.min(axis=0)
df2['sum'] = df2[[0,1,2,3,4,5,6,7]].sum(axis=1)
df2

Unnamed: 0,0,1,2,3,4,5,6,7,sum
0,5,8,9,5,0,0,1,7,35
1,6,9,2,4,5,2,4,2,34
2,4,7,7,9,1,7,0,6,41
3,9,9,7,6,9,1,0,1,42
max,9,9,9,9,9,7,4,7,63
min,4,7,2,4,0,0,0,1,18


In [292]:
print(type(df2), '\n', df2)
temp = df2.drop(index=['max','min'])
print(type(temp), '\n', temp)
print(type(df2), '\n', df2)

<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7  sum
0    5  8  9  5  0  0  1  7   35
1    6  9  2  4  5  2  4  2   34
2    4  7  7  9  1  7  0  6   41
3    9  9  7  6  9  1  0  1   42
max  9  9  9  9  9  7  4  7   63
min  4  7  2  4  0  0  0  1   18
<class 'pandas.core.frame.DataFrame'> 
    0  1  2  3  4  5  6  7  sum
0  5  8  9  5  0  0  1  7   35
1  6  9  2  4  5  2  4  2   34
2  4  7  7  9  1  7  0  6   41
3  9  9  7  6  9  1  0  1   42
<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7  sum
0    5  8  9  5  0  0  1  7   35
1    6  9  2  4  5  2  4  2   34
2    4  7  7  9  1  7  0  6   41
3    9  9  7  6  9  1  0  1   42
max  9  9  9  9  9  7  4  7   63
min  4  7  2  4  0  0  0  1   18


In [293]:
print(type(df2), '\n', df2)
temp = df2.drop(index='max', inplace=True)
print(type(temp), '\n', temp)
print(type(df2), '\n', df2)

<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7  sum
0    5  8  9  5  0  0  1  7   35
1    6  9  2  4  5  2  4  2   34
2    4  7  7  9  1  7  0  6   41
3    9  9  7  6  9  1  0  1   42
max  9  9  9  9  9  7  4  7   63
min  4  7  2  4  0  0  0  1   18
<class 'NoneType'> 
 None
<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7  sum
0    5  8  9  5  0  0  1  7   35
1    6  9  2  4  5  2  4  2   34
2    4  7  7  9  1  7  0  6   41
3    9  9  7  6  9  1  0  1   42
min  4  7  2  4  0  0  0  1   18


In [294]:
print(type(df2), '\n', df2)
temp = df2.drop(columns=['sum',0])
print(type(temp), '\n', temp)
print(type(df2), '\n', df2)

<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7  sum
0    5  8  9  5  0  0  1  7   35
1    6  9  2  4  5  2  4  2   34
2    4  7  7  9  1  7  0  6   41
3    9  9  7  6  9  1  0  1   42
min  4  7  2  4  0  0  0  1   18
<class 'pandas.core.frame.DataFrame'> 
      1  2  3  4  5  6  7
0    8  9  5  0  0  1  7
1    9  2  4  5  2  4  2
2    7  7  9  1  7  0  6
3    9  7  6  9  1  0  1
min  7  2  4  0  0  0  1
<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7  sum
0    5  8  9  5  0  0  1  7   35
1    6  9  2  4  5  2  4  2   34
2    4  7  7  9  1  7  0  6   41
3    9  9  7  6  9  1  0  1   42
min  4  7  2  4  0  0  0  1   18


In [295]:
print(type(df2), '\n', df2)
temp = df2.drop(columns='sum', inplace=True)
print(type(temp), '\n', temp)
print(type(df2), '\n', df2)

<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7  sum
0    5  8  9  5  0  0  1  7   35
1    6  9  2  4  5  2  4  2   34
2    4  7  7  9  1  7  0  6   41
3    9  9  7  6  9  1  0  1   42
min  4  7  2  4  0  0  0  1   18
<class 'NoneType'> 
 None
<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7
0    5  8  9  5  0  0  1  7
1    6  9  2  4  5  2  4  2
2    4  7  7  9  1  7  0  6
3    9  9  7  6  9  1  0  1
min  4  7  2  4  0  0  0  1


In [296]:
print(type(df2), '\n', df2)
temp = df2.drop(index=[1,2], columns=[1,3,5,7])
print(type(temp), '\n', temp)
print(type(df2), '\n', df2)

<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7
0    5  8  9  5  0  0  1  7
1    6  9  2  4  5  2  4  2
2    4  7  7  9  1  7  0  6
3    9  9  7  6  9  1  0  1
min  4  7  2  4  0  0  0  1
<class 'pandas.core.frame.DataFrame'> 
      0  2  4  6
0    5  9  0  1
3    9  7  9  0
min  4  2  0  0
<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7
0    5  8  9  5  0  0  1  7
1    6  9  2  4  5  2  4  2
2    4  7  7  9  1  7  0  6
3    9  9  7  6  9  1  0  1
min  4  7  2  4  0  0  0  1


In [297]:
print(type(df2), '\n', df2)
temp = df2.drop(index=[1,2], columns=[1,3,5,7], inplace=True)
print(type(temp), '\n', temp)
print(type(df2), '\n', df2)

<class 'pandas.core.frame.DataFrame'> 
      0  1  2  3  4  5  6  7
0    5  8  9  5  0  0  1  7
1    6  9  2  4  5  2  4  2
2    4  7  7  9  1  7  0  6
3    9  9  7  6  9  1  0  1
min  4  7  2  4  0  0  0  1
<class 'NoneType'> 
 None
<class 'pandas.core.frame.DataFrame'> 
      0  2  4  6
0    5  9  0  1
3    9  7  9  0
min  4  2  0  0


### dropna(axis)

In [315]:
df3 = pd.DataFrame(np.random.randint(10,size=(4,8)))
df3.loc[2,3] = np.nan
df3.loc[1,5] = np.nan
print(type(df3), '\n', df3)

<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  7  0  6  5.0  1  4.0  6  0
1  6  5  1  2.0  1  NaN  4  0
2  7  8  9  NaN  7  0.0  9  3
3  9  1  4  4.0  6  8.0  8  9


In [313]:
print(type(df3), '\n', df3)
temp = df3.dropna(axis=0)
print(type(temp), '\n', temp)
temp = df3.dropna(axis=1, inplace=True)
print(type(temp), '\n', temp)
print(type(df3), '\n', df3)

<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  4  7  7  4.0  9  0.0  2  0
1  7  1  7  9.0  8  NaN  0  1
2  9  8  2  NaN  1  2.0  7  2
3  6  0  9  2.0  6  6.0  2  7
<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  4  7  7  4.0  9  0.0  2  0
3  6  0  9  2.0  6  6.0  2  7
<class 'NoneType'> 
 None
<class 'pandas.core.frame.DataFrame'> 
    0  1  2  4  6  7
0  4  7  7  9  2  0
1  7  1  7  8  0  1
2  9  8  2  1  7  2
3  6  0  9  6  2  7


### fillna(...)

In [320]:
print(type(df3), '\n', df3)
temp = df3.fillna(0)
print(type(temp), '\n', temp)
print(type(df3), '\n', df3)
temp = df3.fillna(-1, inplace=True)
print(type(temp), '\n', temp)
print(type(df3), '\n', df3)

<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  7  0  6  5.0  1  4.0  6  0
1  6  5  1  2.0  1  NaN  4  0
2  7  8  9  NaN  7  0.0  9  3
3  9  1  4  4.0  6  8.0  8  9
<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  7  0  6  5.0  1  4.0  6  0
1  6  5  1  2.0  1  0.0  4  0
2  7  8  9  0.0  7  0.0  9  3
3  9  1  4  4.0  6  8.0  8  9
<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  7  0  6  5.0  1  4.0  6  0
1  6  5  1  2.0  1  NaN  4  0
2  7  8  9  NaN  7  0.0  9  3
3  9  1  4  4.0  6  8.0  8  9
<class 'NoneType'> 
 None
<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  7  0  6  5.0  1  4.0  6  0
1  6  5  1  2.0  1 -1.0  4  0
2  7  8  9 -1.0  7  0.0  9  3
3  9  1  4  4.0  6  8.0  8  9


### astype(...) : 형 변환

In [330]:
print(type(df3), '\n', df3)
temp = df3.astype(int)
print(type(temp), '\n', temp)
print(type(df3), '\n', df3)
print(id(temp) == id(df3)) # False
print(id(temp.loc[0]) == id(df3.loc[0])) # True ???
print(temp.loc[0,3], df3.loc[0,3]) # 5 5.0
print(type(temp.loc[0,3]), type(df3.loc[0,3])) # <class 'numpy.int64'> <class 'numpy.float64'>
print(id(temp.loc[0,3]) == id(df3.loc[0,3])) # False

<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  7  0  6  5.0  1  4.0  6  0
1  6  5  1  2.0  1 -1.0  4  0
2  7  8  9 -1.0  7  0.0  9  3
3  9  1  4  4.0  6  8.0  8  9
<class 'pandas.core.frame.DataFrame'> 
    0  1  2  3  4  5  6  7
0  7  0  6  5  1  4  6  0
1  6  5  1  2  1 -1  4  0
2  7  8  9 -1  7  0  9  3
3  9  1  4  4  6  8  8  9
<class 'pandas.core.frame.DataFrame'> 
    0  1  2    3  4    5  6  7
0  7  0  6  5.0  1  4.0  6  0
1  6  5  1  2.0  1 -1.0  4  0
2  7  8  9 -1.0  7  0.0  9  3
3  9  1  4  4.0  6  8.0  8  9
False
True
5 5.0
<class 'numpy.int64'> <class 'numpy.float64'>
False
