## Pandas Manual


In [5]:
import numpy as np
import pandas as pd
# The plotting API conventionally aliased as `plt` lives in matplotlib.pyplot;
# importing the top-level `matplotlib` package under that alias would make
# calls such as plt.plot() or plt.subplots() fail with AttributeError.
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Build a 5x3 frame of standard-normal samples and preview it.
# No seed is set here, so the values differ on every run.
df = pd.DataFrame(data=np.random.randn(5, 3))
df.head(n=5)

Unnamed: 0,0,1,2
0,0.443299,-1.045097,-0.819157
1,0.9634,1.931435,-0.511899
2,-0.564519,-1.222006,-0.9808
3,0.207136,-0.287287,-0.022779
4,-1.034727,0.704107,-0.011698


- DataFrame은 2차원 테이블이며, 테이블의 한 줄(행 또는 열)을 Series라고 한다.
- Series가 모여 하나의 DataFrame을 이룬다.

In [6]:
# Create a Series from a list of values; a default integer index is assigned.
pd.Series([1, 3, 5, 6, 8])

0    1
1    3
2    5
3    6
4    8
dtype: int64

In [7]:
# Build a 12x4 frame whose cells hold the integers 1..48 in row-major order.
df = pd.DataFrame(
    np.arange(1, 49).reshape(-1, 4),  # -1 lets NumPy infer the 12 rows
    columns=["X1", "X2", "X3", "X4"],
)
df

Unnamed: 0,X1,X2,X3,X4
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12
3,13,14,15,16
4,17,18,19,20
5,21,22,23,24
6,25,26,27,28
7,29,30,31,32
8,33,34,35,36
9,37,38,39,40


In [8]:
# Row labels of the frame (the default RangeIndex when no index is supplied).
df.index

RangeIndex(start=0, stop=12, step=1)

In [9]:
# Column labels of the frame.
df.columns

Index(['X1', 'X2', 'X3', 'X4'], dtype='object')

In [10]:
# The underlying data as a 2-D NumPy array. pandas recommends to_numpy()
# over the older .values attribute; the result is the same array here.
df.to_numpy()

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24],
       [25, 26, 27, 28],
       [29, 30, 31, 32],
       [33, 34, 35, 36],
       [37, 38, 39, 40],
       [41, 42, 43, 44],
       [45, 46, 47, 48]])

In [11]:
# Select a single column by its label; the result is a Series.
df['X1']

0      1
1      5
2      9
3     13
4     17
5     21
6     25
7     29
8     33
9     37
10    41
11    45
Name: X1, dtype: int32

In [12]:
# Arithmetic on a column is applied element-wise: every value in X1 is increased by 2.
df['X1'] + 2

0      3
1      7
2     11
3     15
4     19
5     23
6     27
7     31
8     35
9     39
10    43
11    47
Name: X1, dtype: int32

In [13]:
# Print a summary of the frame: index range, columns, non-null counts, dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   X1      12 non-null     int32
 1   X2      12 non-null     int32
 2   X3      12 non-null     int32
 3   X4      12 non-null     int32
dtypes: int32(4)
memory usage: 320.0 bytes


In [14]:
# Per-column summary statistics: count, mean, std, min, quartiles, and max.
df.describe()

Unnamed: 0,X1,X2,X3,X4
count,12.0,12.0,12.0,12.0
mean,23.0,24.0,25.0,26.0
std,14.422205,14.422205,14.422205,14.422205
min,1.0,2.0,3.0,4.0
25%,12.0,13.0,14.0,15.0
50%,23.0,24.0,25.0,26.0
75%,34.0,35.0,36.0,37.0
max,45.0,46.0,47.0,48.0


In [15]:
# Sort rows by column X2 in descending order; the result is only displayed, df is not reassigned.
df.sort_values(by='X2', ascending=False)

Unnamed: 0,X1,X2,X3,X4
11,45,46,47,48
10,41,42,43,44
9,37,38,39,40
8,33,34,35,36
7,29,30,31,32
6,25,26,27,28
5,21,22,23,24
4,17,18,19,20
3,13,14,15,16
2,9,10,11,12


## Indexing

In [18]:
# First three rows: a plain slice inside [] selects rows, with the end position excluded.
df[0:3]

Unnamed: 0,X1,X2,X3,X4
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12


In [20]:
# Label-based indexing with .loc
df.loc[0] # the row whose index label is 0, returned as a Series

X1    1
X2    2
X3    3
X4    4
Name: 0, dtype: int32

In [22]:
# Two equivalent ways to read one cell from the row labelled 0.
# Only the last expression of a cell is displayed, so a single value is shown.
# Positional access goes through .iloc: a bare integer key on a Series with
# string labels (row[2]) relied on a positional fallback that pandas deprecated.
df.loc[0].iloc[2]   # third value of the row, by position
df.loc[0]['X3']     # same value, by column label

3

In [25]:
# Read a single cell in one .loc call: row label 0, column label 'X3'.
df.loc[0, 'X3']

3

In [26]:
# Lists of labels select several rows and columns at once: rows 0 and 3, columns X1 and X2.
df.loc[[0, 3], ['X1', 'X2']]

Unnamed: 0,X1,X2
0,1,2
3,13,14


In [27]:
# Label slices in .loc include both endpoints: rows 0 through 4, columns X1 through X3.
df.loc[0:4, 'X1':'X3']

Unnamed: 0,X1,X2,X3
0,1,2,3
1,5,6,7
2,9,10,11
3,13,14,15
4,17,18,19


In [33]:
# Boolean-mask filtering: keep rows where 10 < X1 < 20. Each condition needs its own
# parentheses because & binds more tightly than the comparison operators.
df[(df['X1']>10) & (df['X1']<20)]

Unnamed: 0,X1,X2,X3,X4
3,13,14,15,16
4,17,18,19,20


In [31]:
# Combine a boolean row mask with a column label: X4 values for the rows where X2 > 20.
df.loc[df['X2']>20, 'X4']

5     24
6     28
7     32
8     36
9     40
10    44
11    48
Name: X4, dtype: int32

In [35]:
# Position-based indexing with .iloc (end positions excluded): rows 2-6, first three columns.
df.iloc[2:7, 0:3] # select rows and columns

Unnamed: 0,X1,X2,X3
2,9,10,11
3,13,14,15
4,17,18,19
5,21,22,23
6,25,26,27


## Reference
- [pandas docs](https://pandas.pydata.org/docs/)
- [pandas tutorial](https://pandas.pydata.org/docs/getting_started/intro_tutorials/index.html)