# Pandas Data Viewing
https://3months.tistory.com/292

In [6]:
# Create DataFrame
import pandas as pd
import numpy as np

# Seeded generator so that Restart Kernel -> Run All rebuilds the same frame
# every time (the original drew unseeded random numbers).
SEED = 20230101
rng = np.random.default_rng(SEED)

dates = pd.date_range('20230101', periods=6)   # DatetimeIndex of 6 consecutive days starting 2023-01-01
df = pd.DataFrame(                             # 6x4 frame of standard-normal samples
    rng.standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)

print(df)           # the whole frame
print(df.head(2))   # first two rows
print(df.tail(3))   # last three rows

                   A         B         C         D
2023-01-01  0.207644  0.499216  0.849756 -0.517725
2023-01-02 -1.581086 -0.718013  0.169183 -0.373700
2023-01-03  0.565877 -0.442898 -1.051488 -1.123515
2023-01-04  0.169621  0.341821  0.139733  0.513013
2023-01-05 -0.902647 -0.905398  0.058214  1.043567
2023-01-06  1.346534  0.481297 -0.258618  0.514701
                   A         B         C         D
2023-01-01  0.207644  0.499216  0.849756 -0.517725
2023-01-02 -1.581086 -0.718013  0.169183 -0.373700
                   A         B         C         D
2023-01-04  0.169621  0.341821  0.139733  0.513013
2023-01-05 -0.902647 -0.905398  0.058214  1.043567
2023-01-06  1.346534  0.481297 -0.258618  0.514701


In [8]:
# Show the row labels first, then the column labels.
for axis_labels in (df.index, df.columns):
    print(axis_labels)

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06'],
              dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')


In [10]:
# NumPy array: a single dtype applies to the entire array.
values = df.to_numpy()
print(values)

[[ 0.20764369  0.49921593  0.84975637 -0.51772543]
 [-1.5810857  -0.71801287  0.16918257 -0.37369998]
 [ 0.56587735 -0.44289821 -1.05148833 -1.1235155 ]
 [ 0.16962143  0.34182063  0.13973339  0.51301297]
 [-0.90264718 -0.90539807  0.05821411  1.04356659]
 [ 1.34653386  0.48129746 -0.25861835  0.51470058]]


In [16]:
# Statistical summary of the data.
summary_stats = df.describe()
print(summary_stats)

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean  -0.032343 -0.123996 -0.015537  0.009390
std    1.050103  0.638264  0.623648  0.810750
min   -1.581086 -0.905398 -1.051488 -1.123515
25%   -0.634580 -0.649234 -0.179410 -0.481719
50%    0.188633 -0.050539  0.098974  0.069656
75%    0.476319  0.446428  0.161820  0.514279
max    1.346534  0.499216  0.849756  1.043567


In [19]:
# Transpose: swap the DataFrame's rows and columns.
transposed = df.T
print(transposed)

   2023-01-01  2023-01-02  2023-01-03  2023-01-04  2023-01-05  2023-01-06
A    0.207644   -1.581086    0.565877    0.169621   -0.902647    1.346534
B    0.499216   -0.718013   -0.442898    0.341821   -0.905398    0.481297
C    0.849756    0.169183   -1.051488    0.139733    0.058214   -0.258618
D   -0.517725   -0.373700   -1.123515    0.513013    1.043567    0.514701


In [25]:
# Sort along an axis: axis=0 orders the rows by their index labels,
# and ascending=False puts the latest date first.
rows_descending = df.sort_index(axis=0, ascending=False)
print(rows_descending)

                   A         B         C         D
2023-01-06  1.346534  0.481297 -0.258618  0.514701
2023-01-05 -0.902647 -0.905398  0.058214  1.043567
2023-01-04  0.169621  0.341821  0.139733  0.513013
2023-01-03  0.565877 -0.442898 -1.051488 -1.123515
2023-01-02 -1.581086 -0.718013  0.169183 -0.373700
2023-01-01  0.207644  0.499216  0.849756 -0.517725


In [26]:
# Order the rows by the values in column 'B' (ascending by default).
ordered_by_b = df.sort_values(by='B')
print(ordered_by_b)

                   A         B         C         D
2023-01-05 -0.902647 -0.905398  0.058214  1.043567
2023-01-02 -1.581086 -0.718013  0.169183 -0.373700
2023-01-03  0.565877 -0.442898 -1.051488 -1.123515
2023-01-04  0.169621  0.341821  0.139733  0.513013
2023-01-06  1.346534  0.481297 -0.258618  0.514701
2023-01-01  0.207644  0.499216  0.849756 -0.517725


## Data Selection

In [31]:
# Select a column: indexing with a single column name yields a Series.
column_a = df['A']
print(column_a, '', type(column_a), sep='\n')

2023-01-01    0.207644
2023-01-02   -1.581086
2023-01-03    0.565877
2023-01-04    0.169621
2023-01-05   -0.902647
2023-01-06    1.346534
Freq: D, Name: A, dtype: float64

<class 'pandas.core.series.Series'>


In [33]:
# Select rows by slicing the frame directly.
row_slices = (
    df[0:3],                     # positional slice: the first three rows
    df['20230102':'20230104'],   # date-label slice: both endpoints are included
)
print(*row_slices, sep='\n\n')

                   A         B         C         D
2023-01-01  0.207644  0.499216  0.849756 -0.517725
2023-01-02 -1.581086 -0.718013  0.169183 -0.373700
2023-01-03  0.565877 -0.442898 -1.051488 -1.123515

                   A         B         C         D
2023-01-02 -1.581086 -0.718013  0.169183 -0.373700
2023-01-03  0.565877 -0.442898 -1.051488 -1.123515
2023-01-04  0.169621  0.341821  0.139733  0.513013


In [38]:
# Select by label with .loc.
label_selections = (
    df.loc[dates[0]],                            # one row by its label -> Series
    df.loc[:, ['A', 'B']],                       # every row, columns A and B
    df.loc['20230102':'20230104', ['A', 'B']],   # label slice of rows, columns A and B
    df.loc['20230102', ['A', 'B']],              # one row, two columns -> Series
    df.loc[dates[0], ['A']],                     # one row, one-element column list -> Series
)
print(*label_selections, sep='\n\n')

A    0.207644
B    0.499216
C    0.849756
D   -0.517725
Name: 2023-01-01 00:00:00, dtype: float64

                   A         B
2023-01-01  0.207644  0.499216
2023-01-02 -1.581086 -0.718013
2023-01-03  0.565877 -0.442898
2023-01-04  0.169621  0.341821
2023-01-05 -0.902647 -0.905398
2023-01-06  1.346534  0.481297

                   A         B
2023-01-02 -1.581086 -0.718013
2023-01-03  0.565877 -0.442898
2023-01-04  0.169621  0.341821

A   -1.581086
B   -0.718013
Name: 2023-01-02 00:00:00, dtype: float64

A    0.207644
Name: 2023-01-01 00:00:00, dtype: float64


In [42]:
# Select by integer position with .iloc (slice end positions are excluded).
# A stray, half-typed `pr` token used to end this cell and raised NameError
# on re-run; it has been removed.
print(df.iloc[3])                     # row at position 3 -> Series
print()
print(df.iloc[3:5, 0:2])              # rows 3-4, columns 0-1
print()
print(df.iloc[[1, 2, 4], [0, 2]])     # explicit lists of row and column positions
print()
print(df.iloc[1:3, :])                # rows 1-2, every column

A    0.169621
B    0.341821
C    0.139733
D    0.513013
Name: 2023-01-04 00:00:00, dtype: float64

                   A         B
2023-01-04  0.169621  0.341821
2023-01-05 -0.902647 -0.905398

                   A         C
2023-01-02 -1.581086  0.169183
2023-01-03  0.565877 -1.051488
2023-01-05 -0.902647  0.058214

                   A         B         C         D
2023-01-02 -1.581086 -0.718013  0.169183 -0.373700
2023-01-03  0.565877 -0.442898 -1.051488 -1.123515
