In [2]:
import pandas as pd
# Eight consecutive calendar days beginning on 2000-01-01; reused below as the row index.
index = pd.date_range(start='1/1/2000', periods=8)
print(index)

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')


In [3]:
import numpy as np

# Generate an 8-row by 3-column block of random values with NumPy, then wrap it
# in a DataFrame using the date index for the rows and A, B, C as column names.
# No seed is set, so the values differ on every run.

df = pd.DataFrame(
    data=np.random.rand(8, 3),
    index=index,
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
2000-01-01,0.262987,0.371363,0.24664
2000-01-02,0.255425,0.959442,0.784612
2000-01-03,0.568729,0.251105,0.631649
2000-01-04,0.937653,0.709541,0.7622
2000-01-05,0.394759,0.791897,0.180265
2000-01-06,0.277979,0.721979,0.616713
2000-01-07,0.32328,0.362273,0.094867
2000-01-08,0.794963,0.621065,0.766966


In [4]:
# Selecting a single row or column yields a Series: a data structure made of
# an index paired with a one-dimensional array of values.

print(df.loc[:, 'B'])

2000-01-01    0.371363
2000-01-02    0.959442
2000-01-03    0.251105
2000-01-04    0.709541
2000-01-05    0.791897
2000-01-06    0.721979
2000-01-07    0.362273
2000-01-08    0.621065
Freq: D, Name: B, dtype: float64


In [5]:
# A mask holds True or False for each row, depending on whether the condition is met.
print(df['B'].gt(0.4))

2000-01-01    False
2000-01-02     True
2000-01-03    False
2000-01-04     True
2000-01-05     True
2000-01-06     True
2000-01-07    False
2000-01-08     True
Freq: D, Name: B, dtype: bool


In [8]:
# Use the mask to filter: keep only the rows whose B value exceeds 0.4.

df2 = df.loc[df['B'] > 0.4]
df2

Unnamed: 0,A,B,C
2000-01-02,0.255425,0.959442,0.784612
2000-01-04,0.937653,0.709541,0.7622
2000-01-05,0.394759,0.791897,0.180265
2000-01-06,0.277979,0.721979,0.616713
2000-01-08,0.794963,0.621065,0.766966


In [9]:
# Swap rows and columns (transpose).

df2.transpose()

Unnamed: 0,2000-01-02,2000-01-04,2000-01-05,2000-01-06,2000-01-08
A,0.255425,0.937653,0.394759,0.277979,0.794963
B,0.959442,0.709541,0.791897,0.721979,0.621065
C,0.784612,0.7622,0.180265,0.616713,0.766966


In [22]:
# Arithmetic performed along the row axis

import pandas as pd
import numpy as np

index = pd.date_range('1/1/2000', periods=8)
df = (
    pd.DataFrame(np.random.rand(8, 3), index=index, columns=list('ABC'))
    # D: column A divided element-wise by column B
    .assign(D=lambda frame: frame['A'] / frame['B'])
    # E: per-row sum over the columns present at this point (A, B, C and D)
    .assign(E=lambda frame: frame.sum(axis=1))
    # Subtract column A from every column, aligned on the row index
    .pipe(lambda frame: frame.sub(frame['A'], axis='index'))
    # Divide every column by column C (as it stands after the subtraction),
    # aligned on the row index
    .pipe(lambda frame: frame.div(frame['C'], axis='index'))
)
df.to_csv('test.csv')  # save the DataFrame as a CSV file

# Out of all the rows, look at only the first five
df.head()

Unnamed: 0,A,B,C,D,E
2000-01-01,-0.0,1.24203,1.0,-126.666749,-128.194107
2000-01-02,-0.0,10.063759,1.0,-19.93155,-70.167642
2000-01-03,-0.0,1.130367,1.0,-6.331162,-8.942958
2000-01-04,0.0,1.019447,1.0,0.013361,2.227944
2000-01-05,-0.0,-1.552844,1.0,-0.679737,-21.137682


In [4]:
import pandas as pd
import numpy as np

index = pd.date_range('1/1/2000', periods=8)
df = (
    pd.DataFrame(np.random.rand(8, 3), index=index, columns=list('ABC'))
    # D: column A divided element-wise by column B
    .assign(D=lambda frame: frame['A'] / frame['B'])
    # E: per-row sum over the columns present at this point (A, B, C and D)
    .assign(E=lambda frame: frame.sum(axis=1))
    # Subtract column A from every column, aligned on the row index
    .pipe(lambda frame: frame.sub(frame['A'], axis='index'))
)

df

Unnamed: 0,A,B,C,D,E
2000-01-01,0.0,0.245434,-0.317347,0.229139,1.284511
2000-01-02,0.0,0.100477,0.61902,0.443437,1.588148
2000-01-03,0.0,0.009325,-0.019883,0.007817,2.945611
2000-01-04,0.0,0.087463,-0.371355,0.26273,1.815666
2000-01-05,0.0,0.077616,-0.068016,0.487494,0.857874
2000-01-06,0.0,-0.738594,-0.429822,3.633102,5.299727
2000-01-07,0.0,0.10325,-0.102538,0.119698,2.402833
2000-01-08,0.0,-0.550164,-0.077387,16.224317,17.351658
