# Chatper 6 - Financial Time Series
시계열 분석

## 1 Pandas 로 데이터 다루기
DataFrame : table data

### 01 Pandas 기초
DataFrame : table data

In [2]:
import pandas as pd
df = pd.DataFrame([10, 20, 30, 40], columns=['numbers'],
                  index=['a', 'b', 'c', 'd'])
df

Unnamed: 0,numbers
a,10
b,20
c,30
d,40


In [8]:
print(df.index)    # the index values
print(df.columns)  # the column names
df.ix['c']         # selection via index

Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['numbers'], dtype='object')


numbers    30
Name: c, dtype: int64

In [9]:
# DataFrame.ix[ 인덱스 ]
# selection of multiple indices
df.ix[['a', 'd']]  

Unnamed: 0,numbers
a,10
d,40


In [5]:
# 인덱스 주소값으로 선택하기
# selection via Index object
df.ix[df.index[1:3]]  

Unnamed: 0,numbers
b,20
c,30


In [17]:
# 컬럼별 총합을 출력한다 (field가 연산의 기준)
df.sum()  

floats                           17.75
names      GuidoFelixFrancescYvesHenry
numbers                            200
dtype: object

In [7]:
# 모든 cell에 일괄함수 적용하기
df.apply(lambda x: x ** 2)  

Unnamed: 0,numbers
a,100
b,400
c,900
d,1600


In [10]:
# 사칙연산은 lambda 안쓰고도 가능
df ** 2  

Unnamed: 0,numbers
a,100
b,400
c,900
d,1600


In [11]:
# 새로운 컬럼 생성하기
df['floats'] = (1.5, 2.5, 3.5, 4.5)
df

Unnamed: 0,numbers,floats
a,10,1.5
b,20,2.5
c,30,3.5
d,40,4.5


In [18]:
# 특정 컬럼만 선택
# Series : 1차원 DataFrame
df['floats']

pandas.core.series.Series

In [13]:
# 새로운 컬럼추가
df['names'] = pd.DataFrame(['Yves', 'Guido', 'Felix', 'Francesc'],
                           index=['d', 'a', 'b', 'c'])
df

Unnamed: 0,numbers,floats,names
a,10,1.5,Guido
b,20,2.5,Felix
c,30,3.5,Francesc
d,40,4.5,Yves


In [14]:
# 일시적으로 자료 추가하기 : [list]에서 객체추가와 동일
df.append({'numbers': 100, 'floats': 5.75, 'names': 'Henry'},
          ignore_index=True)

Unnamed: 0,numbers,floats,names
0,10,1.5,Guido
1,20,2.5,Felix
2,30,3.5,Francesc
3,40,4.5,Yves
4,100,5.75,Henry


In [15]:
# df.append() : row 추가
df = df.append(pd.DataFrame({
    'numbers': 100, 'floats': 5.75,'names': 'Henry'}, index=['z',]))
df

Unnamed: 0,floats,names,numbers
a,1.5,Guido,10
b,2.5,Felix,20
c,3.5,Francesc,30
d,4.5,Yves,40
z,5.75,Henry,100


In [19]:
# 이미 존재하는 index를 기준으로 데이터가 수정
df.join(pd.DataFrame([1, 4, 9, 16, 25],
        index=['a', 'b', 'c', 'd', 'y'], columns=['squares',]))

Unnamed: 0,floats,names,numbers,squares
a,1.5,Guido,10,1.0
b,2.5,Felix,20,4.0
c,3.5,Francesc,30,9.0
d,4.5,Yves,40,16.0
z,5.75,Henry,100,


In [20]:
# how='outer' : 기존의 index 보존하고, 새로운 index는 추가
df = df.join(pd.DataFrame([1, 4, 9, 16, 25],
                    index=['a', 'b', 'c', 'd', 'y'],
                    columns=['squares',]),  how='outer')
df

Unnamed: 0,floats,names,numbers,squares
a,1.5,Guido,10.0,1.0
b,2.5,Felix,20.0,4.0
c,3.5,Francesc,30.0,9.0
d,4.5,Yves,40.0,16.0
y,,,,25.0
z,5.75,Henry,100.0,


In [21]:
# 각 column의 평균
df[['numbers', 'squares']].mean()

numbers    40.0
squares    11.0
dtype: float64

In [22]:
# 각 column의 표준편차
df[['numbers', 'squares']].std()

numbers    35.355339
squares     9.669540
dtype: float64

### 02 Datetime Index
시계열 자료 생성하기

In [30]:
# B 	business day frequency
# D 	calendar day frequency
# W 	weekly frequency
# M 	month end frequency
# MS 	month start frequency
# Q 	quarter end frequency
# BQ 	business quarter endfrequency
# QS 	quarter start frequency
# BQS 	business quarter start frequency
# BA 	business year end frequency
# BAS 	business year start frequency
# BH 	business hour frequency
# H 	hourly frequency
# T, min 	minutely frequency

In [27]:
import numpy as np
a = np.random.standard_normal((9, 4))        # index 9, field 4 인 자료생성
a.round(6)                                   # 소숫점 6번쨰 자리서 반올림
df = pd.DataFrame(a)                         # 자료 연결하기
df.columns = [['No1', 'No2', 'No3', 'No4']]  # column 추가 (0,1,2.. 기본 column인 경우)
df

Unnamed: 0,No1,No2,No3,No4
0,0.532149,-0.09178,-0.914403,-0.165442
1,0.600013,-0.271546,-1.187857,-0.467085
2,0.930202,0.379083,0.876215,0.429029
3,-1.464343,-0.535883,1.899386,-0.055574
4,0.011865,-0.63255,-0.553967,0.273093
5,-0.119032,1.711402,-0.787218,-0.929512
6,-0.498346,-0.07946,1.683854,-0.031044
7,1.36203,0.113925,1.099436,0.207389
8,0.708497,1.079999,-1.897296,-0.550768


In [28]:
# value in column No2 at index position 3 : 특정값 추출
df['No2'][3]  

-0.53588311517901244

In [31]:
# Datetime Index 생성하기
dates = pd.date_range('2015-1-1', periods=9, freq='M')
print(dates)
df.index = dates
df

DatetimeIndex(['2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
               '2015-05-31', '2015-06-30', '2015-07-31', '2015-08-31',
               '2015-09-30'],
              dtype='datetime64[ns]', freq='M')


Unnamed: 0,No1,No2,No3,No4
2015-01-31,0.532149,-0.09178,-0.914403,-0.165442
2015-02-28,0.600013,-0.271546,-1.187857,-0.467085
2015-03-31,0.930202,0.379083,0.876215,0.429029
2015-04-30,-1.464343,-0.535883,1.899386,-0.055574
2015-05-31,0.011865,-0.63255,-0.553967,0.273093
2015-06-30,-0.119032,1.711402,-0.787218,-0.929512
2015-07-31,-0.498346,-0.07946,1.683854,-0.031044
2015-08-31,1.36203,0.113925,1.099436,0.207389
2015-09-30,0.708497,1.079999,-1.897296,-0.550768


In [32]:
# DataFrame 을 array 객체로 변환하기
np.array(df).round(5)

array([[ 0.53215, -0.09178, -0.9144 , -0.16544],
       [ 0.60001, -0.27155, -1.18786, -0.46709],
       [ 0.9302 ,  0.37908,  0.87622,  0.42903],
       [-1.46434, -0.53588,  1.89939, -0.05557],
       [ 0.01187, -0.63255, -0.55397,  0.27309],
       [-0.11903,  1.7114 , -0.78722, -0.92951],
       [-0.49835, -0.07946,  1.68385, -0.03104],
       [ 1.36203,  0.11393,  1.09944,  0.20739],
       [ 0.7085 ,  1.08   , -1.8973 , -0.55077]])

### 03 기초적인 분석
