# [금융,경제 데이터 애플리케이션]

- Cross Section(크로스섹션) : 시간상 고정된 한 시점의 데이터를 가리키는 용어. [예시: 특정 날짜의 S&P 500 지수에 포함된 모든 종목의 종가]

In [1]:
from __future__ import division
from pandas import Series, DataFrame
import pandas as pd
from numpy.random import randn
import numpy as np
# Display and reproducibility settings for this notebook session.
pd.options.display.max_rows = 12  # truncate printed frames longer than 12 rows
# Print NumPy floats with 4 digits of precision and suppress scientific
# notation for small values. One call sets both options.
np.set_printoptions(precision=4, suppress=True)
import os
import matplotlib.pyplot as plt
np.random.seed(12345)  # fixed seed so randomly generated examples are repeatable
plt.rc('figure', figsize=(12, 4))  # default figure size for all plots

# Use the plain-text DataFrame repr instead of the HTML table in the notebook.
pd.options.display.notebook_repr_html = False

%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last
# one, so cells listing several objects display all of them.
InteractiveShell.ast_node_interactivity = 'all'

## 11.1: 데이터 준비(Data munging topics)

### 11.1.1: 시계열과 크로스 섹션 정렬(Time series and cross-section alignment)

In [6]:
# Read both CSV files, using the first column as the index and parsing it as dates.
close_px = pd.read_csv('./ch11/stock_px.csv', index_col=0, parse_dates=True)
volume = pd.read_csv('./ch11/volume.csv', index_col=0, parse_dates=True)
# Keep four tickers. The price window ends on 09-14 while the volume window
# ends on 09-12, so the two frames deliberately cover different date ranges.
prices = close_px.loc[slice('2011-09-05', '2011-09-14'),
                      ['AAPL', 'JNJ', 'SPX', 'XOM']]
volume = volume.loc[slice('2011-09-05', '2011-09-12'),
                    ['AAPL', 'JNJ', 'SPX', 'XOM']]

In [9]:
# Show both frames: prices spans 09-06..09-14, volume only 09-06..09-12.
prices
volume

              AAPL    JNJ      SPX    XOM
2011-09-06  379.74  64.64  1165.24  71.15
2011-09-07  383.93  65.43  1198.62  73.65
2011-09-08  384.14  64.95  1185.90  72.82
2011-09-09  377.48  63.64  1154.23  71.01
2011-09-12  379.94  63.59  1162.27  71.84
2011-09-13  384.62  63.61  1172.87  71.65
2011-09-14  389.30  63.73  1188.68  72.64

                  AAPL         JNJ           SPX         XOM
2011-09-06  18173500.0  15848300.0  5.103980e+09  25416300.0
2011-09-07  12492000.0  10759700.0  4.441040e+09  23108400.0
2011-09-08  14839800.0  15551500.0  4.465170e+09  22434800.0
2011-09-09  20171900.0  17008200.0  4.586370e+09  27969100.0
2011-09-12  16697300.0  13448200.0  5.168550e+09  26205800.0

In [11]:
# Traded value (price x shares traded), not share volume. The operands are
# aligned on index and columns first, so dates absent from volume yield NaN.
prices * volume

                    AAPL           JNJ           SPX           XOM
2011-09-06  6.901205e+09  1.024434e+09  5.947362e+12  1.808370e+09
2011-09-07  4.796054e+09  7.040072e+08  5.323119e+12  1.701934e+09
2011-09-08  5.700561e+09  1.010070e+09  5.295245e+12  1.633702e+09
2011-09-09  7.614489e+09  1.082402e+09  5.293726e+12  1.986086e+09
2011-09-12  6.343972e+09  8.551710e+08  6.007251e+12  1.882625e+09
2011-09-13           NaN           NaN           NaN           NaN
2011-09-14           NaN           NaN           NaN           NaN

In [13]:
# Volume-weighted average price per ticker: total traded value divided by
# total shares traded. sum() skips the NaN rows on dates where volume has no
# data, so only the overlapping dates contribute.
vwap = prices.mul(volume).sum().div(volume.sum())
vwap

AAPL     380.655181
JNJ       64.394769
SPX     1172.588832
XOM       72.024288
dtype: float64

In [15]:
# Drop tickers whose VWAP is NaN; none are here, so the result is unchanged.
vwap.dropna()

AAPL     380.655181
JNJ       64.394769
SPX     1172.588832
XOM       72.024288
dtype: float64

In [17]:
# align returns a tuple holding both objects reindexed to common labels.
# join='inner' keeps only the labels present in both, so the two price-only
# dates (09-13, 09-14) are dropped explicitly instead of surfacing as NaN.
# NOTE: this volume data does include SPX, so all four columns survive.
prices.align(volume, join='inner')

(              AAPL    JNJ      SPX    XOM
 2011-09-06  379.74  64.64  1165.24  71.15
 2011-09-07  383.93  65.43  1198.62  73.65
 2011-09-08  384.14  64.95  1185.90  72.82
 2011-09-09  377.48  63.64  1154.23  71.01
 2011-09-12  379.94  63.59  1162.27  71.84,
                   AAPL         JNJ           SPX         XOM
 2011-09-06  18173500.0  15848300.0  5.103980e+09  25416300.0
 2011-09-07  12492000.0  10759700.0  4.441040e+09  23108400.0
 2011-09-08  14839800.0  15551500.0  4.465170e+09  22434800.0
 2011-09-09  20171900.0  17008200.0  4.586370e+09  27969100.0
 2011-09-12  16697300.0  13448200.0  5.168550e+09  26205800.0)

In [19]:
# Three small integer Series whose labels only partially overlap.
s1 = Series(range(3), index=list('abc'))
s2 = Series(range(4), index=list('dbce'))
s3 = Series(range(3), index=list('fac'))
# Building a frame from a dict of Series uses the union of their indexes;
# labels missing from a given Series show up as NaN in that column.
DataFrame(dict(one=s1, two=s2, three=s3))

   one  three  two
a  0.0    1.0  NaN
b  1.0    NaN  1.0
c  2.0    2.0  2.0
d  NaN    NaN  0.0
e  NaN    NaN  3.0
f  NaN    0.0  NaN

In [24]:
# Passing an explicit index selects and orders the rows: only the labels
# 'f', 'a', 'c', 'e' are kept, in that order.
DataFrame(dict(one=s1, two=s2, three=s3), index=['f', 'a', 'c', 'e'])

   one  three  two
f  NaN    0.0  NaN
a  0.0    1.0  NaN
c  2.0    2.0  2.0
e  NaN    NaN  3.0

### 11.1.2: 다른 빈도를 가지는 시계열 연산(Operations with time series of different frequencies)