In [None]:
from __future__ import print_function
from __future__ import division

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Pass a (14, 7) figure.figsize rc override to seaborn's plotting context.
sns.set_context(rc={'figure.figsize': (14, 7) } )
# Chained assignment: binds both `figzize_me` and `figsize` to the same (14, 7) tuple.
# NOTE(review): `figzize_me` looks like a typo of `figsize_me`; neither name is used
# in the visible cells — confirm against the rest of the notebook before renaming.
figzize_me = figsize =(14, 7)
# import warnings; warnings.simplefilter('ignore')
import pandas as pd
# Display at most 12 columns when rendering a DataFrame.
pd.options.display.max_columns = 12

import os
import sys
# Insert at position 0 so the GitHub checkout in the parent directory is used,
# avoiding a version mismatch with a pip-installed abupy.
sys.path.insert(0, os.path.abspath('../'))
import abupy
from abupy import xrange, range

# Enable the example-data environment so results match the book's examples;
# it uses the df_kl.h5 data under RomDataBu.
abupy.env.enable_example_env_ipython()


In [None]:
# Record the interpreter version this notebook was run with.
print(sys.version)

## 4章：pandas-learn

### 4.1.1 ： DataFrame 构建及方法

In [None]:
import pandas as pd

In [None]:
# Load the pre-generated array from ../gen; later cells label its rows as
# stocks and its columns as calendar days.
stock_day_change = np.load('../gen/stock_day_change.npy')
# Display the array dimensions.
stock_day_change.shape

In [None]:
# Three ways of taking the first five rows of a frame built from the array.
# Only the last expression of a cell is displayed; the first two results are discarded.
pd.DataFrame(stock_day_change).head()
pd.DataFrame(stock_day_change).head(5)
pd.DataFrame(stock_day_change)[:5]


### 4.1.2 索引行列序列

In [None]:
# One label per row of the array: '股票0', '股票1', ...
stock_symbols = ['股票' + str(row_no) for row_no in xrange(len(stock_day_change))]
# Use the labels as the row index and show the first two rows.
pd.DataFrame(data=stock_day_change, index=stock_symbols).head(2)

In [None]:
# Column labels: one calendar day per column of the array, starting 2017-01-01.
days = pd.date_range(start='2017-1-1', periods=stock_day_change.shape[1], freq='1d')
# Row labels: one synthetic symbol per row of the array.
stock_symbols = list(map(lambda x: '股票' + str(x), xrange(stock_day_change.shape[0])))

# Frame with symbols as the row index and dates as the columns.
df = pd.DataFrame(data=stock_day_change, columns=days, index=stock_symbols)
df.head(2)


### 4.1.3 金融时间序列


In [None]:
# Build the date-indexed frame (rows = days, columns = symbols) directly from the
# source array rather than with `df = df.T`: re-running a self-transposing cell
# would flip the frame back, whereas this form gives the same result on every run.
df = pd.DataFrame(stock_day_change, index=stock_symbols, columns=days).T
df.head()

In [None]:
from abupy import pd_resample
# Resample the date-indexed frame with a '21D' rule, aggregating with 'mean'.
# NOTE(review): pd_resample is an abupy helper; presumably it wraps pandas
# resampling across pandas versions — confirm against abupy.
df_20 = pd_resample(df, '21D', how='mean')
df_20.head()

### 4.1.4 Series 构建方法

In [None]:
# Selecting a single column; the printed type shows what kind of object that yields.
df_stock0 = df['股票0']
print(type(df_stock0))
df_stock0.head()


In [None]:
# Plot the running total of the '股票0' series.
df_stock0.cumsum().plot()


### 4.1.5 重新采样

In [None]:
from abupy import pd_resample

# Resample the cumulative series into 'ohlc' bars, once per rule ('5D' and '21D').
df_stock0_5, df_stock0_20 = (
    pd_resample(df_stock0.cumsum(), rule, how='ohlc') for rule in ('5D', '21D')
)
df_stock0_5.head()

In [None]:
from abupy import ABuMarketDrawing
# Draw candlesticks from the 5-day OHLC bars: index, open, high, low and close
# are passed positionally, followed by a random array of the same length.
# NOTE(review): the random array presumably stands in for volume, and the meaning
# of the trailing `None, 'stock'` arguments is not visible here — confirm against
# ABuMarketDrawing.plot_candle_stick. The random values are unseeded, so that
# part of the figure differs between runs.
ABuMarketDrawing.plot_candle_stick(df_stock0_5.index, 
                                   df_stock0_5['open'].values, 
                                   df_stock0_5['high'].values, 
                                   df_stock0_5['low'].values, 
                                   df_stock0_5['close'].values, 
                                   np.random.random(len(df_stock0_5)),
                                   None, 'stock', day_sum=False, html_bk=False, save=False)

In [None]:
# Inspect the pieces of the resampled frame: the type of a column's raw values,
# the index of a column, and the column labels.
print(type(df_stock0_5['open'].values))
print(df_stock0_5['open'].index)
print(df_stock0_5.columns)

## 4.2 基本数据分析示例

In [None]:
from abupy import ABuSymbolPd

In [None]:
# Build the TSLA price frame used by the rest of the notebook.
# NOTE(review): n_folds=2 presumably requests two years of history — confirm
# against ABuSymbolPd.make_kl_df.
tsla_df = ABuSymbolPd.make_kl_df('usTSLA', n_folds=2)
tsla_df.tail()

### 4.2.1 数据整体分析

In [None]:
# Plot close and volume on separate subplots (red and green lines, with a grid).
tsla_df[['close', 'volume']].plot(subplots = True, style=['r', 'g'], grid=True)
# Show the type of a single selected column.
print(type(tsla_df['close']))

In [None]:
# Summary of the frame: index, columns, non-null counts and dtypes.
tsla_df.info()

In [None]:
# Descriptive statistics for each numeric column.
tsla_df.describe()

### 4.2.2 索引选取和切片选择

In [None]:
# Label-based selection: rows in the given date range, single column 'open'.
tsla_df.loc['2014-07-23':'2014-07-31', 'open']

In [None]:
# Label-based selection: same date range, with a list of columns.
tsla_df.loc['2014-07-23':'2014-07-31', ['open', 'close']]

In [None]:
# Position-based selection: first five rows and first five columns.
tsla_df.iloc[0:5, 0:5]

In [None]:
# First three values of the 'open' column, selected by position.
tsla_df['open'].iloc[:3]

### 4.2.3 逻辑条件进行数据筛选

In [None]:
# Rows whose absolute p_change exceeds 8.
tsla_df[tsla_df.p_change.abs() > 8]

In [None]:
# Rows whose absolute p_change exceeds 8 and whose volume is more than
# 2.5 times the mean volume.
tsla_df[
    (tsla_df.p_change.abs() > 8)
    & (tsla_df.volume > tsla_df.volume.mean() * 2.5)
]

In [None]:
# Trading days whose volume is at least 3 times the previous day's volume.
# Previous day's volume: the volume column shifted down by one row.
tsla_df['pre_volume'] = tsla_df.volume.shift(1)
tsla_df[['pre_volume', 'volume']].tail()
filtered_df = tsla_df.loc[tsla_df['volume'] >= tsla_df['pre_volume'] * 3]
print(filtered_df)

In [None]:
# Print the rows in the given date range (label-based row slice).
print(tsla_df.loc['2014-09-10':'2014-09-15'])

In [None]:
# Days on which volume is at least twice the previous day's volume
# and the close is above the previous close.
filtered_df = tsla_df.loc[
    (tsla_df.close > tsla_df.pre_close)
    & (tsla_df.volume >= tsla_df.pre_volume * 2)
]
print(filtered_df)
# Running total of p_change over those days.
print(filtered_df.p_change.cumsum())

### 4.2.4 ：数据转换与规整

In [None]:
# Five rows with the smallest p_change. Sorting by a column's values is done with
# sort_values; the `by` keyword of sort_index is deprecated and no longer
# accepted by current pandas.
tsla_df.sort_values(by='p_change')[:5]

In [None]:
# Five rows with the largest p_change. Sorting by a column's values is done with
# sort_values; the `by` keyword of sort_index is deprecated and no longer
# accepted by current pandas.
tsla_df.sort_values(by='p_change', ascending=False)[:5]

In [None]:
# Handling missing values by dropping whole rows. Both calls return a new frame
# (tsla_df itself is unchanged) and their results are discarded here:
# the first drops rows containing any NaN, the second only rows that are all NaN.
tsla_df.dropna()
tsla_df.dropna(how='all')
# Replace NaNs with the column means. fillna(..., inplace=True) returns None, so
# chaining .head() onto it raised AttributeError; assign the filled frame back
# and display it as a separate expression instead.
tsla_df = tsla_df.fillna(tsla_df.mean())
tsla_df.head()

In [None]:
# First three values of the close-to-close percentage change.
tsla_df.close.pct_change()[:3]

In [None]:
# Manual check of the formula (current - previous) / previous.
# NOTE(review): the literals are presumably the first three closes in the
# example data — confirm against the output of the previous cell.
(223.54 - 222.49) / 222.49, (223.57 - 223.54) / 223.54

In [None]:
# pct_change subtracts the previous item from each item, starting at the second
# one, and divides by the previous item; applied to close this gives the
# day-over-day price change ratio.
change_ratio = tsla_df.close.pct_change()
change_ratio.tail()

In [None]:
# Last five change ratios expressed as percentages, rounded to two decimals.
(change_ratio.iloc[-5:] * 100).round(2)

In [None]:
def format_2dp(x):
    """Return x formatted as a string with two decimal places."""
    return '%.2f' % x


# Map every atr21 value to its two-decimal string form. The formatter has its own
# name so that the builtin `format` is not shadowed for the rest of the notebook.
tsla_df.atr21.map(format_2dp).tail()

In [None]:
# Unformatted atr21 values, for comparison with the formatted strings.
tsla_df.atr21.tail()

### 4.2.5 数据本地序列化操作

In [None]:
# Write every column of the frame, including the index, to a CSV file under ../gen.
tsla_df.to_csv('../gen/tsla_df_test.csv', columns=tsla_df.columns, index=True)

In [None]:
# Read the CSV back, using the first column as the index and parsing it as dates.
tsla_df_load = pd.read_csv('../gen/tsla_df_test.csv', parse_dates=True, index_col=0)
tsla_df_load.head()

## 4.3 实例 1： 寻找移动涨跌幅阈值

In [None]:
# Histogram of p_change using 80 bins.
tsla_df.p_change.hist(bins=80)

### 4.3.1: 数据离散化

In [None]:
# Split the absolute p_change values into 10 quantile-based bins
# and count the rows falling into each bin.
cats = pd.qcut(tsla_df.p_change.abs(), q=10)
cats.value_counts()

In [None]:
# Manually chosen bin edges for p_change: from negative infinity through
# -7, -5, -3, 0, 3, 5, 7 to positive infinity.
bins = [
    -np.inf,
    -7.0, -5, -3,
    0,
    3, 5, 7,
    np.inf,
]
cats = pd.cut(tsla_df.p_change, bins=bins)
cats.value_counts()

In [None]:
# pd.get_dummies is a pandas function that converts a categorical variable into
# dummy variables, also known as one-hot encoding.
change_ration_dummies = pd.get_dummies(cats, prefix='cr_dummies')
change_ration_dummies.head()

### 4.3.2 concat , append, merge的使用

In [None]:
# Column-wise concatenation (axis=1): put the dummy columns alongside the price frame.
pd.concat([tsla_df, change_ration_dummies], axis=1).tail()

In [None]:
# Row-wise concatenation (axis=0) of two filtered subsets: rows with p_change > 10
# followed by rows with atr14 > 16.
pd.concat([tsla_df[tsla_df.p_change > 10], tsla_df[tsla_df.atr14 > 16]], axis=0)

In [None]:
# Same two filtered subsets, combined with DataFrame.append.
# NOTE(review): DataFrame.append is deprecated in newer pandas releases and
# removed in pandas 2.0 — confirm the pandas version this notebook targets;
# pd.concat([...], axis=0) is the replacement.
tsla_df[tsla_df.p_change > 10].append(tsla_df[tsla_df.atr14> 16])

In [None]:
# The first of the two subsets on its own: rows with p_change > 10.
tsla_df[tsla_df.p_change > 10]

In [None]:
# Two small frames that share the 'data' column name but have different key columns.
stock_a = pd.DataFrame({
    'stock_a': ['a', 'b', 'c', 'd', 'e'],
    'data': list(range(5)),
})
stock_b = pd.DataFrame({
    'stock_b': ['a', 'b', 'c'],
    'data': list(range(3)),
})
print(stock_a)
print(stock_b)
# Join on the differently named key columns.
pd.merge(left=stock_a, right=stock_b, left_on='stock_a', right_on='stock_b')

## 4.4 实例2 ： 星期几是这个股票最好的日子

In [None]:
# Flag column: 1 where p_change is greater than 0, otherwise 0.
tsla_df['positive'] = np.where(tsla_df.p_change > 0, 1, 0)
tsla_df.tail()

### 4.4.1 构建交叉表

In [None]:
# Cross-tabulation: counts of each `positive` value for each `date_week` value.
xt = pd.crosstab(tsla_df.date_week, tsla_df.positive)
xt

In [None]:
# Turn the counts into proportions: divide each row by its own total.
xt_pct = xt.div(xt.sum(axis=1).astype(float), axis='index')
xt_pct

In [None]:
# Stacked bar chart of the per-row proportions from the cross-tab.
xt_pct.plot(figsize=(8, 5), kind='bar', stacked=True, title='date_week -> positive')
plt.xlabel('date_week')
# The second label belongs on the y axis; a second plt.xlabel call overwrote
# the 'date_week' label and left the y axis unlabelled.
plt.ylabel('positive')

### 4.4.2 构建透视表

In [None]:
# Pivot table of the `positive` column indexed by `date_week`
# (pivot_table's aggregation is left at its default).
tsla_df.pivot_table(['positive'], index=['date_week'])

In [None]:
# Row counts for every (date_week, positive) combination.
tsla_df.groupby(['date_week', 'positive'])['positive'].count()

### 4.4.3 跳空缺口

In [None]:
# Gap threshold: 3% of the median close. Read as a global by judge_jump.
jump_threshold = tsla_df.close.median() * 0.03
jump_threshold

In [None]:
# Accumulator for the rows that judge_jump classifies as gaps.
jump_pd = pd.DataFrame()


def judge_jump(p_today):
    """Record p_today in the global jump_pd when it gaps past jump_threshold.

    An upward gap is a row with p_change > 0 whose low exceeds pre_close by more
    than jump_threshold; a downward gap is a row with p_change < 0 whose high is
    below pre_close by more than jump_threshold.

    A copy of the row is appended to jump_pd with two extra fields:
    ``jump`` (1 for an upward gap, -1 for a downward gap) and ``jump_power``
    (the gap size divided by jump_threshold). The row passed in is not modified.

    :param p_today: one row of the price frame, providing p_change, low, high
                    and pre_close
    :return: None; the result is the update of the global jump_pd
    """
    global jump_pd
    up_gap = p_today.low - p_today.pre_close
    down_gap = p_today.pre_close - p_today.high
    if p_today.p_change > 0 and up_gap > jump_threshold:
        # Upward gap power = (today's low - previous close) / threshold
        direction, gap = 1, up_gap
    elif p_today.p_change < 0 and down_gap > jump_threshold:
        # Downward gap power = (previous close - today's high) / threshold
        direction, gap = -1, down_gap
    else:
        return

    # Work on a copy so the caller's row (possibly a view of the frame) stays untouched.
    jump_row = p_today.copy()
    jump_row['jump'] = direction
    jump_row['jump_power'] = gap / jump_threshold
    # One-row frame whose index label is the row's own label. pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0. jump_pd is still extended
    # on every call because callers reset and read that global directly.
    jump_frame = pd.DataFrame([jump_row])
    jump_pd = jump_frame if jump_pd.empty else pd.concat([jump_pd, jump_frame])


# Visit every row by position; .iloc replaces the removed .ix indexer.
for kl_index in np.arange(0, tsla_df.shape[0]):
    today = tsla_df.iloc[kl_index]
    judge_jump(today)

# Show only the columns relevant to the gap analysis.
jump_pd.filter(['jump', 'jump_power', 'close', 'date', 'p_change', 'pre_close'])

In [None]:
# Reset the accumulator so this run does not add to rows collected earlier.
jump_pd = pd.DataFrame()
# axis=1 passes each row to judge_jump; every row of tsla_df is one trading day.
# judge_jump records matching rows in the global jump_pd as a side effect.
tsla_df.apply(judge_jump, axis=1)
jump_pd

In [None]:
from abupy import ABuMarketDrawing
# Draw the price frame as candlesticks, passing the index labels of the gap rows.
# NOTE(review): view_indexs presumably marks those dates on the chart — confirm
# against ABuMarketDrawing.plot_candle_form_klpd.
ABuMarketDrawing.plot_candle_form_klpd(tsla_df, view_indexs=jump_pd.index)

## 4.6 pandas 三维面板使用

In [None]:
# Turn off the example-data environment that was enabled in the setup cell.
abupy.env.disable_example_env_ipython()

In [None]:
# Switch abupy's data fetch mode to E_DATA_FETCH_FORCE_NET.
# NOTE(review): going by the enum name, this forces data to be fetched over the
# network, so the following cell needs connectivity — confirm.
abupy.env.g_data_fetch_mode = abupy.env.EMarketDataFetchMode.E_DATA_FETCH_FORCE_NET

In [None]:
from abupy import ABuIndustries
r_symbol = 'usTSLA'
# The call returns a pair; only the first element is kept.
# NOTE(review): presumably the first element is a panel of price data for symbols
# in the same industry as r_symbol, and `p_date` may be a typo of `p_data` —
# confirm against ABuIndustries.get_industries_panel_from_target.
p_date, _ = ABuIndustries.get_industries_panel_from_target(r_symbol, show=False)