# 2.1 基础语法与数据结构

In [1]:
# Bind an integer literal and let type() report its class.
i = 1
type(i)

int

In [2]:
# Bind a floating-point literal and let type() report its class.
f = 1.1
type(f)

float

In [3]:
# Five prices kept in one comma-separated string; later cells split it apart.
price_str = '30.14, 29.58, 26.36, 32.56, 32.82'
type(price_str)

str

In [4]:
# isinstance() tests whether the object is an instance of the given class.
isinstance(price_str, str)

True

In [5]:
# Print the identity of the string object currently bound to price_str.
print('id = {}'.format(id(price_str)))

id = 3031610646576


In [6]:
# replace() returns the text with the spaces removed; the name is rebound to
# that result and its id is printed for comparison with the previous cell.
price_str = price_str.replace(' ', '')
print('id = {}'.format(id(price_str)))

id = 3031610888720


In [7]:
# Show the string after the spaces were stripped.
print(price_str)

30.14,29.58,26.36,32.56,32.82


In [8]:
# Split on commas to get a list of price strings (the elements stay str).
price_array = price_str.split(',')
print(price_array)

['30.14', '29.58', '26.36', '32.56', '32.82']


In [9]:
# 4. List comprehension
# Build one date label per price by adding the element index to the integer
# base and converting the sum to str. This is plain integer addition, not
# calendar arithmetic.
date_base = 20190410
date_array = [str(date_base + ind) for ind, _ in enumerate(price_array)]
date_array

['20190410', '20190411', '20190412', '20190413', '20190414']

In [10]:
# zip() pairs each date with the price at the same position; the
# comprehension collects the pairs as (date, price) tuples.
stock_tuple_list = [(date, price) for date, price in zip(date_array, price_array)]
stock_tuple_list

[('20190410', '30.14'),
 ('20190411', '29.58'),
 ('20190412', '26.36'),
 ('20190413', '32.56'),
 ('20190414', '32.82')]

In [11]:
# 5. Named tuples
# namedtuple() creates a tuple subclass whose fields can be read by name
# (here: date and price) as well as by position.
from collections import namedtuple
stock_namedtuple = namedtuple('stock', ('date', 'price'))
stock_namedtuple_list = [stock_namedtuple(date, price) for date, price in zip(date_array, price_array)]
stock_namedtuple_list

[stock(date='20190410', price='30.14'),
 stock(date='20190411', price='29.58'),
 stock(date='20190412', price='26.36'),
 stock(date='20190413', price='32.56'),
 stock(date='20190414', price='32.82')]

In [12]:
# Dict comprehension: {key: value for ... in ...}
# Maps each date string to the price string at the same position.
stock_dict = {date: price for date, price in zip(date_array, price_array)}
stock_dict

{'20190410': '30.14',
 '20190411': '29.58',
 '20190412': '26.36',
 '20190413': '32.56',
 '20190414': '32.82'}

In [13]:
# View over the dictionary's values (the price strings).
stock_dict.values()

dict_values(['30.14', '29.58', '26.36', '32.56', '32.82'])

In [14]:
# View over the dictionary's keys (the date strings).
stock_dict.keys()

dict_keys(['20190410', '20190411', '20190412', '20190413', '20190414'])

In [15]:
# Ordered dictionary: OrderedDict
# Rebinds stock_dict to an OrderedDict built from the same (date, price) pairs.
from collections import OrderedDict
stock_dict = OrderedDict((date, price) for date, price in zip(date_array, price_array))
stock_dict

OrderedDict([('20190410', '30.14'),
             ('20190411', '29.58'),
             ('20190412', '26.36'),
             ('20190413', '32.56'),
             ('20190414', '32.82')])

# 2.2 函数

In [16]:
# Built-in functions
# Iterating a dict yields its keys, so this is the smallest date string.
min(stock_dict)

'20190410'

In [17]:
# Pair each value with its key so that min() compares on the price first.
# The prices are still strings here, so the comparison is lexicographic.
min(zip(stock_dict.values(), stock_dict.keys()))

('26.36', '20190412')

In [18]:
# 自定义函数:查找第二大的数字
def find_second_max(dict_array):
    """
    Return the second-largest (value, key) pair of a mapping.

    The pairs are ordered by value first and by key second, using the
    natural ordering of the values. String values therefore compare
    lexicographically, not numerically.

    :param dict_array: mapping whose values and keys are mutually comparable
    :return: the (value, key) tuple that sorts second from the end
    :raises IndexError: if the mapping holds fewer than two entries
    """
    # Work on the mapping that was passed in rather than on a module-level
    # name, so the result always reflects the caller's argument.
    stock_prices_sorted = sorted(zip(dict_array.values(), dict_array.keys()))
    # The second-largest entry is the second item from the end.
    return stock_prices_sorted[-2]

# The built-in callable() verifies that the name refers to something that can
# be called before it is invoked.
if callable(find_second_max):
    print(find_second_max(stock_dict))

('32.56', '20190413')


In [19]:
# lambda function
# Same lookup as find_second_max, written as a lambda: sort the (value, key)
# pairs of the mapping that is passed in and take the second item from the end.
find_second_max_lambda = lambda dict_array: sorted(zip(dict_array.values(), dict_array.keys()))[-2]
# Display the second-largest (price, date) pair returned by the lambda.
find_second_max_lambda(stock_dict)

('32.56', '20190413')

In [20]:
# 2.2.3 Higher-order functions
# Goal: derive each day's price change from the closing prices.
# First convert the price strings to float explicitly with a list
# comprehension. Because stock_dict is an OrderedDict, stock_dict.values()
# can be used directly to get the closing prices in date order.
price_float_array = [float(price_str) for price_str in stock_dict.values()]
print(price_float_array)
# Shift the series against itself to get two staggered price sequences and
# zip them into a new one: [:-1] runs from the first element to the
# second-to-last, [1:] from the second element to the last, so every element
# of the result is a pair of adjacent closing prices.
pp_array = [(price1, price2) for price1, price2 in zip(price_float_array[:-1], price_float_array[1:])]
pp_array

[30.14, 29.58, 26.36, 32.56, 32.82]


[(30.14, 29.58), (29.58, 26.36), (26.36, 32.56), (32.56, 32.82)]

In [21]:
from functools import reduce
# Use map(), reduce() and lambda to derive each day's relative change.
# For every (previous, current) pair, reduce() applies the inner lambda once,
# giving (current - previous) / previous.
# round(x, n) keeps n decimal places, e.g. round(1.234, 2) == 1.23; this cell
# keeps 3 decimal places.
change_array = list(map(lambda pp: reduce(lambda a, b: round((b - a) / a, 3), pp), pp_array))
# list.insert() puts a 0 at the front: the first day has no previous day, so
# its change is set to 0.
change_array.insert(0, 0)
change_array



[0, -0.019, -0.109, 0.235, 0.008]

In [22]:
# reduce() folds the list from the left, feeding each intermediate result back
# in as x together with the next element as y.
reduce(lambda x,y:(x+y)/2, [1,2,3,4])
## Step 1: (1 + 2) / 2    = 1.5
## Step 2: (1.5 + 3) / 2  = 2.25
## Step 3: (2.25 + 4) / 2 = 3.125

3.125

In [23]:
# Rebuild the data structure with a namedtuple that also carries the change.
stock_namedtuple = namedtuple('stock', ('date', 'price', 'change'))
# zip() draws from date_array, price_array and change_array in parallel; each
# resulting stock_namedtuple is stored in an OrderedDict under its date key.
stock_dict = OrderedDict((date, stock_namedtuple(date, price, change)) for date, price, change in
                         zip(date_array, price_array, change_array))
stock_dict

OrderedDict([('20190410', stock(date='20190410', price='30.14', change=0)),
             ('20190411',
              stock(date='20190411', price='29.58', change=-0.019)),
             ('20190412',
              stock(date='20190412', price='26.36', change=-0.109)),
             ('20190413', stock(date='20190413', price='32.56', change=0.235)),
             ('20190414',
              stock(date='20190414', price='32.82', change=0.008))])

In [24]:
# Use the higher-order function filter() to select the trading days on which
# the price rose (change > 0).
up_days = list(filter(lambda day: day.change > 0, stock_dict.values()))
up_days

[stock(date='20190413', price='32.56', change=0.235),
 stock(date='20190414', price='32.82', change=0.008)]

In [25]:
# Use the higher-order function filter() to select the trading days on which
# the price fell (change < 0). The result gets its own name so that it does
# not overwrite the list of rising days built in the previous cell.
down_days = list(filter(lambda day: day.change < 0, stock_dict.values()))
down_days

[stock(date='20190411', price='29.58', change=-0.019),
 stock(date='20190412', price='26.36', change=-0.109)]

In [26]:
#
def filter_stock(stock_array_dict, want_up = True, want_calc_sum = False):
    """
    Pick the rising or the falling trading days out of an ordered stock mapping.

    :param stock_array_dict: OrderedDict whose values expose a ``change`` attribute
    :param want_up: True selects days with change > 0, False days with change < 0
    :param want_calc_sum: when True, return the sum of the selected changes
                          instead of the selected days themselves
    :return: list of the selected values, or their summed change as a float
    :raises TypeError: if stock_array_dict is not an OrderedDict
    """
    # Reject anything that is not an OrderedDict before doing any work.
    if not isinstance(stock_array_dict, OrderedDict):
        raise TypeError('stock_array_dict must be OrderedDict!')

    def rose(day):
        return day.change > 0

    def fell(day):
        return day.change < 0

    # A conditional expression chooses the predicate that filter() will apply.
    predicate = rose if want_up else fell
    selected = list(filter(predicate, stock_array_dict.values()))

    if want_calc_sum:
        # Accumulate from 0.0 so that an empty selection still gives a float.
        total = 0.0
        for change in (day.change for day in selected):
            total = total + change
        return total

    return selected

# Usage examples
# All keyword arguments left at their defaults: list the rising days.
print('所有上涨的交易日:{}'.format(filter_stock(stock_dict)))

# want_up = False: list the falling days.
print('所有下跌的交易日:{}'.format(filter_stock(stock_dict, want_up = False)))

# want_calc_sum = True: sum of the changes over the rising days.
print('计算所有上涨的总和：{}'.format(filter_stock(stock_dict, want_calc_sum = True)))

# Sum of the changes over the falling days.
print('计算所有下跌的总和：{}'.format(filter_stock(stock_dict, want_up = False, want_calc_sum = True)))

所有上涨的交易日:[stock(date='20190413', price='32.56', change=0.235), stock(date='20190414', price='32.82', change=0.008)]
所有下跌的交易日:[stock(date='20190411', price='29.58', change=-0.019), stock(date='20190412', price='26.36', change=-0.109)]
计算所有上涨的总和：0.243
计算所有下跌的总和：-0.128


In [27]:
# 2.2.4 Partial functions
# partial() pre-binds the keyword arguments of filter_stock, so each new
# callable only needs the stock dictionary.
from functools import partial
# Select the rising trading days.
filter_stock_up_days = partial(filter_stock, want_up = True, want_calc_sum = False)

# Select the falling trading days.
filter_stock_down_days = partial(filter_stock, want_up = False, want_calc_sum = False)

# Sum the changes of the rising trading days.
filter_stock_up_sums = partial(filter_stock, want_up = True, want_calc_sum = True)

# Sum the changes of the falling trading days.
filter_stock_down_sums = partial(filter_stock, want_up = False, want_calc_sum = True)

print('所有上涨的交易日:{}'.format(filter_stock_up_days(stock_dict)))
print('所有下跌的交易日:{}'.format(filter_stock_down_days(stock_dict)))
print('计算所有上涨的总和：{}'.format(filter_stock_up_sums(stock_dict)))
print('计算所有下跌的总和：{}'.format(filter_stock_down_sums(stock_dict)))


所有上涨的交易日:[stock(date='20190413', price='32.56', change=0.235), stock(date='20190414', price='32.82', change=0.008)]
所有下跌的交易日:[stock(date='20190411', price='29.58', change=-0.019), stock(date='20190412', price='26.36', change=-0.109)]
计算所有上涨的总和：0.243
计算所有下跌的总和：-0.128


In [28]:
# 2.3 面向对象
## 单下划线表示 protect
## 双下划线表示 private
from collections import namedtuple
from collections import OrderedDict
class StockTradeDays(object):
    """
    Trading-day container built from a sequence of closing prices.

    Every day is stored as a ``stock(date, price, change)`` namedtuple in the
    public ``stock_dict`` OrderedDict, keyed by the date string. Instances
    support len(), iteration over the day tuples, and indexing by position
    or by slice.

    Naming convention used in this class: a single leading underscore marks
    a "protected" member, a double leading underscore a "private"
    (name-mangled) one.
    """

    def __init__(self, price_array, start_date, date_array=None):
        """
        :param price_array: sequence of closing prices; every element must be
                            accepted by float()
        :param start_date: number used to generate the date labels when
                           date_array is None
        :param date_array: optional explicit dates; each is converted with str()
        """
        # Private price sequence.
        self.__price_array = price_array
        # Private date sequence (needs the price sequence to be set first).
        self.__date_array = self._init_days(start_date, date_array)
        # Private sequence of relative changes.
        self.__change_array = self.__init_change()
        # Assemble the public OrderedDict from the three sequences.
        self.stock_dict = self._init_stock_dict()

    def __init_change(self):
        """
        Derive the change sequence from the price sequence.

        :return: list with 0 for the first day followed by
                 round((current - previous) / previous, 3) for every later day
        """
        prices = [float(price) for price in self.__price_array]
        # Pair every price with its successor by zipping the series against
        # itself shifted by one. The change is computed inline so the class
        # does not rely on functools.reduce having been imported elsewhere.
        change_array = [round((current - previous) / previous, 3)
                        for previous, current in zip(prices[:-1], prices[1:])]
        # The first day has no previous day; its change is set to 0.
        change_array.insert(0, 0)
        return change_array

    def _init_days(self, start_date, date_array):
        """
        Protected helper that builds the list of date strings.

        :param start_date: initial date number
        :param date_array: explicit date sequence, or None
        :return: list of str, one per generated or supplied date
        """
        if date_array is None:
            # No dates supplied: derive one label per price by adding the
            # element index to start_date.
            return [str(start_date + ind) for ind, _ in enumerate(self.__price_array)]
        # Dates supplied from outside: only convert each of them to str.
        return [str(date) for date in date_array]

    def _init_stock_dict(self):
        """
        Merge dates, prices and changes into an OrderedDict of namedtuples.

        :return: OrderedDict mapping date string -> stock(date, price, change)
        """
        stock_namedtuple = namedtuple('stock', ('date', 'price', 'change'))
        # zip() stops at the shortest of the three sequences.
        rows = zip(self.__date_array, self.__price_array, self.__change_array)
        return OrderedDict(
            (date, stock_namedtuple(date, price, change))
            for date, price, change in rows)

    def filter_stock(self, want_up=True, want_calc_sum=False):
        """
        Select a subset of the stored days.

        :param want_up: True selects days with change > 0, False days with
                        change < 0
        :param want_calc_sum: when True, return the sum of the selected
                              changes instead of the days
        :return: list of stock namedtuples, or their summed change as a float
        """
        # Conditional expression choosing the predicate for filter().
        filter_func = (lambda day: day.change > 0) if want_up else (lambda day: day.change < 0)

        want_days = list(filter(filter_func, self.stock_dict.values()))

        if not want_calc_sum:
            return want_days

        # Accumulate from 0.0 so that an empty selection still gives a float.
        change_sum = 0.0
        for day in want_days:
            change_sum += day.change
        return change_sum

    # __str__, __iter__, __getitem__ and __len__ below make the object
    # printable, iterable, indexable and sized.

    def __str__(self):
        return str(self.stock_dict)

    __repr__ = __str__

    def __iter__(self):
        """
        Iterate by delegating to stock_dict, yielding each stored day tuple.

        :return: generator of stock namedtuples in insertion order
        """
        for key in self.stock_dict:
            yield self.stock_dict[key]

    def __getitem__(self, ind):
        """
        Look a day up by position in the date sequence.

        :param ind: integer index (negative values count from the end) or slice
        :return: one stock namedtuple for an integer, a list of them for a slice
        """
        date_key = self.__date_array[ind]
        if isinstance(ind, slice):
            # A slice of the date list is itself a list and cannot be used as
            # a dictionary key, so resolve every date in it separately.
            return [self.stock_dict[key] for key in date_key]
        return self.stock_dict[date_key]

    def __len__(self):
        return len(self.stock_dict)

In [29]:
# Rebuild the list of price strings and pick an integer base for the date labels.
price_array = '30.14,29.58,26.36,32.56,32.82'.split(',')
data_base = 20170118
# Create a StockTradeDays instance named trade_days; this calls __init__ internally.
trade_days = StockTradeDays(price_array, data_base)
trade_days

OrderedDict([('20170118', stock(date='20170118', price='30.14', change=0)), ('20170119', stock(date='20170119', price='29.58', change=-0.019)), ('20170120', stock(date='20170120', price='26.36', change=-0.109)), ('20170121', stock(date='20170121', price='32.56', change=0.235)), ('20170122', stock(date='20170122', price='32.82', change=0.008))])

In [30]:
# len() works on the instance because the class defines __len__.
print('trade_days 对象长度：{}'.format(len(trade_days)))

trade_days 对象长度：5


In [31]:
# The Iterable ABC lives in collections.abc; the old alias in `collections`
# is no longer available on current Python versions.
from collections.abc import Iterable
# The instance counts as Iterable because the class defines __iter__.
if isinstance(trade_days, Iterable):
    for day in trade_days:
        print(day)

stock(date='20170118', price='30.14', change=0)
stock(date='20170119', price='29.58', change=-0.019)
stock(date='20170120', price='26.36', change=-0.109)
stock(date='20170121', price='32.56', change=0.235)
stock(date='20170122', price='32.82', change=0.008)


In [32]:
# With the default arguments the method returns the days whose change is > 0.
trade_days.filter_stock()

[stock(date='20170121', price='32.56', change=0.235),
 stock(date='20170122', price='32.82', change=0.008)]

In [38]:
from abupy import ABuSymbolPd
# Fetch the TSLA frame once (two years of data, per the original note) and
# take both columns from that single result, so the prices and dates are
# guaranteed to come from the same data and the fetch is not repeated.
tsla_kl_df = ABuSymbolPd.make_kl_df('TSLA', n_folds = 2)
# Closing prices as a plain list.
price_array = tsla_kl_df.close.tolist()
# Matching trading dates as a plain list.
date_array = tsla_kl_df.date.tolist()
price_array[:5], date_array[:5]

([310.17, 308.63, 314.07, 322.83, 318.89],
 [20170426, 20170427, 20170428, 20170501, 20170502])

In [40]:
# An explicit date_array is supplied here, so _init_days converts those dates
# with str() and the start-date argument is not used to generate labels.
trade_days = StockTradeDays(price_array, date_base, date_array)
print('trade_days 对象长度为：{}'.format(len(trade_days)))
# Negative indices are supported through __getitem__.
print('最后一天交易数据为：{}'.format(trade_days[-1]))

trade_days 对象长度为：424
最后一天交易数据为：stock(date='20181231', price=326.09, change=0.0)
