## Intro


In [None]:
import this

In [None]:
%run bsm_mcs_euro.py

### 高性能

-   numexpr 还内建并行执行单独运算的功能


In [None]:
import math
loops = 2500000
a = range(1, loops)

def f(x):
    """Return ``3 * log(x) + cos(x) ** 2`` for a single number ``x``.

    Uses the scalar functions from ``math``, so ``x`` must be positive
    (``math.log`` raises ``ValueError`` otherwise).
    """
    log_term = 3 * math.log(x)
    cos_term = math.cos(x) ** 2
    return log_term + cos_term
%timeit r = [f(x) for x in a]

In [None]:
import numpy as np
a = np.arange(1, loops)
%timeit r = 3 * np.log(a) + np.cos(a) ** 2

In [None]:
import numexpr as ne
ne.set_num_threads(4)
f = '3 * log(a) + cos(a) ** 2'
%timeit r = ne.evaluate(f)

### Time-to-Results


In [None]:
import numpy as np
import pandas as pd
from pylab import plt, mpl

plt.style.use("seaborn-v0_8")
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

data = pd.read_csv('./../data/tr_eikon_eod_data.csv',
                           index_col=0, parse_dates=True)
data = pd.DataFrame(data['.SPX'])
data.dropna(inplace=True)
data.info()

data['rets'] = np.log(data / data.shift(1))
data['vola'] = data['rets'].rolling(252).std() * np.sqrt(252)

data[['.SPX', 'vola']].plot(subplots=True, figsize=(10, 6));

### Data-Driven Finance


In [None]:
import numpy as np
import pandas as pd

data = pd.read_csv('http://hilpisch.com/tr_eikon_eod_data.csv',
                   index_col=0, parse_dates=True)
data = pd.DataFrame(data['AAPL.O'])
data['Returns'] = np.log(data / data.shift())
data.dropna(inplace=True)
lags = 6

cols = []
for lag in range(1, lags + 1):
    col = 'lag_{}'.format(lag)
    data[col] = np.sign(data['Returns'].shift(lag))
    cols.append(col)
data.dropna(inplace=True)

In [None]:
# NOTE: the Eikon Data API requires a subscription and an API connection; besides
# structured financial data (e.g. historical quotes) it also gives access to
# unstructured data such as news.
from sklearn.svm import SVC

model = SVC(gamma='scale')
# The estimator has to be fitted before predict() can be called:
# features are the lagged return signs, the label is the sign of the current return.
model.fit(data[cols], np.sign(data['Returns']))
# In-sample prediction of the return direction.
data['Prediction'] = model.predict(data[cols])
# Strategy log return = predicted direction times the realised log return.
data['Strategy'] = data['Prediction'] * data['Returns']
# Cumulative sum of log returns, exponentiated, gives the gross performance paths.
data[['Returns', 'Strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
plt.savefig('./images/AAPL_trading_strategy.png')

In [None]:
import eikon as ek

data = ek.get_timeseries('AAPL.O', fields='*',
                         start_date='2018-10-18 16:00:00',
                         end_date='2018-10-18 17:00:00', interval='tick')
data.info()

In [None]:
data.tail()

In [None]:
news = ek.get_news_headlines('R:AAPL.O Language:LEN',
                             date_from='2018-05-01',
                             date_to='2018-06-29', count=7)

story_html = ek.get_news_story(news.iloc[1, 2])

from bs4 import BeautifulSoup
story = BeautifulSoup(story_html, 'html5lib').get_text()
print(story[83:958])

In [None]:
import numpy as np
import pandas as pd

data = pd.read_csv('../../source/tr_eikon_eod_data.csv', index_col=0, parse_dates=True)
data = pd.DataFrame(data['AAPL.O'])
data['Returns'] = np.log(data / data.shift())
data.dropna(inplace=True)
lags = 6
cols = []
for lag in range(1, lags + 1):
    col = 'lag_{}'.format(lag)
    data[col] = np.sign(data['Returns'].shift(lag))
    cols.append(col)
data.dropna(inplace=True)

from sklearn.svm import SVC
model = SVC(gamma='auto')
model.fit(data[cols], np.sign(data['Returns']))
data['Prediction'] = model.predict(data[cols])
data['Strategy'] = data['Prediction'] * data['Returns']
data[['Returns', 'Strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))

## Python Basic Architecture


## 数据类型与结构


In [None]:
import keyword
keyword.kwlist

In [None]:
a = 10
type(a)

In [None]:
a.bit_length()

In [None]:
b = 0.35
b + 0.1

In [None]:
'http://www.python.org'.strip('htp:/')

In [None]:
'this is a float %f' % 15.3456
'this is a float %8f' % 15.3456
'this is a float %.2f' % 15.345
'this is a float %8.2f' % 15.345
'this is a float %08.2f' % 15.345
'this is a string %10s' % 'Python'

In [None]:
'this is a float {:.2f}'.format(15.3456)

### 正则表达式


In [None]:
import re

# Multi-line text with three ';'-separated records, each starting with a quoted timestamp.
series = """
            '01/18/2014 13:00:00', 100, '1st';
            '01/18/2014 13:30:00', 110, '2nd';
            '01/18/2014 14:00:00', 120, '3rd'
            """
# Raw string, so \s reaches the regex engine without a doubled backslash.
# The pattern matches a single-quoted run of digits, '/', ':' and whitespace,
# i.e. the timestamps; quoted fields such as '1st' contain letters and do not match.
dt = re.compile(r"'[0-9/:\s]+'")
result = dt.findall(series)
result

In [None]:
from datetime import datetime

pydt = datetime.strptime(result[0].replace("'", ""),
                         '%m/%d/%Y %H:%M:%S')
pydt

In [None]:
 print(pydt, type(pydt))

### 基本数据结构


In [None]:
t = (1, 2.5, 'data')
type(t)

In [None]:
t.count('data')

In [None]:
t.index(1)

In [None]:
l = [1, 2.5, 'data', 3, 4, 8, 9]

In [None]:
for i in range(2, 5):
    print(l[i])

#### 拷贝


In [None]:
l = [1, 2.5, 'data', 3, 4, 8, 9]
new_l = l  # plain assignment: both names refer to the same list object
new_l[2] = 99  # so this write is visible through l as well
l

In [None]:
l.append([5, 6])  # add a nested (mutable) list as the last element
new_l = l.copy()  # shallow copy: a new outer list, but the nested list is shared
new_l[2] = 199  # replacing an element of the copy does not affect l
new_l[-1][1] = 8  # mutating the shared nested list is visible through l too
l

In [None]:
import copy

new_l = copy.deepcopy(l)  # deep copy: the nested list is duplicated as well
new_l[2] = 299  # neither of the two writes below reaches l
new_l[-1][1] = 18
l

In [None]:
 [i ** 2 for i in range(5)]

In [None]:
def even(x):
    """Return True when ``x`` leaves no remainder on division by 2."""
    remainder = x % 2
    return remainder == 0


list(map(lambda x: x ** 2, range(10)))

In [None]:
list(filter(even, range(15)))

In [None]:
d = {
    'Name': 'Angela Merkel',
    'Country': 'Germany',
    'Profession': 'Chancelor',
    'Age': 64
}

print(d.keys(), d.values(), d.items())

In [None]:
for item in d.items():
    print(item)

In [None]:
# Set literals; the repeated 'd' and 'du' entries collapse to one element each.
s = {'u', 'd', 'ud', 'du', 'd', 'du'}
t = {'d', 'dd', 'uu', 'u'}
# Union, intersection, difference (s without t) and symmetric difference, in that order.
print(s | t, s & t, s - t, s ^ t)

## 用 NumPy 进行数值计算


In [None]:
from copy import deepcopy

v = [0.5, 0.75, 1.0, 1.5, 2.0]
# Build three independent rows. `3 * [deepcopy(v)]` would repeat a reference to
# one single copy, so a write to m[0] would show up in every row.
m = [deepcopy(v) for _ in range(3)]

In [None]:
import numpy as np

np.array([0, 0.5, 1.0, 1.5, 2.0])

#### 特性


In [None]:
a = np.arange(8, dtype=float)
print(a, a.sum(), a.std(), a.cumsum())

In [None]:
b = np.array([a, a * 2])
print(b, b[:, 1], b[1, :])

In [None]:
print(b.sum(axis=0), b.sum(axis=1))

In [None]:
c = np.zeros((2, 3), dtype='i', order='C')
c1 = np.ones((2, 3, 4), dtype='i', order='C')
c2 = np.empty((2, 3, 2))
print(c, c1, c2, np.eye(5), np.linspace(5, 15, 12))

In [None]:
g = np.linspace(5, 15, 12)
print(g.size, g.itemsize, g.ndim, g.shape, g.dtype, g.nbytes)

#### 改变组成与大小


In [None]:
g = np.arange(15)
h = g.reshape((3, 5))
h

In [None]:
h.T

In [None]:
h.transpose()

In [None]:
np.resize(g, (4, 5))

In [None]:
np.hstack((h, 2 * h))

In [None]:
np.vstack((h, 0.5 * h))

In [None]:
h.flatten(order='F')

In [None]:
h > 8

In [None]:
(h == 5).astype(int)

In [None]:
(h > 4) & (h <= 12)

In [None]:
h[h > 8]

In [None]:
np.where(h > 7, 1, 0)

#### 速度对比


In [None]:
import random
I = 5000
%time mat = [[random.gauss(0, 1) for j in range(I)] for i in range(I)]

In [None]:
%time sum([sum(l) for l in mat])

In [None]:
import sys
sum([sys.getsizeof(l) for l in mat])

In [None]:
%time mat = np.random.standard_normal((I, I))

In [None]:
%time mat.sum()

In [None]:
mat.nbytes

In [None]:
sys.getsizeof(mat)

#### 结构化数组


In [None]:
dt = np.dtype([('Name', 'S10'), ('Age', 'i4'),
               ('Height', 'f'), ('Children/Pets', 'i4', 2)])
dt

In [None]:
dt1 = np.dtype({'names': ['Name', 'Age', 'Height', 'Children/Pets'],
                'formats': 'O int float int,int'.split()})
s = np.array([('Smith', 45, 1.83, (0, 1)),
              ('Jones', 53, 1.72, (2, 2))], dtype=dt1)
s

#### 代码向量化


In [None]:
np.random.seed(100)
r = np.arange(12).reshape((4, 3))
s = np.arange(12).reshape((4, 3)) * 0.5

In [None]:
2 * r + 3

In [None]:
s1 = np.arange(0, 12, 4)
s1.reshape(-1, 1)

In [None]:
s1.shape  # still (3,): the reshape above returned a new array; its -1 asks NumPy to infer that dimension

#### 内存布局


In [None]:
# One million rows and five columns of standard-normal draws.
x = np.random.standard_normal((1000000, 5))
y = 2 * x + 3
# Stack both arrays into one 3-d array, once in C (row-major) order and once in
# Fortran (column-major) order.
C = np.array((x, y), order='C')
F = np.array((x, y), order='F')
x = 0.0
y = 0.0  # rebind x and y to drop the references to the source arrays and free memory
print(C.shape, F.shape)

In [None]:
C[:2].round(2)

In [None]:
%timeit C.sum()
%timeit F.sum()

In [None]:
%timeit C.sum(axis=0)
%timeit F.sum(axis=0)

In [None]:
%timeit C.sum(axis=1)
%timeit F.sum(axis=1)

In [None]:
np.random.standard_normal((10, 5))

## pandas 数据分析


In [None]:
import pandas as pd

df = pd.DataFrame([10, 20, 30, 40], columns=['numbers'], index=['a', 'b', 'c', 'd'])
df

In [None]:
print(df.index, df.columns)

In [None]:
df.loc['c']

In [None]:
df.loc[['a', 'd']]

In [None]:
df.iloc[1:3]

In [None]:
df.apply(lambda x: x ** 2)

In [None]:
df ** 2

In [None]:
df['floats'] = (1.5, 2.5, 3.5, 4.5)
df['names'] = pd.DataFrame(['Yves', 'Sandra', 'Lilli', 'Henry'], index=['d', 'a', 'b', 'c'])
df

In [None]:
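# DataFrame.append is deprecated (removed in pandas 2.0); the calls below are kept for reference only - use pd.concat instead.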
# df.append({'numbers': 100, 'floats': 5.75, 'names': 'Jil'}, ignore_index=True)

# df = df.append(pd.DataFrame({'numbers': 100, 'floats': 5.75, 'names': 'Jil'}, index=['y',]))
# df = df.append(pd.DataFrame({'names': 'Liz'}, index=['z',]), sort=False)
# df

In [None]:
import numpy as np

np.random.seed(100)
a = np.random.standard_normal((9, 4))
df = pd.DataFrame(a)
df.columns = ['No1', 'No2', 'No3', 'No4']
dates = pd.date_range('2019-1-1', periods=9, freq='ME')
df.index = dates
df

In [None]:
# DataFrame.info() prints its report itself and returns None, so call it on its
# own instead of passing it to print(), which would only emit a stray "None".
df.info()
print(df.describe(), df.sum(), df.mean(), df.mean(axis=0), df.cumsum())

In [None]:
from pylab import plt, mpl
import seaborn

plt.style.use('seaborn-v0_8')
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline
df.cumsum().plot(lw=2.0, figsize=(10, 6));


In [None]:
df.plot.bar(figsize=(10, 6), rot=15)
# df.plot(kind='bar', figsize=(10, 6))

#### Series


In [None]:
S = pd.Series(np.linspace(0, 15, 7), name='series')
S

In [None]:
s = df['No1']
type(s)

In [None]:
s.plot(lw=2.0, figsize=(10, 6))

#### groupby 操作


In [None]:
df['Quarter'] = ['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2', 'Q3', 'Q3', 'Q3']
groups = df.groupby('Quarter')
print(groups.mean(), '\r\n', groups.max())

In [None]:
groups.aggregate(["min", "max"]).round(2)

In [None]:
df['Odd_Even'] = ['Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd']
groups = df.groupby(['Quarter', 'Odd_Even'])
groups[['No1', 'No4']].aggregate(["sum", "mean"])

In [None]:
data = np.random.standard_normal((10, 2))
df = pd.DataFrame(data, columns=['x', 'y'])
df.tail()

#### 复杂选择


In [None]:
(df['x'] > 0) & (df['y'] < 0)

In [None]:
df[(df['x'] > 0) & (df['y'] < 0)]

#### 联接、连接和合并


In [None]:
df1 = pd.DataFrame(['100', '200', '300', '400'], index=['a', 'b', 'c', 'd'], columns=['A',])
df2 = pd.DataFrame(['200', '150', '50'], index=['f', 'b', 'd'], columns=['B',])
pd.concat((df1, df2), sort=False)

In [None]:
pd.concat((df1, df2), ignore_index=True, sort=False)

In [None]:
df1.join(df2)

In [None]:
df2.join(df1)

In [None]:
df1.join(df2, how='left')

In [None]:
df1.join(df2, how='right')

In [None]:
df1.join(df2, how='inner')

In [None]:
df1.join(df2, how='outer')

In [None]:
df = pd.DataFrame()
df['A'] = df1['A']
df['B'] = df2
df

In [None]:
df = pd.DataFrame({'A': df1['A'], 'B': df2['B']})
df

In [None]:
c = pd.Series([250, 150, 50], index=['b', 'd', 'c'])
df1['C'] = c
df2['C'] = c
pd.merge(df1, df2)

In [None]:
pd.merge(df1, df2, on='C')

pd.merge(df1, df2, how='outer')

In [None]:
pd.merge(df1, df2, left_on='A', right_on='B')
pd.merge(df1, df2,
         left_on='A', right_on='B', how='outer')
pd.merge(df1, df2, left_index=True, right_index=True)

#### 性能特征


In [None]:
data = np.random.standard_normal((1000000, 2))
df = pd.DataFrame(data, columns=['x', 'y'])

In [None]:
%time res = df['x'] + df['y']
%time res = df.sum(axis=1)

In [None]:
%time res = df.values.sum(axis=1)
%time res = np.sum(df, axis=1)
%time res = np.sum(df.values, axis=1)

%time res = df.eval('x + y')
%time res = df.apply(lambda row: row['x'] + row['y'], axis=1)

## 面向对象编程


### 类


In [None]:
class HumanBeing(object):
    """Holds a first name, an eye color and a position that starts at 0."""

    def __init__(self, first_name, eye_color):
        """Store ``first_name`` and ``eye_color`` and reset the position to 0."""
        self.first_name, self.eye_color, self.position = first_name, eye_color, 0

    def walk_steps(self, steps):
        """Add ``steps`` to the current position."""
        self.position += steps


Sandra = HumanBeing('Sandra', 'blue')

In [None]:
n = 5
print(n.numerator, n.bit_length(), n.__sizeof__())  # __sizeof__: memory usage

In [None]:
class FinancialInstrument(object):
    """Financial instrument with a public symbol and a name-mangled price."""

    # Class attribute, shared by every instance.
    author = 'Yves Hilpisch'

    def __init__(self, symbol, price):
        """Keep ``symbol`` public and store ``price`` in the private ``__price`` attribute."""
        self.symbol, self.__price = symbol, price


aapl = FinancialInstrument('AAPL', 100)
print(type(aapl), aapl, aapl.__str__())

In [None]:
class FinancialInstrument(FinancialInstrument):
    """Extend the previously defined class with accessors for the private price."""

    # The subclass reuses the name ``FinancialInstrument``, so ``__price`` mangles to
    # the same ``_FinancialInstrument__price`` attribute that the base __init__ sets.

    def get_price(self):
        """Return the currently stored price."""
        current_price = self.__price
        return current_price

    def set_price(self, price):
        """Replace the stored price with ``price``."""
        self.__price = price


fi = FinancialInstrument('AAPL', 100)
fi.set_price(105)
fi.get_price()

In [None]:
class PortfolioPosition(object):
    """A position: an instrument together with the number of units held."""

    def __init__(self, financial_instrument, position_size):
        """Store the instrument as ``position`` and the size as a private attribute."""
        self.position, self.__position_size = financial_instrument, position_size

    def get_position_size(self):
        """Return the current position size."""
        size = self.__position_size
        return size

    def update_position_size(self, position_size):
        """Overwrite the position size with ``position_size``."""
        self.__position_size = position_size

    def get_position_value(self):
        """Return position size times the instrument's ``get_price()``."""
        size = self.__position_size
        unit_price = self.position.get_price()
        return size * unit_price


pp = PortfolioPosition(fi, 10)
pp.position.set_price(105)
pp.get_position_value()

### Python 数据模型

-   这些类支持以下任务和结构
    -   迭代
    -   集合处理
    -   属性访问
    -   运算符重载
    -   函数与方法调用
    -   对象创建与销毁
    -   字符串表示(例如，用于打印)
    -   托管上下文(即 with 块)。


In [None]:
class Vector(object):
    """Three-component vector that hooks into Python's data model.

    Supports repr(), abs(), bool(), ``+`` with another vector, ``*`` with a
    scalar on the right-hand side, len(), integer indexing and iteration.
    """

    def __init__(self, x=0, y=0, z=0):
        """Store the three components; each one defaults to 0."""
        self.x, self.y, self.z = x, y, z

    def __repr__(self):
        """Return ``Vector(x, y, z)`` built from the repr of each component."""
        components = (self.x, self.y, self.z)
        return 'Vector(%r, %r, %r)' % components

    def __abs__(self):
        """Return the Euclidean length of the vector."""
        squared_norm = self.x ** 2 + self.y ** 2 + self.z ** 2
        return squared_norm ** 0.5

    def __bool__(self):
        """A vector is truthy unless its length is zero."""
        return True if abs(self) else False

    def __add__(self, other):
        """Return a new Vector holding the component-wise sum with ``other``."""
        return Vector(self.x + other.x, self.y + other.y, self.z + other.z)

    def __mul__(self, scalar):
        """Return a new Vector with every component multiplied by ``scalar``."""
        scaled = [component * scalar for component in (self.x, self.y, self.z)]
        return Vector(*scaled)

    def __len__(self):
        """A Vector always has three components."""
        return 3

    def __getitem__(self, i):
        """Return the component at index ``i`` (0..2 or -3..-1).

        Raises:
            IndexError: for any other index.
        """
        if i in (0, -3):
            return self.x
        if i in (1, -2):
            return self.y
        if i in (2, -1):
            return self.z
        raise IndexError('Index out of range.')

    def __iter__(self):
        """Yield the components in index order."""
        yield from map(self.__getitem__, range(len(self)))


v = Vector(1, 2, 3)
print(v, abs(v), bool(v), len(v), v[-2])

In [None]:
v + Vector(2, 3, 4)


In [None]:
v * 2

## todo

-   正态分布有什么特点
-   类可以多种实现么
