## ベイズ的最適化
- ある関数の計算が重いとき、近似しないとどうしようもないことがある。
- **Gaussian Process**を使って関数近似をしながら最適化をする

手順

1. 評価関数f(x)を用意する
2. $\mu$と$\sigma$を、Gaussian Processにて計算する(UCBの場合)
    - $\mu_{t-1}(x)+\sqrt{\beta}\sigma_{t-1}(x)$が獲得関数(UCB)、$\beta$は探索と活用のバランスを決める定数
3. $x_t = \mathrm{argmax}_{x} \left[\mu_{t-1}(x)+\sqrt{\beta}\sigma_{t-1}(x)\right]$
4. 評価関数に入れて、新しい点$(x_{t},f(x_t))$を得る
5. 2から4の繰り返し



In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
def str2time(string):
    """Parse a ``YYYY/MM/DD`` date string into a ``datetime`` object."""
    date_format = '%Y/%m/%d'
    parsed = datetime.strptime(string, date_format)
    return parsed
# Load the price table, index the rows by their parsed ``date`` column and
# put them in chronological order.
# Alternative data set: pd.read_csv("day14-3765-2017.csv")
df_orig = pd.read_csv("stock_nikkei_3yrs.csv")
df_orig.index = df_orig["date"].map(str2time)
df_orig = df_orig.sort_index()
# Last expression of the cell: shows the first rows in the notebook.
df_orig.head()

## 評価関数
- 100株&100万円からスタート
- ゴールデンクロスで買い(全資産)、デッドクロスで売り(全株)
- 2017/12/13時点で株を全部売り
- 評価値は、何%資産が増えたか(GPyOptは最小化を行うため、`f_2D`は符号を反転した値を返す)


In [None]:
import GPy
import GPyOpt
def f_2D(windows, df=None):
    """Return the negated percentage gain of a moving-average cross strategy.

    The simulation starts with 1,000,000 in cash plus 100 shares. When
    ``ma1 - ma2`` changes from negative to positive (golden cross) all cash is
    converted to shares at that row's close; when it changes from positive to
    negative (dead cross) all shares are converted to cash. Shares still held
    at the end are valued at the last close.

    Args:
        windows: 2-D array-like whose first row is ``(window1, window2)``.
            Both values are truncated to ``int`` and used as the rolling-window
            lengths of ``ma1`` and ``ma2``.
        df: Optional price table with a ``close`` column. It is sorted by its
            index before use. Defaults to the module-level ``df_orig``.

    Returns:
        The percentage change of the total asset value with its sign flipped,
        so that a minimizer finds the most profitable window pair.
    """
    window1, window2 = (int(w) for w in windows[0])
    prices = df_orig if df is None else df
    close_series = prices.sort_index(ascending=True)["close"]
    diff = (
        close_series.rolling(window=window1).mean()
        - close_series.rolling(window=window2).mean()
    ).values
    close = close_series.values.astype(float)

    # Keep the holdings as floats: storing them in an integer array silently
    # drops the fractional part of every purchase and sale.
    cash, shares = 1000000.0, 100.0
    first = cash + shares * close[0]

    # Rows where either average is still NaN compare False on both branches,
    # so no trade happens until both windows are filled.
    for prev, cur, price in zip(diff[:-1], diff[1:], close[1:]):
        if prev < 0 < cur:
            # Golden cross: buy with all available cash.
            if cash != 0:
                shares += cash / price
                cash = 0.0
        elif prev > 0 > cur:
            # Dead cross: sell every share.
            if shares != 0:
                cash += shares * price
                shares = 0.0

    last = cash + shares * close[-1]
    result = ((last / first) - 1) * 100
    print("window1:{} window2:{} result : {}".format(window1, window2, result))
    return -result

## 最適化のsetting

In [None]:
# Search space: both moving-average windows are sampled as continuous values
# in [3, 300]; f_2D truncates them to integers before use.
bounds =[{'name': 'var_1', 'type': 'continuous', 'domain': (3,300)},
         {'name': 'var_2', 'type': 'continuous', 'domain': (3,300)}]
# Stopping criteria, passed positionally to run_optimization below.
max_iter = 50
max_time = 60  # maximum time 60 seconds
# Gaussian-process model with a Matern 5/2 kernel over the two window
# parameters; the acquisition type requested is 'EI'.
myBopt2D = GPyOpt.methods.BayesianOptimization(f=f_2D,
                                              domain=bounds,
                                              model_type = 'GP',
                                              acquisition_type='EI',
                                              acquisition_weight=2,
                                              kernel=GPy.kern.Matern52(input_dim=2))
myBopt2D.run_optimization(max_iter,max_time,verbosity=False)
myBopt2D.plot_acquisition()
# f_2D returns the negated gain, so the sign is flipped back for display.
# np.ravel lets fx_opt be read whether it is a scalar or a one-element array.
print("opt (window,value) = ({},{:.3f})".format(myBopt2D.x_opt,-float(np.ravel(myBopt2D.fx_opt)[0])))

## ゴールデンクロスの確認

In [None]:
def plot_MA(windows):
    """Plot the close price with two moving averages and replay the cross strategy.

    Draws ``close``, ``ma1`` and ``ma2`` from the module-level ``df_orig``,
    marks every golden cross (``ma1 - ma2`` turns from negative to positive)
    in blue and every dead cross (positive to negative) in red, and prints
    each cross together with the holdings after each trade. The trading rules
    are: start with 1,000,000 in cash plus 100 shares, buy with all cash on a
    golden cross, sell all shares on a dead cross.

    Args:
        windows: Pair ``(window1, window2)`` of rolling-window lengths for
            ``ma1`` and ``ma2``. Each value is truncated to ``int``.

    Returns:
        The percentage change of the total asset value, with shares still
        held at the end valued at the last close.
    """
    window1, window2 = windows
    print(window1,window2)
    window1 = int(window1)
    window2 = int(window2)
    df1 = df_orig.sort_index(ascending=True)
    xdate = [x.date() for x in df1.index]
    df1["ma1"] = df1.close.rolling(window=window1).mean()
    df1["ma2"] = df1.close.rolling(window=window2).mean()
    plt.figure(figsize=(15,5))
    plt.plot(xdate, df1.close,label="original")
    plt.plot(xdate, df1.ma2,label="{}days".format(window2))
    plt.plot(xdate, df1.ma1,label="{}days".format(window1))
    plt.legend()

    # Pull the columns out once; per-row DataFrame lookups are slow.
    close = df1["close"].values.astype(float)
    ma1 = df1["ma1"].values
    diff = ma1 - df1["ma2"].values
    dates = df1["date"].values

    # [cash, shares] kept as floats: an integer array would silently drop the
    # fractional part of every purchase and sale.
    asset = np.array([1000000, 100], dtype=float)
    first = asset[0]+asset[1]*close[0]
    print("First Asset:{}".format(first))
    # NaN differences (before both windows are filled) compare False on both
    # branches, so they never count as a cross.
    for i in range(1, len(close)):
        if diff[i-1] < 0 and diff[i] > 0:
            print("{}:GOLDEN CROSS at {}".format(dates[i],close[i]))
            plt.scatter(xdate[i],ma1[i],marker="o",s=100,color="b")
            plt.scatter(xdate[i],close[i],marker="o",s=50,color="b",alpha=0.5)
            if asset[0] != 0:
                asset[1] += asset[0]/close[i]
                asset[0] = 0
                print(asset)
        elif diff[i-1] > 0 and diff[i] < 0:
            print("{}:DEAD  CROSS at {}".format(dates[i],close[i]))
            # Large marker on the moving average, small one on the close,
            # mirroring the golden-cross markers.
            plt.scatter(xdate[i],ma1[i],marker="o",s=100,color="r")
            plt.scatter(xdate[i],close[i],marker="o",s=50,color="r",alpha=0.5)
            if asset[1] != 0:
                asset[0] += asset[1]*close[i]
                asset[1] = 0
                print(asset)
    last = asset[0]+asset[1]*close[-1]
    print("Final Asset: {}".format(last))
    return ((last/first)-1)*100
# Replay and plot the strategy using the best point found by the optimizer.
plot_MA(myBopt2D.x_opt)