### 1.定义环境变量


In [1]:
import boto3

# Shared settings read by the later cells of this notebook.
aws_region = 'us-east-1'
repository_name = 'backtest-repo'

# Bucket holding the source data; rename it to suit your own conventions.
s3_source = 'backtest-source-2022-03-07'
# Bucket receiving the computed results; rename it to suit your own conventions.
s3_dest = 'backtest-dest-2022-03-07'

# Account id of the credentials in use, looked up through STS.
caller_identity = boto3.client('sts').get_caller_identity()
aws_account_id = caller_identity.get('Account')

### 2.创建S3存储桶

In [None]:
import boto3

# S3 client bound to the region chosen in the configuration cell.
s3 = boto3.client('s3', region_name=aws_region)

# Outside us-east-1, S3 needs the target region passed as a LocationConstraint;
# for us-east-1 itself the configuration must be omitted.
bucket_kwargs = {}
if aws_region != 'us-east-1':
    bucket_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': aws_region}

for bucket in (s3_source, s3_dest):
    # Create the bucket.
    s3.create_bucket(Bucket=bucket, **bucket_kwargs)

    # Confirm the bucket was created and is reachable.
    if s3.head_bucket(Bucket=bucket)['ResponseMetadata']['HTTPStatusCode'] == 200:
        print(bucket, ' created')

### 3.上传股票数据到S3存储桶

In [None]:
# Remove any previous checkout from the current directory (the same place
# `git clone` writes to), so re-running this cell does not fail.
! rm -rf build_backtest_with_aws_batch
! git clone https://github.com/forhead/build_backtest_with_aws_batch.git
# Copy the sample data files into the source bucket.
! aws s3 sync build_backtest_with_aws_batch/data_source s3://{s3_source}/

### 4.编写回测程序

AWS Batch 基于容器来执行任务，因此只需要让回测代码可以接受参数，并在参数中指定历史数据存储桶位置、历史数据文件名以及结果存储桶位置即可。

In [None]:
# -p keeps the cell re-runnable when the directory already exists.
!mkdir -p batch

In [None]:
%%writefile batch/backtest.py
#!/usr/bin/env python
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import datetime
import boto3
import json
import numpy as np
import pandas as pd
import os.path
import sys
import pytz
import time
from os.path import exists

import backtrader as bt

class MyStrategy(bt.Strategy):
    """Moving-average crossover strategy on the first data feed.

    Buys with all available cash when the close is above the simple moving
    average and sells the same number of units when the close drops below it.

    Params:
        maperiod: period of the simple moving average (default 15).
        printlog: when True, every ``log`` call is printed.
    """

    ## 1. Strategy parameters
    params = (('maperiod', 15),
              ('printlog', False),)

    ## 2. Initialisation
    def __init__(self):
        # Pending order, plus price and commission of the last executed buy.
        self.order = None
        self.buyprice = None
        self.buycomm = None
        # Number of units of the most recent buy; set here so the attribute
        # always exists before the sell path or its log message reads it.
        self.size = 0

        # Simple moving average over `maperiod` bars of the first data feed.
        self.sma = bt.indicators.SimpleMovingAverage(self.datas[0], period=self.params.maperiod)

    ## 3. Core strategy logic
    def next(self):
        """Evaluate the entry/exit rule for the current bar."""
        if self.order:  # an order is still waiting to be processed
            return

        close = self.datas[0].close[0]
        if not self.position:  # no open position
            # Entry rule: close is above the moving average.
            if close > self.sma[0]:
                self.size = int(self.broker.cash / close)
                self.log('买入委托：%.2f * %.0f' % (close, self.size))
                self.order = self.buy(size=self.size)
        else:
            # Exit rule: close is below the moving average.
            if close < self.sma[0]:
                self.log('卖出委托：%.2f * %.0f' % (close, self.size))
                self.order = self.sell(size=self.size)

    ## 4. Logging
    def log(self, txt, dt=None, doprint=False):
        """Print ``txt`` prefixed with a date when logging is enabled.

        Args:
            txt: message to print.
            dt: date used as prefix; defaults to the current bar's date of
                the first data feed.
            doprint: print even when the ``printlog`` param is False.
        """
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print(f'{dt.isoformat()},{txt}')

    def notify_order(self, order):
        """Log the outcome of an order and clear the pending-order marker."""
        # Submitted/accepted orders are still in flight: nothing to report.
        if order.status in [order.Submitted, order.Accepted]:
            return

        if order.status in [order.Completed]:
            # Messages are single literals: a backslash continuation inside
            # the string would embed the source indentation in the output.
            if order.isbuy():
                self.log('买入：\n价格：%.2f, 现金流：-%.2f, 手续费：%.2f' %
                         (order.executed.price, order.executed.value, order.executed.comm))
                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm
            else:
                self.log('卖出:\n价格：%.2f, 现金流：%.2f, 手续费：%.2f' %
                         (order.executed.price, order.executed.price * self.size, order.executed.comm))
            self.bar_executed = len(self)

        # Cancelled, margin-rejected, or rejected orders.
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('交易失败')

        self.order = None

    def notify_trade(self, trade):
        """Log gross and net profit once a trade is closed."""
        if not trade.isclosed:
            return
        self.log(f'策略收益：\n毛收益 {trade.pnl:.2f}, 净收益 {trade.pnlcomm:.2f}')

    def stop(self):
        """Print the final portfolio value; always printed (doprint=True)."""
        self.log('(MA均线： %2d日) 期末总资金 %.2f' %
                 (self.params.maperiod, self.broker.getvalue()), doprint=True)


def downloadFile(bucket_name, object_name, file_name, region_name='us-east-1'):
    """Download one S3 object to a local file.

    Args:
        bucket_name: bucket that holds the object.
        object_name: key of the object to fetch.
        file_name: local path the object is written to.
        region_name: region used for the S3 client; defaults to the
            previously hard-coded 'us-east-1'.
    """
    s3 = boto3.client('s3', region_name=region_name)
    s3.download_file(bucket_name, object_name, file_name)
    
def uploadFile(file_name, bucket_name, key_name, region_name='us-east-1'):
    """Upload a local file to S3.

    Args:
        file_name: local path of the file to upload.
        bucket_name: destination bucket.
        key_name: key the object is stored under.
        region_name: region used for the S3 client; defaults to the
            previously hard-coded 'us-east-1'.
    """
    s3 = boto3.client('s3', region_name=region_name)
    s3.upload_file(file_name, bucket_name, key_name)
        
def readData(file_name):
    """Load a daily-quote CSV and reshape it into an OHLCV DataFrame.

    The CSV must provide the columns ``ticker``, ``tradedate``, ``openprice``,
    ``closeprice``, ``highestprice``, ``lowestprice``, ``turnovervol``,
    ``accumadjfactor`` and ``isopen``.

    Args:
        file_name: path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame indexed by ``tradedate`` containing only rows where
        ``isopen`` is true, with adjusted ``open``/``close``/``high``/``low``,
        ``volume``, the zero-padded ``ticker`` and a zero ``openinterest``.
    """
    df = pd.read_csv(file_name)

    # Left-pad the ticker with zeros to six characters (the CSV parser may
    # have read it as a number and dropped leading zeros).
    df['ticker'] = df['ticker'].astype(str).str.rjust(6, '0')

    # Rescale prices by each row's adjustment factor relative to the factor of
    # the last row in the file (taken before non-trading rows are removed).
    last_factor = df['accumadjfactor'].iloc[-1]
    for price_col in ('openprice', 'closeprice', 'highestprice', 'lowestprice'):
        df[price_col] = df[price_col] * df['accumadjfactor'] / last_factor

    # Keep trading days only. The explicit `== True` also matches a 0/1
    # encoded column, which plain boolean indexing would not.
    df = df[df['isopen'] == True]  # noqa: E712

    # Keyword `columns=` works on every pandas version (positional `axis`
    # raises TypeError on pandas >= 2.0), and the non-inplace calls avoid
    # mutating a filtered slice.
    df = df.drop(columns=['isopen', 'accumadjfactor'])
    df = df.set_index('tradedate')
    df = df.rename(columns={
        'openprice': 'open',
        'closeprice': 'close',
        'highestprice': 'high',
        'lowestprice': 'low',
        'turnovervol': 'volume',
    })

    # Placeholder column: open interest is not available here, so use 0.
    df['openinterest'] = 0
    return df
    
        
if __name__ == '__main__':
    # Command-line entry point:
    #   backtest.py <source_bucket> <source_file> <dest_bucket>
    # Downloads the quote file from S3, runs the backtest, and uploads a small
    # text report to the destination bucket.
    if len(sys.argv) < 4:
        sys.exit('usage: backtest.py <source_bucket> <source_file> <dest_bucket>')
    source_bucket_name, source_file_name, dest_bucket_name = sys.argv[1:4]

    # Report name: source name without its last three characters (the dot of
    # ".csv" is kept) followed by a local timestamp.
    dest_file_name = source_file_name[:-3] + time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))

    # Starting capital; also the base for the reported return.
    initial_cash = 100000.0

    # Create the Cerebro engine.
    cerebro = bt.Cerebro()

    # download_file returns only after the transfer finished (and raises on
    # failure), so no polling for the local file is needed.
    downloadFile(source_bucket_name, source_file_name, source_file_name)
    df = readData(source_file_name)

    # Build the data feed covering the whole date range of the file.
    df.index = pd.to_datetime(df.index)
    start = df.index[0]
    end = df.index[-1]
    print(start, '-', end)
    data = bt.feeds.PandasData(dataname=df, fromdate=start, todate=end)
    cerebro.adddata(data)

    # Register the strategy.
    cerebro.addstrategy(MyStrategy, maperiod=15, printlog=True)

    # Starting cash and a commission of 0.02% per trade.
    cerebro.broker.setcash(initial_cash)
    cerebro.broker.setcommission(commission=0.0002)

    print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())

    # Run the backtest.
    cerebro.run()

    # Write the report locally, then upload it to the destination bucket.
    final_value = float(cerebro.broker.getvalue())
    with open(dest_file_name, "a") as f:
        f.write('Final Portfolio Value: %.2f\n' % final_value)
        f.write('Return: %.4f' % (final_value / initial_cash - 1))
    uploadFile(dest_file_name, dest_bucket_name, dest_file_name)

    sys.exit(0)

### 5.安装backtrader相关模块

In [None]:
# Upgrade pip, then install backtrader for the local test run of the script.
!pip install --upgrade pip
!pip install backtrader
# NOTE(review): matplotlib is pinned to 3.2.0, presumably for compatibility
# with backtrader's plotting code - confirm before changing the pin.
!pip install matplotlib==3.2.0
# Show the installed backtrader package details to confirm the install.
!pip show backtrader

### 6.验证代码可行性

In [None]:
# Run the script once locally: read 600519.csv from the source bucket and
# write the result file to the destination bucket.
!python batch/backtest.py {s3_source} 600519.csv {s3_dest}

### 7.创建一个镜像仓库，并推送容器镜像

In [None]:
import boto3
# ECR client in the region chosen in the configuration cell.
ecr = boto3.client('ecr', region_name=aws_region)
try:
    response = ecr.create_repository(repositoryName=repository_name)
except ecr.exceptions.RepositoryAlreadyExistsException:
    # Re-running the cell must not fail once the repository is in place.
    print(repository_name, 'already exists')
else:
    print(response['repository']['repositoryUri'], 'created')

创建Dockerfile

In [None]:
%%writefile batch/Dockerfile
# Python runtime for the backtest script.
FROM python:3.8

# Install all dependencies of backtest.py in a single layer without keeping
# pip's download cache in the image.
RUN pip --no-cache-dir install \
    backtrader \
    boto3 \
    pandas \
    matplotlib==3.2.0

# Unbuffered output so logs appear immediately; do not write .pyc files.
ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE

# The job command runs "python backtest.py ..." from the image root.
COPY backtest.py /
# Readable/executable by everyone, writable by the owner only.
RUN chmod 755 /backtest.py


将容器镜像推送到远程的ECR镜像仓库

In [None]:
# Build the image from the batch/ directory and tag it for the ECR repository.
!docker build batch -t {repository_name}
!docker tag {repository_name} {aws_account_id}.dkr.ecr.{aws_region}.amazonaws.com/{repository_name}
# Request the login token in the same region as the registry; without --region
# the CLI falls back to its configured default, which may differ.
!aws ecr get-login-password --region {aws_region} | docker login --username AWS --password-stdin {aws_account_id}.dkr.ecr.{aws_region}.amazonaws.com
!docker push {aws_account_id}.dkr.ecr.{aws_region}.amazonaws.com/{repository_name}

## 提交多个任务

接下来，我们会通过代码来提交并行任务。注意，我们需要先在控制台（console）上找到 job queue 以及 job definition 的名字，并填写到以下程序中。实际环境中，我们可以通过代码实现更高程度的自动化。

In [18]:
import boto3

# AWS Batch client using the default region/credentials of the environment.
batch_client = boto3.client('batch')


def submit_job(job_name, queue_name, job_definition, command):
    """Submit one AWS Batch job, overriding the container command.

    Args:
        job_name: name given to the submitted job.
        queue_name: job queue the job is submitted to.
        job_definition: job definition to run.
        command: container command, as a list of strings.

    Returns:
        The response of ``batch_client.submit_job`` (previously discarded),
        so callers can inspect or track the submitted job.
    """
    return batch_client.submit_job(
        jobName=job_name,
        jobQueue=queue_name,
        jobDefinition=job_definition,
        containerOverrides={
            'command': command,
        },
    )


# Job definition and job queue that were already set up in AWS Batch.
job_definition = 'backtest_strategy_1'
quene_name = 'backtest-queue'

# Bucket names: source data and computed results. Rename them to suit your
# own conventions.
s3_source = 'backtest-source-2022-03-07'
s3_dest = 'backtest-dest-2022-03-07'

source_file_list = [
    '600519.csv',
    '600559.csv',
    '600560.csv',
]

# Submit one job per source file, reusing the job definition and overriding
# only the container command.
for file in source_file_list:
    # Job name is <job definition>_for_<file name without ".csv">.
    job_name = f"{job_definition}_for_{file[:-4]}"
    # Resulting command, e.g.:
    # "python","backtest.py","backtest-source-2022-03-07","600519.csv","backtest-dest-2022-03-07"
    command = ["python", "backtest.py", s3_source, file, s3_dest]
    submit_job(job_name, quene_name, job_definition, command)
