In [2]:
# Imports and setup

# Start with the imports.

# You’ll use pandas_datareader to get data to compare your strategy with the S&P 500, matplotlib for charting, and PyFolio for performance analysis.

import pandas as pd
import pandas_datareader.data as web

import matplotlib.pyplot as plt

from zipline import run_algorithm
from zipline.api import order_target, record, symbol
from zipline.finance import commission, slippage


import pyfolio as pf

import warnings
warnings.filterwarnings('ignore')
# Since you’re building the backtest in Jupyter Notebook, you need to load the Zipline “magics.” Running this lets you run the Zipline command line right in your Notebook.

%load_ext zipline

The zipline extension is already loaded. To reload it, use:
  %reload_ext zipline


In [None]:
# Ingesting free price data

# Zipline creates data “bundles” for backtesting. You can build custom bundles to ingest any data you want.

# Today, you’ll use the pre-built Quandl bundle to ingest price data between 2000 and 2018 for free.
! set QUANDL_API_KEY=os.getenv('QUANDL_API_KEY')
! zipline ingest -b quandl
# You will see Zipline working its magic to download the data and package it into highly efficient data stores.

zsh:1: unknown file attribute: Q
[2023-05-27T09:59:44-0600-INFO][zipline.data.bundles.core]
 Ingesting quandl
[2023-05-27T09:59:44-0600-INFO][zipline.data.bundles.quandl]
 Downloading WIKI metadata.
[?25lDownloading WIKI Prices table from Quandl  [------------------------------------]    0%  05:38:55

In [3]:
# Building the algorithm

# Every Zipline strategy must have an initialize function. This is run at the beginning of the strategy.

# Here, you set a counter to track the days, choose the symbol to trade, and set the commission and slippage models.

def initialize(context):
    """Prepare strategy state once, before any bar is processed.

    Stores a bar counter (``context.i``) and the traded asset
    (``context.asset``) on ``context``, then registers the commission and
    slippage models on it.

    Parameters
    ----------
    context : object
        Strategy state object supplied by the backtest runner; must expose
        ``set_commission`` and ``set_slippage``.
    """
    # Build the cost models up front so the registration calls read cleanly.
    per_share_fee = commission.PerShare(cost=0.01)
    fixed_spread = slippage.FixedSlippage(spread=0.01)

    context.i = 0  # bar counter, starts at zero
    context.asset = symbol("AAPL")

    context.set_commission(per_share_fee)
    context.set_slippage(fixed_spread)
# Every Zipline strategy must also have a handle_data function.

# This function is run at every “bar.” Depending on your data, it might run every minute or day. handle_data is where your strategy logic lives.

# In today’s example, you will build a simple dual-moving average cross-over strategy.

# Strategy parameters, kept together so the warm-up guard and the history
# request cannot drift apart.
SHORT_WINDOW = 14    # bars in the fast moving average
LONG_WINDOW = 50     # bars in the slow moving average; also the warm-up length
TARGET_SHARES = 100  # shares held while the fast average is above the slow one


def handle_data(context, data):
    """Process one bar of the dual moving-average crossover strategy.

    Parameters
    ----------
    context : object
        Strategy state. ``context.i`` is incremented on every call and
        ``context.asset`` is the asset being traded.
    data : object
        Bar data exposing ``history(asset, field, bar_count=..., frequency=...)``;
        the result is expected to be a pandas Series of prices.

    Returns
    -------
    None
        Orders are placed through ``order_target`` as a side effect.
    """
    # Skip bars until a full long window of prices exists.
    context.i += 1
    if context.i < LONG_WINDOW:
        return

    # One history request serves both averages: the short window is simply the
    # tail of the long window.
    prices = data.history(
        context.asset,
        "price",
        bar_count=LONG_WINDOW,
        frequency="1d",
    )
    long_mavg = prices.mean()
    short_mavg = prices.iloc[-SHORT_WINDOW:].mean()

    # Trading logic. order_target orders as many shares as needed to reach the
    # requested position. Equal averages leave the position untouched.
    if short_mavg > long_mavg:
        order_target(context.asset, TARGET_SHARES)
    elif short_mavg < long_mavg:
        order_target(context.asset, 0)
# Use the counter to make sure there is enough data to compute the moving averages. If not, skip processing for the day.

# If there is enough data, get 14 and 50 days’ worth of prices and calculate the moving average.

# Then, execute the trading logic.

# When the 14-day moving average crosses over the 50-day moving average, the strategy buys 100 shares. When the 14-day moving average crosses under the 50-day moving average, it sells them.

In [4]:
# Run the backtest

# The first step is to define the start and end dates.

# Backtest window. A bare year string parses to January 1 of that year.
start = pd.Timestamp("2000")
end = pd.Timestamp("2018")

# Benchmark: the S&P 500 series from FRED, turned into period-over-period
# returns. The first value is NaN because it has no prior observation.
# NOTE(review): confirm the FRED "SP500" series covers the whole backtest
# window; TODO verify before relying on the comparison.
sp500 = web.DataReader("SP500", "fred", start, end)["SP500"]
benchmark_returns = sp500.pct_change()

# Run the strategy against the ingested "quandl" bundle. `perf` is read by
# the analysis cell that follows, so the bundle ingest must have completed.
perf = run_algorithm(
    start=start,
    end=end,
    initialize=initialize,
    handle_data=handle_data,
    capital_base=100000,
    benchmark_returns=benchmark_returns,
    bundle="quandl",
    data_frequency="daily",
)
# Take a minute to explore the data in the perf DataFrame. There are 40 columns of rolling analytics! That’s the power of Zipline.

ValueError: SQLite file '/Users/baslad01/.zipline/data/quandl/2023-05-27T15;44;29.025999/assets-7.sqlite' doesn't exist.

In [5]:
# Analyze performance

# Now that the backtest is finished, use PyFolio to get a breakdown of the results.

# Split the backtest result into the three inputs the tear sheet takes.
(returns, positions, transactions) = (
    pf.utils.extract_rets_pos_txn_from_zipline(perf)
)

# Render the full tear sheet.
# NOTE(review): live_start_date presumably marks where the out-of-sample
# period begins; confirm against the PyFolio documentation.
pf.create_full_tear_sheet(
    returns,
    positions=positions,
    transactions=transactions,
    round_trips=True,
    live_start_date="2016-01-01",
)
# This creates a full tear sheet based on your backtest results. There’s a ton of information here, but here are the highlights:

NameError: name 'perf' is not defined