In [1]:
#import relevant libraries 
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import json
from pathlib import Path
import alpaca_trade_api as tradeapi
import os 
from dotenv import load_dotenv

%matplotlib inline

In [2]:
#engage API keys by activating .emv file
load_dotenv()

alpaca_api_key = os.getenv("Api_key")
alpaca_secret_key = os.getenv("Secret_key")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')


type(alpaca_api_key)

str

In [3]:
#load in historical data for S&P 500
sp500_df = api.alpha_vantage.historic_quotes('SPY', adjusted=True, output_format='pandas')

In [4]:
#DATA CLEANUP

#Select the column we need,  "adjusted close", and drop the others

sp500_df = sp500_df['5. adjusted close']
sp500_df.head()

date
2020-06-25    307.35
2020-06-24    304.09
2020-06-23    312.05
2020-06-22    310.62
2020-06-19    308.64
Name: 5. adjusted close, dtype: float64

In [5]:
#Sort earliest to latest so that .pct_change() function works right.

sp500_df.sort_index(inplace=True, ascending=True)
sp500_df.head()

date
2000-06-26    99.9424
2000-06-27    99.2056
2000-06-28    99.4832
2000-06-29    98.5435
2000-06-30    99.2910
Name: 5. adjusted close, dtype: float64

In [6]:
#create a dataframe for the daily returns (pct_change) values and concat with SP500 close
returns_df = sp500_df.pct_change()
concat_returns = pd.concat([sp500_df, returns_df], axis="columns", join="inner")
concat_returns.head()

Unnamed: 0_level_0,5. adjusted close,5. adjusted close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-06-26,99.9424,
2000-06-27,99.2056,-0.007372
2000-06-28,99.4832,0.002798
2000-06-29,98.5435,-0.009446
2000-06-30,99.291,0.007585


In [7]:
#Change column names to avoid confusion
columns = ['S&P 500 close',
          'S&P 500 Daily Returns']
concat_returns.columns = columns
concat_returns.head()

Unnamed: 0_level_0,S&P 500 close,S&P 500 Daily Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-06-26,99.9424,
2000-06-27,99.2056,-0.007372
2000-06-28,99.4832,0.002798
2000-06-29,98.5435,-0.009446
2000-06-30,99.291,0.007585


In [8]:
# Drop nulls
concat_returns.dropna(inplace=True)

#count nulls again
concat_returns.isnull().sum()

S&P 500 close            0
S&P 500 Daily Returns    0
dtype: int64

In [9]:
# Check for duplicate rows
returns_df.duplicated()


date
2000-06-26    False
2000-06-27    False
2000-06-28    False
2000-06-29    False
2000-06-30    False
              ...  
2020-06-19    False
2020-06-22    False
2020-06-23    False
2020-06-24    False
2020-06-25    False
Name: 5. adjusted close, Length: 5032, dtype: bool

In [10]:
# drop duplicates
returns_df.drop_duplicates(inplace=True)
returns_df.head()

date
2000-06-26         NaN
2000-06-27   -0.007372
2000-06-28    0.002798
2000-06-29   -0.009446
2000-06-30    0.007585
Name: 5. adjusted close, dtype: float64

In [1]:
# Grab just the `date` and `close` from the dataset
sp500_signals = sp500_df.copy()

# Generate the short and long moving averages (50 and 100 days, respectively)
sp500_signals["EWM20"] = pd.DataFrame.ewm(sp500_signals, span=20).mean()
sp500_signals["EWM50"] = pd.DataFrame.ewm(sp500_signals, span=50).mean()
signals_df["Signal"] = 0.0

# Generate the trading signal 0 or 1,
# where 0 is when the SMA50 is under the SMA100, and
# where 1 is when the SMA50 is higher (or crosses over) the SMA100
signals_df["Signal"][short_window:] = np.where(
    signals_df["SMA50"][short_window:] > signals_df["SMA100"][short_window:], 1.0, 0.0
)

# Calculate the points in time at which a position should be taken, 1 or -1
signals_df["Entry/Exit"] = signals_df["Signal"].diff()

signals_df.head(10)

NameError: name 'sp500_df' is not defined