In [6]:
#importing necessary libraries 
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import alpaca_trade_api as tradeapi
import yfinance as yf
import seaborn as sns #library not from class
%matplotlib inline

In [7]:
#Load .env environment variables
from dotenv import load_dotenv
load_dotenv()


True

In [8]:
# Set a random seed for reproducibility
np.random.seed(40)

In [10]:
# Set Alpaca API key and secret
APCA_API_KEY_ID = os.getenv("APCA_API_KEY")
APCA_API_SECRET_KEY = os.getenv("APCA_SECRET_KEY")
ALPACA_ENDPOINT_KEY = os.getenv("ALPACA_END_POINT")

#ensuring api keys are correct 
#print(os.getenv("APCA_API_KEY_ID"))
#print(os.getenv("APCA_API_SECRET_KEY"))
#print(os.getenv("ALPACA_ENDPOINT_KEY"))

# Create the Alpaca API object
alpaca = tradeapi.REST(APCA_API_KEY_ID, APCA_API_SECRET_KEY, api_version="v2",base_url= ALPACA_ENDPOINT_KEY)

## Part 1 - Portfolio Optimization 


In [29]:
#initializing variables
initial_investment = 10000
print(f'The initial investment is: ${initial_investment}')

stock_tickers = ['AAPL', 'AMZN', 'MSFT']  # Replace with your desired stock symbols
ticker_string = ', '.join(stock_tickers)
print(f'The selected stocks are: {stock_tickers}')

start_date = '2020-11-08'  # Set your desired start date
print(f'The start date is: {start_date}')

end_date = '2023-11-08'  # Set your desired start date
print(f'The start date is: {start_date}')

num_simulations = 500 
print(f'The number of monte carlo simulations is: {num_simulations}')

The initial investment is: $10000
The selected stocks are: ['AAPL', 'TSLA', 'MSFT']
The start date is: 2020-11-08
The start date is: 2020-11-08
The number of monte carlo simulations is: 500


In [36]:
# Importing Data and cleaning
# CODE
spy = "SPY"
list_of_tickers = stock_tickers + [spy]  # Combine stock tickers and 'SPY' into a list
ticker_string_for_query = ', '.join(list_of_tickers)  # Create a string for query display

start_date = pd.Timestamp(start_date, tz='America/New_York').isoformat()
end_date = pd.Timestamp(end_date, tz='America/New_York').isoformat()

# Remove 'after' and 'until' arguments from the function call
df = alpaca.get_bars(
    list_of_tickers, 
    timeframe,
    limit=None,
    start=start_date,
    end=end_date,
).df

# Print the entire DataFrame to inspect its structure
print("DataFrame from Alpaca:")
print(df)

df = df.sort_index()

# Print the columns to inspect what columns are present before the drop operation
print("\nColumns before drop:", df.columns)

# Drop columns only if they exist in the DataFrame
columns_to_drop = [col for col in list_of_tickers if col in df.columns]
df = df.drop(columns=columns_to_drop, errors='ignore')

df.index = df.index.date
df.dropna(inplace=True)

# Check if 'SPY' column exists before accessing it
if spy in df.columns:
    spy_df = df[spy]
    df.drop(columns=spy, inplace=True)
    print(df.tail())
else:
    print(f"{spy} column not present in the DataFrame.")


DataFrame from Alpaca:
                            close      high     low  trade_count    open  \
timestamp                                                                  
2020-11-09 05:00:00+00:00  116.32  121.9900  116.05       997694  120.50   
2020-11-10 05:00:00+00:00  116.00  117.5900  114.13       820549  115.55   
2020-11-11 05:00:00+00:00  119.49  119.6300  116.44       621700  117.19   
2020-11-12 05:00:00+00:00  119.21  120.5300  118.57       613393  119.62   
2020-11-13 05:00:00+00:00  119.26  119.6717  117.87       505192  119.44   
...                           ...       ...     ...          ...     ...   
2023-11-02 04:00:00+00:00  218.51  219.2000  211.45      1338281  212.97   
2023-11-03 04:00:00+00:00  219.96  226.3701  218.40      1247127  221.15   
2023-11-06 05:00:00+00:00  219.27  226.3200  215.00      1266125  223.98   
2023-11-07 05:00:00+00:00  222.18  223.1200  215.72      1141257  219.98   
2023-11-08 05:00:00+00:00  222.11  224.1500  217.64      1070149 