In [224]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime
from scipy.optimize import minimize
import time ## REMOVE THIS LATER

## Group Assignment
### Team Number: 12
### Team Member Names: Sharuga, Derek, Alex
### Team Strategy Chosen: Market Meet

Disclose any use of AI for this assignment below (detail where and how you used it).  Please see the course outline for acceptable uses of AI.


In [225]:
START_DATE = '2023-11-25'
END_DATE = '2024-11-23'
INVESTMENT = 1000000


In [226]:

def validity(tickers):
    final_list = []
    for ticker in tickers:
        #only append the stock to the final stock list if it is valid
        if (not check_delist(ticker) and
            check_currency(ticker) and
            check_volume(ticker)):
                final_list.append(ticker)
    return final_list

def check_delist(ticker):
    stock = yf.Ticker(ticker)
    try:
        data = stock.history(period='1d')
        if data.empty:
            #if we can't find any data on the stock, it's delisted
            return True
        else:
            #check that there is actually valid market data for this stock - AI
            if 'Close' not in data.columns or data['Close'].isnull().all():
                #if there are no valid close prices, the stock is delisted
                return True
            return False
    except Exception as e:
        #if there is an error in finding the stock's data, we can assume that it's delisted
        return True

def check_volume(ticker):
    volume = yf.Ticker(ticker).history(period='1d')['Volume']
    avg_monthly_volume = volume.resample('ME').mean()
    return avg_monthly_volume.mean() >= 100000

def check_currency(ticker):
    """Check if the stock is in USD or CAD currency."""
    stock = yf.Ticker(ticker)
    try:
        currency = stock.info.get('currency', None)
        return currency in ['USD', 'CAD']
    except Exception as e:
        return False  # If error, assume it’s invalid

all_data = pd.read_csv('Tickers_Example.csv', header=None)

all_data = pd.DataFrame(validity(all_data[0]))
all_data.rename(columns = {0:'Ticker'}, inplace=True)

$AGN: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")
$CELG: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")
$MON: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")
$RTN: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")


In [227]:
# Create a list of all the stock betas (retrieved from yfinance) of stock tickers in a list
def get_betas(tickers):
    betas = []
    
    for ticker in tickers:
        betas.append(yf.Ticker(ticker).info.get('beta'))

    return betas

In [228]:
def get_sector(tickers):
    sectors = []

    for ticker in tickers:
        sectors.append(yf.Ticker(ticker).info.get('sector'))

    return sectors

In [229]:
# Add a column containing all the stock betas to [all_data]
all_data = all_data.assign(Beta=get_betas(all_data['Ticker']))
all_data = all_data.assign(Sector=get_sector(all_data['Ticker']))

all_data

Unnamed: 0,Ticker,Beta,Sector
0,AAPL,1.24,Technology
1,ABBV,0.613,Healthcare
2,ABT,0.722,Healthcare
3,ACN,1.245,Technology
4,AIG,1.069,Financial Services
5,AMZN,1.146,Consumer Cyclical
6,AXP,1.214,Financial Services
7,BA,1.572,Industrials
8,BAC,1.325,Financial Services
9,BB.TO,1.068,Technology


In [230]:
# Choose the max(24, data.length) stocks that match the market the closest
def filter_betas(data):
    # Remove any negative betas, as long as data ends with at least 12 characters
    data.sort_values(by='Beta', axis=0, inplace=True, kind='quicksort')
    data.drop(index=data[(data['Beta'] <= 0) & (len(data) > 12)].index, inplace=True)
    data.reset_index(drop=True, inplace=True)

    data['temp'] = abs(1-data['Beta'])
    data.sort_values(by='temp', axis=0, inplace=True, kind='quicksort')    # Sort data by temp
    data.drop(columns='temp', axis=1, inplace=True)    # Remove temp

    data.index = range(0, len(data))    # Reassign the index
    
    return data[:24] if len(data) > 24 else data    # Return first 24 elements of data if it is greater than 24, other wise return data

In [231]:
stocks = [x for _, x in all_data.groupby('Sector')]
stocks = list(map(filter_betas, stocks))
stocks

[  Ticker   Beta                  Sector
 0   T.TO  0.722  Communication Services,
   Ticker   Beta             Sector
 0   AMZN  1.146  Consumer Cyclical,
   Ticker   Beta              Sector
 0     MO  0.670  Consumer Defensive
 1     KO  0.620  Consumer Defensive
 2     PM  0.560  Consumer Defensive
 3    PEP  0.542  Consumer Defensive
 4     CL  0.415  Consumer Defensive
 5     PG  0.414  Consumer Defensive,
   Ticker   Beta              Sector
 0    USB  1.040  Financial Services
 1     BK  1.060  Financial Services
 2    AIG  1.069  Financial Services
 3  RY.TO  0.842  Financial Services
 4  TD.TO  0.822  Financial Services
 5    AXP  1.214  Financial Services
 6    BLK  1.311  Financial Services
 7    BAC  1.325  Financial Services
 8      C  1.426  Financial Services
 9   PYPL  1.436  Financial Services,
   Ticker   Beta      Sector
 0    ABT  0.722  Healthcare
 1    PFE  0.615  Healthcare
 2   ABBV  0.613  Healthcare
 3    UNH  0.591  Healthcare
 4    BMY  0.441  Healthcare
 5

In [232]:
all_data = filter_betas(all_data)
all_data.index = all_data.index + 1

In [233]:
tsx60_info = yf.Ticker('XIU.TO')
tsx60 = tsx60_info.history(start=START_DATE, end=END_DATE, interval='1d')

sp500_info = yf.Ticker('^GSPC')
sp500 = sp500_info.history(start=START_DATE, end=END_DATE, interval='1d')
sp500 = sp500.reindex(tsx60.index, method='nearest')

market_returns = (tsx60['Close'].pct_change() + sp500['Close'].pct_change())/2
market_returns.index = pd.to_datetime(market_returns.index.strftime('%Y-%m-%d'))
market_returns = market_returns.dropna()
market_returns_mean = market_returns.mean()

market_returns_df = pd.DataFrame(market_returns)

display(market_returns_df.index)

DatetimeIndex(['2023-11-28', '2023-11-29', '2023-11-30', '2023-12-01',
               '2023-12-04', '2023-12-05', '2023-12-06', '2023-12-07',
               '2023-12-08', '2023-12-11',
               ...
               '2024-11-07', '2024-11-08', '2024-11-11', '2024-11-12',
               '2024-11-13', '2024-11-14', '2024-11-15', '2024-11-18',
               '2024-11-19', '2024-11-20'],
              dtype='datetime64[ns]', name='Date', length=247, freq=None)

In [234]:
ticker_returns = pd.DataFrame(columns=all_data['Ticker'])

for ticker in all_data['Ticker']:
        data = yf.Ticker(ticker).history(start=START_DATE, end=END_DATE)['Close']
        ticker_returns[ticker] = data.pct_change().dropna()  # Calculate daily returns
        
ticker_returns.index = pd.to_datetime(ticker_returns.index.strftime("%Y-%m-%d"))

In [235]:
months = market_returns_df.resample('MS').bfill().index

for month in months:
    temp = market_returns_df[(market_returns_df.index.month == month.month) & (market_returns_df.index.year == month.year)].index
    if len(temp) < 18:
        market_returns_df.drop(index=temp, inplace=True)
        ticker_returns.drop(index=temp, inplace=True)

print([val for val in ticker_returns.index if val not in market_returns_df.index])

print(len(market_returns_df))
print(len(ticker_returns))


[Timestamp('2023-12-26 00:00:00'), Timestamp('2024-05-20 00:00:00'), Timestamp('2024-07-01 00:00:00'), Timestamp('2024-08-05 00:00:00'), Timestamp('2024-10-14 00:00:00')]
230
231


In [19]:
portfolio_data = []
def portfolio_generator(tickers):
    global portfolio_data
    
    ticker_prices = {}  # Dictionary to store the closing prices
    target_date = '2024-11-05'  # Target date for historical data
    
    # Fetch the historical data for each ticker
    for ticker in tickers:
        stock = yf.Ticker(ticker)
        history = stock.history(start=target_date, end='2024-11-06')
        time.sleep(0.3)  # Add a small delay to avoid rate limiting
        
        # Fetch the closing price for the target date
        if not history.empty and target_date in history.index.strftime('%Y-%m-%d'):
            close_price = history.loc[history.index.strftime('%Y-%m-%d') == target_date, 'Close'].values[0]
            ticker_prices[ticker] = close_price
        else:
            ticker_prices[ticker] = None  # No data available for that date
    
    # Optimization setup for portfolio sizes between 12 and 24
    for i in range(12, 25):
        selected_tickers = tickers[:i]  # Select the first i tickers
        
        # Define the objective function for optimization
        def objective(weights):
            return np.std(np.dot(ticker_returns[selected_tickers].dropna().values, weights) - market_returns_df.dropna().values)
        
        # Initial weights (equal distribution for the selected tickers)
        initial_weights = [1 / len(selected_tickers)] * len(selected_tickers)
        
        # Constraint to ensure the sum of weights is 1
        def sum_weights(weights):
            return sum(weights) - 1
        
        # Constraints for the optimization
        constraints = [{'type': 'eq', 'fun': sum_weights}]
        
        # Bounds for each stock: minimum weight of 100/2n% and max 15% weight
        bounds = [(1 / (2 * len(selected_tickers)), 0.15)] * len(selected_tickers)
        
        # Perform optimization
        result = minimize(objective, initial_weights, constraints=constraints, bounds=bounds)
        
        if result.success:

            weights = result.x  # Get the optimized weights
            
            # Initialize variables for storing results
            total_costs = []
            shares_purchased = []
            inv = 1000000  # Investment amount
            
            # Calculate portfolio value and shares purchased, along with fees
            portfolio_val = 1000000 * weights  # Portfolio value based on optimized weights
            shares = portfolio_val / np.array([ticker_prices[ticker] for ticker in selected_tickers])  # Number of shares
            
            # Calculate fees: minimum of $3.95 or $0.001 per share * number of shares
            fee = np.minimum(3.95, 0.001 * shares)
            
            # Subtract the fees from the portfolio value
            portfolio_val -= fee
            
            # Add to the results
            portfolio_data.append({
                'tickers': ', '.join(selected_tickers),  # Tickers in the portfolio
                'weights': result.x,  # Optimized weights
                'STD Between Stock and Market Index': result.fun,  # Standard deviation (risk)
                'num_stocks': i,  # Number of stocks in the portfolio
                'total_cost': portfolio_val.sum() + sum(fee),  # Total cost after fees
                'shares_purchased': shares,  # Shares purchased
                'portfolio_value': portfolio_val.sum(),  # Total portfolio value after fees
                'final_fees': sum(fee)
            })

    # Return the final portfolio data as a DataFrame
    return pd.DataFrame(portfolio_data)

# Example usage
  # Initialize the global portfolio data list
portfolio_df = portfolio_generator(all_data['Ticker'])  # Assuming all_data['Ticker'] is a list of tickers
portfolio_df


Unnamed: 0,tickers,weights,STD Between Stock and Market Index,num_stocks,total_cost,shares_purchased,portfolio_value,final_fees
0,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.04166666666666669, 0.041666666666666664, 0....",0.01009,12,1000000.0,"[315.1485102731415, 205.80196228854896, 875.16...",999987.616587,12.383413
1,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.038461538461538464, 0.038461538461538464, 0...",0.009796,13,1000000.0,"[290.90631717520733, 189.97104211250675, 807.8...",999988.050784,11.949216
2,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.03571428571428571, 0.03571428571428572, 0.0...",0.009649,14,1000000.0,"[270.12729451983535, 176.40168196161343, 750.1...",999988.526459,11.473541
3,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.03333333333333333, 0.03333333333333334, 0.0...",0.009257,15,1000000.0,"[252.11880821851304, 164.64156983083922, 700.1...",999985.651217,14.348783
4,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.031250000000000014, 0.03125000000000001, 0....",0.008987,16,1000000.0,"[236.36138270485606, 154.35147171641177, 656.3...",999985.45656,14.54344
5,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.029411764705882353, 0.029411764705882363, 0...",0.009034,17,1000000.0,"[222.45777195751148, 145.27197338015228, 617.7...",999985.493942,14.506058
6,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.027777777777777776, 0.027777777777777776, 0...",0.009032,18,1000000.0,"[210.09900684876084, 137.20130819236599, 583.4...",999985.676113,14.323887
7,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.026315789473684216, 0.02631578947368421, 0....",0.009038,19,1000000.0,"[199.04116438303666, 129.98018670855726, 552.7...",999985.192128,14.807872
8,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.025, 0.025000000000000015, 0.02500000000000...",0.008843,20,1000000.0,"[189.08910616388476, 123.48117737312946, 525.0...",999983.827113,16.172887
9,"UPS, TXN, USB, UNP, BK, BB.TO, AIG, CAT, AMZN,...","[0.02380952380952381, 0.023809523809523815, 0....",0.008642,21,1000000.0,"[180.0848630132236, 117.6011213077423, 500.095...",999982.878113,17.121887


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.