In [1]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime
from scipy.optimize import minimize
import time ## REMOVE THIS LATER

## Group Assignment
### Team Number: 12
### Team Member Names: Sharuga, Derek, Alex
### Team Strategy Chosen: Market Meet

Disclose any use of AI for this assignment below (detail where and how you used it).  Please see the course outline for acceptable uses of AI.


In [14]:
START_DATE = '2023-11-25'
END_DATE = '2024-11-23'
INVESTMENT = 1_000_000


In [3]:

def validity(tickers):
    final_list = []
    for ticker in tickers:
        #only append the stock to the final stock list if it is valid
        if (not check_delist(ticker) and
            check_currency(ticker) and
            check_volume(ticker)):
                final_list.append(ticker)
    return final_list

def check_delist(ticker):
    stock = yf.Ticker(ticker)
    try:
        data = stock.history(period='1d')
        if data.empty:
            #if we can't find any data on the stock, it's delisted
            return True
        else:
            #check that there is actually valid market data for this stock - AI
            if 'Close' not in data.columns or data['Close'].isnull().all():
                #if there are no valid close prices, the stock is delisted
                return True
            return False
    except Exception as e:
        #if there is an error in finding the stock's data, we can assume that it's delisted
        return True

def check_volume(ticker):
    volume = yf.Ticker(ticker).history(period='1d')['Volume']
    avg_monthly_volume = volume.resample('ME').mean()
    return avg_monthly_volume.mean() >= 100000

def check_currency(ticker):
    """Check if the stock is in USD or CAD currency."""
    stock = yf.Ticker(ticker)
    try:
        currency = stock.info.get('currency', None)
        return currency in ['USD', 'CAD']
    except Exception as e:
        return False  # If error, assume it’s invalid

all_data = pd.read_csv('Tickers_Example.csv', header=None)

all_data = pd.DataFrame(validity(all_data[0]))
all_data.rename(columns = {0:'Ticker'}, inplace=True)

$AGN: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")
$CELG: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")
$MON: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")
$RTN: possibly delisted; no price data found  (period=1d) (Yahoo error = "No data found, symbol may be delisted")


In [4]:
# Create a list of all the stock betas (retrieved from yfinance) of stock tickers in a list
def get_betas(tickers):
    betas = []
    
    for ticker in tickers:
        betas.append(yf.Ticker(ticker).info.get('beta'))

    return betas

In [5]:
def get_sector(tickers):
    sectors = []

    for ticker in tickers:
        sectors.append(yf.Ticker(ticker).info.get('sector'))

    return sectors

In [6]:
# Add a column containing all the stock betas to [all_data]
all_data = all_data.assign(Beta=get_betas(all_data['Ticker']))
all_data = all_data.assign(Sector=get_sector(all_data['Ticker']))

all_data

Unnamed: 0,Ticker,Beta,Sector
0,AAPL,1.24,Technology
1,ABBV,0.613,Healthcare
2,ABT,0.722,Healthcare
3,ACN,1.245,Technology
4,AIG,1.069,Financial Services
5,AMZN,1.146,Consumer Cyclical
6,AXP,1.214,Financial Services
7,BA,1.572,Industrials
8,BAC,1.325,Financial Services
9,BB.TO,1.068,Technology


In [7]:
# Choose the max(24, data.length) stocks that match the market the closest
def filter_betas(data):
    # Remove any negative betas, as long as data ends with at least 12 characters
    data.sort_values(by='Beta', axis=0, inplace=True, kind='quicksort')
    data.drop(index=data[(data['Beta'] <= 0) & (len(data) > 12)].index, inplace=True)
    data.reset_index(drop=True, inplace=True)

    data['temp'] = abs(1-data['Beta'])
    data.sort_values(by='temp', axis=0, inplace=True, kind='quicksort')    # Sort data by temp
    data.drop(columns='temp', axis=1, inplace=True)    # Remove temp

    data.index = range(0, len(data))    # Reassign the index
    
    return data[:24] if len(data) > 24 else data    # Return first 24 elements of data if it is greater than 24, other wise return data

In [8]:
stocks = [x for _, x in all_data.groupby('Sector')]
stocks = list(map(filter_betas, stocks))
stocks

[  Ticker   Beta                  Sector
 0   T.TO  0.722  Communication Services,
   Ticker   Beta             Sector
 0   AMZN  1.146  Consumer Cyclical,
   Ticker   Beta              Sector
 0     MO  0.670  Consumer Defensive
 1     KO  0.620  Consumer Defensive
 2     PM  0.560  Consumer Defensive
 3    PEP  0.542  Consumer Defensive
 4     CL  0.415  Consumer Defensive
 5     PG  0.414  Consumer Defensive,
   Ticker   Beta              Sector
 0    USB  1.040  Financial Services
 1     BK  1.060  Financial Services
 2    AIG  1.069  Financial Services
 3  RY.TO  0.842  Financial Services
 4  TD.TO  0.822  Financial Services
 5    AXP  1.214  Financial Services
 6    BLK  1.311  Financial Services
 7    BAC  1.325  Financial Services
 8      C  1.426  Financial Services
 9   PYPL  1.436  Financial Services,
   Ticker   Beta      Sector
 0    ABT  0.722  Healthcare
 1    PFE  0.615  Healthcare
 2   ABBV  0.613  Healthcare
 3    UNH  0.591  Healthcare
 4    BMY  0.441  Healthcare
 5

In [9]:
all_data = filter_betas(all_data)
all_data.index = all_data.index + 1

In [15]:
tsx60_info = yf.Ticker('XIU.TO')
tsx60 = tsx60_info.history(start=START_DATE, end=END_DATE, interval='1d')

sp500_info = yf.Ticker('^GSPC')
sp500 = sp500_info.history(start=START_DATE, end=END_DATE, interval='1d')
sp500 = sp500.reindex(tsx60.index, method='nearest')

market_returns = (tsx60['Close'].pct_change() + sp500['Close'].pct_change())/2
market_returns.index = pd.to_datetime(market_returns.index.strftime('%Y-%m-%d'))
market_returns = market_returns.dropna()
market_returns_mean = market_returns.mean()

market_returns_df = pd.DataFrame(market_returns)

In [16]:
ticker_returns = pd.DataFrame(columns=all_data['Ticker'])

for ticker in all_data['Ticker']:
        data = yf.Ticker(ticker).history(start=START_DATE, end=END_DATE)['Close']
        ticker_returns[ticker] = data.pct_change().dropna()  # Calculate daily returns
        
ticker_returns.index = pd.to_datetime(ticker_returns.index.strftime("%Y-%m-%d"))

In [18]:
months = market_returns_df.resample('MS').bfill().index

for month in months:
    temp = market_returns_df[(market_returns_df.index.month == month.month) & (market_returns_df.index.year == month.year)].index
    if len(temp) < 18:
        market_returns_df.drop(index=temp, inplace=True)
        ticker_returns.drop(index=temp, inplace=True)

print([val for val in ticker_returns.index if val not in market_returns_df.index])

[Timestamp('2023-12-26 00:00:00'), Timestamp('2024-05-20 00:00:00'), Timestamp('2024-07-01 00:00:00'), Timestamp('2024-08-05 00:00:00'), Timestamp('2024-10-14 00:00:00')]


In [13]:
portfolio_data = []
def portfolio_generator(tickers):
    global portfolio_data
    
    ticker_prices = {}  # Dictionary to store the closing prices
    target_date = '2024-11-05'  # Target date for historical data
    
    # Fetch the historical data for each ticker
    for ticker in tickers:
        stock = yf.Ticker(ticker)
        history = stock.history(start=target_date, end='2024-11-06')
        time.sleep(0.3)  # Add a small delay to avoid rate limiting
        
        # Fetch the closing price for the target date
        if not history.empty and target_date in history.index.strftime('%Y-%m-%d'):
            close_price = history.loc[history.index.strftime('%Y-%m-%d') == target_date, 'Close'].values[0]
            ticker_prices[ticker] = close_price
        else:
            ticker_prices[ticker] = None  # No data available for that date
    
    # Optimization setup for portfolio sizes between 12 and 24
    for i in range(12, 25):
        selected_tickers = tickers[:i]  # Select the first i tickers
        
        # Define the objective function for optimization
        def objective(weights):
            return np.std(np.dot(ticker_returns[selected_tickers].dropna().values, weights) - market_returns_df.dropna().values)
        
        # Initial weights (equal distribution for the selected tickers)
        initial_weights = [1 / len(selected_tickers)] * len(selected_tickers)
        
        # Constraint to ensure the sum of weights is 1
        def sum_weights(weights):
            return sum(weights) - 1
        
        # Constraints for the optimization
        constraints = [{'type': 'eq', 'fun': sum_weights}]
        
        # Bounds for each stock: minimum weight of 1/2n% and max 15% weight
        bounds = [(1 / (2 * len(selected_tickers)), 0.15)] * len(selected_tickers)
        
        # Perform optimization
        result = minimize(objective, initial_weights, constraints=constraints, bounds=bounds)
        
        if result.success:

            weights = result.x  # Get the optimized weights
            
            # Initialize variables for storing results
            total_costs = []
            shares_purchased = []
            inv = 1000000 
            
            # # Loop through each ticker and calculate costs
            # for weight, ticker in zip(weights, selected_tickers):
            #     if ticker_prices.get(ticker) is not None:  # Ensure the ticker has a valid price
            #         # Investment allocated for this stock
            #         allocated_investment =  inv * weight
                    
            #         # Calculate the number of shares you can buy (allow fractional shares)
            #         num_shares = allocated_investment / ticker_prices[ticker]
                    
            #         # Calculate the fee: the smaller of $3.95 or $0.001 per share * number of shares
            #         fee_per_share = 0.001 * num_shares
            #         fee = min(3.95, fee_per_share)  # Apply the smaller fee
                    
            #         # Calculate the total cost for this stock (price * shares + fee)
            #         total_cost = (num_shares * ticker_prices[ticker]) + fee
                    
            #         # Add to results
            #         total_costs.append(total_cost)
            #         shares_purchased.append(num_shares)
                    
            #         # Update the remaining budget after deducting the total cost for this stock
            #         inv -= total_cost  # Decrease the investment by the total cost for this stock
            #     else:
            #         # If no price is available, skip this ticker
            #         total_costs.append(0)
            #         shares_purchased.append(0)
            
            # # Calculate portfolio value (final value of all stocks based on current price)
            # portfolio_value = 0
            # for shares, ticker in zip(shares_purchased, selected_tickers):
            #     if ticker in ticker_prices and ticker_prices[ticker] is not None:
            #         portfolio_value += shares * ticker_prices[ticker]  # Add the value of each stock

            portfolio_data.append({
                'tickers': ', '.join(selected_tickers),  # Tickers in the portfolio
                'weights': result.x,  # Optimized weights
                'STD Between Stock and Market Index': result.fun,  # Standard deviation (risk)
                'num_stocks': i, # Number of stocks in the portfolio
                'total_cost': total_cost,
                'shares_purchased': shares_purchased,
                'portfolio_value':portfolio_value
            })

    # Return the final portfolio data as a DataFrame
    return pd.DataFrame(portfolio_data)

# Example usage
  # Initialize the global portfolio data list
portfolio_df = portfolio_generator(all_data['Ticker'])  # Assuming all_data['Ticker'] is a list of tickers
portfolio_df


NameError: name 'total_cost' is not defined

In [None]:
            # weights = result.x  # Get the optimized weights
            
            # Initialize variables for storing results
            # total_costs = []
            # shares_purchased = []
            
            # # Loop through each ticker and calculate costs
            # for weight, ticker in zip(weights, selected_tickers):
            #     if ticker_prices.get(ticker) is not None:  # Ensure the ticker has a valid price
            #         # Investment allocated for this stock
            #         allocated_investment = inv * weight
                    
            #         # Calculate the number of shares you can buy (allow fractional shares)
            #         num_shares = allocated_investment / ticker_prices[ticker]
                    
            #         # Calculate the fee: the smaller of $3.95 or $0.001 per share * number of shares
            #         fee_per_share = 0.001 * num_shares
            #         fee = min(3.95, fee_per_share)  # Apply the smaller fee
                    
            #         # Calculate the total cost for this stock (price * shares + fee)
            #         total_cost = (num_shares * ticker_prices[ticker]) + fee
                    
            #         # Add to results
            #         total_costs.append(total_cost)
            #         shares_purchased.append(num_shares)
                    
            #         # Update the remaining budget after deducting the total cost for this stock
            #         inv -= total_cost  # Decrease the investment by the total cost for this stock
            #     else:
            #         # If no price is available, skip this ticker
            #         total_costs.append(0)
            #         shares_purchased.append(0)
            
            # # Calculate portfolio value (final value of all stocks based on current price)
            # portfolio_value = 0
            # for shares, ticker in zip(shares_purchased, selected_tickers):
            #     if ticker in ticker_prices and ticker_prices[ticker] is not None:
            #         portfolio_value += shares * ticker_prices[ticker]  # Add the value of each stock
            
            # Add the portfolio data to the results
            


In [None]:
# Function to calculate the purchasing fee depending on the number of shares
def calculate_fee(shares):
    return max(3.95, 0.001*shares)

In [None]:
# AI

def calculate_fee(shares):
    return max(3.95, 0.001*shares)

def allocate_with_fees(selected_stocks, prices, budget, max_shares=1000000):
    n = len(selected_stocks)
    initial_shares = np.full(n, budget / (n * prices.mean()))  # Initial guess
    
    # Objective: Minimize leftover budget after accounting for fees
    def objective(shares):
        cost = np.sum(prices * shares + np.vectorize(calculate_fee)(prices, shares))
        return abs(budget - cost)
    
    # Constraints: Non-negative shares
    bounds = [(0, max_shares)] * n
    
    # Solve
    result = minimize(objective, initial_shares, bounds=bounds)
    return np.round(result.x)  # Return rounded number of shares

In [None]:
# AI
def create_portfolio_with_fees(index_ticker="^GSPC", num_stocks=12, budget=1_000_000):
    # Step 1: Benchmark data
    benchmark = yf.Ticker(index_ticker)
    stock_list = pd.DataFrame(benchmark. ())['Symbol'].sample(num_stocks)
    
    # Step 2: Historical prices
    prices = {ticker: yf.Ticker(ticker).history(period="1d")['Close'].iloc[-1] for ticker in stock_list}
    prices = pd.Series(prices)
    
    # Step 3: Optimize shares with fees
    shares = allocate_with_fees(stock_list, prices, budget)
    
    # Step 4: Create portfolio
    portfolio = pd.DataFrame({
        "Stock": stock_list,
        "Price (CAD)": prices.values,
        "Shares": shares,
        "Investment (CAD)": prices.values * shares,
        "Fee (CAD)": [calculate_fee(price, share) for price, share in zip(prices.values, shares)]
    })
    
    portfolio["Total Cost (CAD)"] = portfolio["Investment (CAD)"] + portfolio["Fee (CAD)"]
    portfolio = portfolio[portfolio["Shares"] > 0]  # Remove zero-share stocks
    
    return portfolio, portfolio["Total Cost (CAD)"].sum()

In [None]:
portfolio_data

In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import linprog

def generate_portfolio(stock_df, market_return, budget, fee_min=3.95, fee_rate=0.001):
    """
    Generate a portfolio that matches the average market return.

    Parameters:
    - stock_df (pd.DataFrame): DataFrame with columns ['Stock', 'MeanReturn', 'StdDev', 'Price'].
    - market_return (float): Target average market return.
    - budget (float): Total budget to allocate to stocks.
    - fee_min (float): Minimum fee per transaction.
    - fee_rate (float): Fee rate per share purchased.

    Returns:
    - dict: A dictionary with stock allocations and total cost.
    """
    # Extract relevant columns
    returns = stock_df['MeanReturn'].values
    prices = stock_df['Price'].values
    num_stocks = len(returns)

    # Objective: minimize total cost (negative return maximization for the problem setup)
    c = prices

    # Constraints
    A_eq = [
        returns,               # Ensure portfolio matches the market return
        np.ones(num_stocks)    # Ensure budget constraint
    ]
    b_eq = [market_return, budget]

    bounds = [(0, None) for _ in range(num_stocks)]  # No short-selling, no upper limit

    # Solve the linear program
    result = linprog(c, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method='highs')

    if result.success:
        allocations = result.x
        total_cost = np.dot(allocations, prices)
        fees = np.sum(np.maximum(fee_min, fee_rate * allocations))
        return {
            'Allocations': {stock: alloc for stock, alloc in zip(stock_df['Stock'], allocations)},
            'TotalCost': total_cost,
            'Fees': fees,
            'PortfolioReturn': np.dot(allocations, returns) / total_cost,
        }
    else:
        raise ValueError("Optimization failed to find a valid portfolio.")
        
# Sample stock data
data = {
    'Stock': ['AAPL', 'MSFT', 'GOOG', 'TSLA'],
    'MeanReturn': [0.12, 0.08, 0.15, 0.10],
    'StdDev': [0.20, 0.25, 0.30, 0.35],
    'Price': [150, 300, 2800, 700]
}
stock_df = pd.DataFrame(data)

market_return = 0.10  # Target market return
budget = 1_000_000   # Budget in dollars

portfolio = generate_portfolio(stock_df, market_return, budget)
print(portfolio)



## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.