In [1]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime
from scipy.optimize import minimize
import time
from math import comb

## Group Assignment
### Team Number: 12
### Team Member Names: Sharuga, Derek, Alex
### Team Strategy Chosen: Market Meet

Disclose any use of AI for this assignment below (detail where and how you used it).  Please see the course outline for acceptable uses of AI.

CHECK DELIST:
- Purpose: checks that there is actually valid market data for this stock
- Code: if 'Close' not in data.columns or data['Close'].isnull().all():

PORTFOLIO GENERATOR
- Purpose: calculate how many shares of each stock to buy, and how much to pay in fees
- Code:

portfolio_value = 1000000 * weights  # Portfolio value based on optimized weights

shares = portfolio_val / np.array([ticker_prices[ticker] for ticker in selected_tickers])  # Number of shares

fee = np.minimum(3.95, 0.001 * shares)

portfolio_val -= fee


In [2]:
#October 01, 2023 to September 30, 2024, 2023

START_DATE = '2023-10-01'
END_DATE = '2024-11-20'

 # date on which we will take stock Close prices
TARGET_DATE = pd.to_datetime('2024-10-22').date()

INVESTMENT = 1000000

In [3]:
# read in CSV
csv = pd.read_csv('Tickers_Example.csv', header=None)

csv

Unnamed: 0,0
0,AAPL
1,ABBV
2,ABT
3,ACN
4,AGN
5,AIG
6,AMZN
7,AXP
8,BA
9,BAC


In [4]:
# filter out invalid stocks
def validity(tickers):

    # list to keep all of the valid stocks
    final_list = []

    for ticker in tickers:
        #only append the stock to the final stock list if it is isn't delisted, has a US or CAD currency, and has the required monthly share volume
        if (not check_delist(ticker) and
            check_currency(ticker) and
            check_volume(ticker)):
                final_list.append(ticker)
    return final_list

# checks if the consumed Ticker is delisted
def check_delist(ticker):
    stock = yf.Ticker(ticker)
    try:
        data = stock.history(period='1d')
        time.sleep(0.3)
        if data.empty:
            #if we can't find any data on the stock, it's delisted
            return True
        else:
            #check that there is actually valid market data for this stock
            if 'Close' not in data.columns or data['Close'].isnull().all():
                return True
            return False
    except Exception as e:
        #if there is an error in finding the stock's data, we can assume that it's delisted
        return True

# checks if the consumed Ticker meets the requirement of an average monthly share volume of 100,000 shares
def check_volume(ticker):
    volume = yf.Ticker(ticker).history(start=START_DATE, end='2024-09-30')['Volume']
    time.sleep(0.3)
    avg_monthly_volume = volume.resample('ME').mean()
    return avg_monthly_volume.mean() >= 100000

# checks whether or not the consumed Ticker is listed in CAD or in USD
def check_currency(ticker):
    stock = yf.Ticker(ticker)
    try:
        currency = stock.info.get('currency', None)
        return currency in ['USD', 'CAD']
    except Exception as e:
        return False  # if there's an error retrieving the data, we can assume it doesnt satisfy the required monthly volume

# create a new DataFrame only containing the valid Tickers in the consumed CSV
all_data = pd.DataFrame(validity(csv))
all_data.rename(columns = {0:'Ticker'}, inplace=True)



AttributeError: 'int' object has no attribute 'upper'

In [None]:
def days_to_drop(markets):

    markets.dropna(inplace=True)
    months = markets.resample('MS').bfill().index
    
    dates = []

    for month in months:
        temp = markets[(markets.index.month==month.month) & (markets.index.year==month.year)].index
        if len(temp) < 18:
            dates.extend(temp)
    
    return dates

In [None]:
ticker_returns = pd.DataFrame(columns=all_data['Ticker'])

# calculate and store all returns for all all Tickers
for ticker in all_data['Ticker']:
        data = yf.Ticker(ticker).history(start=START_DATE, end=END_DATE)['Close']
        time.sleep(0.3)
        ticker_returns[ticker] = data.pct_change().dropna()  # this code calculates daily returns
        
ticker_returns.index = pd.to_datetime(ticker_returns.index.strftime("%Y-%m-%d"))

ticker_returns

In [None]:
def std_rank(returns):
    std_devs = returns.std()
    
    # Create a DataFrame with tickers and their respective standard deviations
    ranked_stocks = pd.DataFrame({
        "ticker": std_devs.index,
        "STD": std_devs.values
    })
    
    # Sort by standard deviation in ascending order (lowest STD at the top)
    ranked_stocks = ranked_stocks.sort_values(by="STD", ascending=True)
    
    return ranked_stocks['ticker'].values[:24].tolist()

In [None]:

ticker_lst = std_rank(ticker_returns)
print(ticker_lst)
index_lst = ["XIU.TO","^GSPC"]

exch_rate = yf.download("CADUSD=x",start="2024-11-20", end="2024-11-21")["Close"].iloc[0] #MUST BE CHANGED TO PULL NOVEMBER 22ND

#Takes a yf.Ticker "ticker", checks if the ticker is in canadian dollars. If so, it will just do a simple API pull for the price history.
#If the price is listed in USD, a conversion operation will simply be applied to each price.
def yfin_pull_convert_USD(ticker):
    listed_currency = ticker.fast_info["currency"]
    if listed_currency == "CAD":
        return ticker.history(start=START_DATE, end=END_DATE, interval="1d")["Close"]
    if listed_currency == "USD":
        return ticker.history(start=START_DATE, end=END_DATE, interval="1d")["Close"] * (1/exch_rate)
    else: print("ticker currency is not in USD or CAD error")

#Test cases
#display(yf.Ticker("NVDA").history(start=START_DATE, end=END_DATE, interval="1d")["Close"]
#yfin_pull_convert_USD(yf.Ticker("NVDA"))

#Main function to store the stock prices of the ticker
def gen_tickers(tlist, ilist):
    ret_dataframe = pd.DataFrame()
    check_month_dataframe = pd.DataFrame()
    for i_str in ilist:
        index_ticker = yf.Ticker(i_str)
        check_month_dataframe[i_str] = yfin_pull_convert_USD(index_ticker)
        ret_dataframe[i_str] = yfin_pull_convert_USD(index_ticker)
    
    for t_str in tlist:
        ticker = yf.Ticker(t_str)
        ret_dataframe[t_str] = yfin_pull_convert_USD(ticker)
    ret_dataframe.drop(days_to_drop(check_month_dataframe), inplace=True)
    return ret_dataframe

#Function call: stores a dataframe of index values and stock values
stock_values = gen_tickers(ticker_lst,index_lst)
# display(stock_values)

In [None]:
market_label = "Market Returns"
#Function that transforms a list of values into a list of percent returns.
#Also adds a column of the simple average returns of the index tickers.
def convert_pct_returns(stock_value_df):
    ret_dataframe = stock_value_df.interpolate().pct_change()
    ret_dataframe.dropna(inplace=True)
    ret_dataframe[market_label] = ret_dataframe[index_lst].mean(axis=1)
    df_col_order = index_lst + ["Market Returns"] + ticker_lst
    ret_dataframe = ret_dataframe[df_col_order]
    return ret_dataframe

stock_returns = convert_pct_returns(stock_values)
display(stock_returns)



In [None]:
#Creating the correlation matrix: takes a dataframe of values and a string of columns to drop to make the correlation matrix
def correl(data, dropvalue):
    data_marketdrop = data.drop(labels=dropvalue, axis=1)
    ret_corr = data_marketdrop.corr()
    return ret_corr

stock_correlations = correl(stock_returns, [market_label] + index_lst)
#Apply a format to the DataFrame
display(stock_correlations.style.background_gradient(cmap='RdYlGn_r'))


In [None]:
#finding the ticker coordinates of the lowest correlation value (AI)
def low_correl_ticker_pairs(correlation_matrix):
    if correlation_matrix.shape[0] > 1:
        correl_pairs = correlation_matrix.unstack()
        correl_pairs = correl_pairs[correl_pairs.index.get_level_values(0) != correl_pairs.index.get_level_values(1)]
        lowest_corr_pair = correl_pairs.idxmin()
        lowest_corr_value = correl_pairs.min()
        return list(lowest_corr_pair)
    else:
        remaining_column_ticker = correlation_matrix.columns[0]
        return [remaining_column_ticker]

#recursing through the entire correlation matrix to extract every correlation pair
def corr_pair_extract(correl_matrix):
    #creating a list to store low-correlation pairs into
    low_corr_pairs = []
    while correl_matrix.shape[0] > 0:
        pair = low_correl_ticker_pairs(correl_matrix)
        #removing the tickers after they are packaged into a correlation pair
        if isinstance(pair, str):
            ticker1 = pair
            correl_matrix = correl_matrix.drop(ticker1, axis=0)
            correl_matrix = correl_matrix.drop(ticker1, axis=1)
        else: 
            low_corr_pairs.append(pair)
            correl_matrix = correl_matrix.drop(pair, axis=0)
            correl_matrix = correl_matrix.drop(pair, axis=1)
    return low_corr_pairs

#running the correlation pairing function and storing it in variable "stock pairs"
stock_pairs = corr_pair_extract(stock_correlations)
print(stock_pairs)

In [None]:
#Arbitrarily assign a seed to keep results consistent
random.seed(21152764)

#This part creates lists of stocks, ranging from sizes 12 to 24, 5 of each size, which randomly selects stocks.
def make_port_list(stock_pairs):
    ret_list = []
    #iterates through the minimum selectable stock pairs (6) and the maximum (length of the stock pairs)
    print(len(stock_pairs))
    for i in range(6, len(stock_pairs)+1):
        print(i)
        #simple iteration, of 5 times
        for j in range(min(comb(len(stock_pairs), i), 5)):
            #generates a random list of indexes to pick from the stock pairs
            rand_list = random.sample(range(0,len(stock_pairs)), i)
            portfolio = []
            for k in rand_list:
                portfolio += stock_pairs[k]
            ret_list.append(portfolio)
    return ret_list

port_list = make_port_list(stock_pairs)

In [None]:
stock_values_just_date = stock_values
stock_values_just_date.index = pd.to_datetime(stock_values_just_date.index)
stock_values_just_date.index = stock_values_just_date.index.date

# Display the updated DataFrame
display(stock_values_just_date)


In [None]:
portfolio_data = []

def portfolio_generator():
    global portfolio_data

    # creating portfolios -- starting wtih 12 stocks, going upto 24 stocks
    for i in range(len(port_list)):

        # for the ith portfolio, select the first i tickers
        selected_tickers = port_list[i]
        
        # objective function -- will be used in optimization steps
        # returns the tracking error between the portfolio's returns and the market's returns
        def objective(weights):
            return np.std(np.dot(stock_returns[selected_tickers].values, weights) - stock_returns['Market Returns'].values)
            # return np.std(np.dot(stock_returns[selected_tickers].mean(), weights) - stock_returns['Market Returns'].dropna().mean())
        
        # initial weights are calcualted so that, to start, every stock has equal weight
        initial_weights = [1 / len(selected_tickers)] * len(selected_tickers)
        
        # this constraint ensures that all of the stock weights sum to 1 (100%)
        def sum_weights(weights):
            return sum(weights) - 1
        
        constraints = [{'type': 'eq', 'fun': sum_weights}]
        
        # the bounds for each stock are that each stock must have a minimum weight of 1/2n% and maximum weight of 15%
        bounds = [(1 / (2 * len(selected_tickers)), 0.15)] * len(selected_tickers)
        
        # optimize the portfolio weights
        result = minimize(objective, initial_weights, constraints=constraints, bounds=bounds)
        
        if result.success:

            weights = result.x
                        
            # calculate portfolio value and the number of shares to purchase
            portfolio_value = 1000000 * weights
            shares = portfolio_value / np.array([stock_values_just_date.at[TARGET_DATE, ticker] for ticker in selected_tickers]) 

            # calculate fees
            fee = np.minimum(3.95, 0.001 * shares)
            
            # subtract fees from portfolio value
            portfolio_value -= fee
            
            # Add to the results
            portfolio_data.append({
                'tickers': (selected_tickers),  # Tickers in the portfolio
                'weights': weights,  # Optimized weights
                'Tracking Error': result.fun,  # Standard deviation (risk)
                'Difference of Mean Returns': abs(np.dot(stock_returns[selected_tickers].values, weights).mean() - stock_returns['Market Returns'].values.mean()),
                'num_stocks': i,  # Number of stocks in the portfolio
                'total_cost': portfolio_value.sum() + sum(fee),  # Total cost after fees
                'shares_purchased': shares,  # Shares purchased
                'portfolio_value': portfolio_value.sum(),  # Total portfolio value after fees
                'final_fees': sum(fee)
            })

    return pd.DataFrame(portfolio_data)

portfolio_df = portfolio_generator()
# portfolio_df['weights'].iloc[0]

portfolio_df

When your code is run with our .csv file, it must create a DataFrame called “Portfolio_Final” where the index starts at 1 and ends at the number of stocks that your code chooses.  The headings must be as follows: Ticker, Price, Currency, Shares, Value, Weight. Ticker will be the ticker your code selected, Price is the price on November 25, 2023, Currency is either USD or CAD, Shares is the number of shares you purchased of that stock, Value is the total value of those shares, and Weight is the weight that the value of shares represents relative to the value of your portfolio (which again, must be $1,000,000 CAD).  You should show that your total adds to $1,000,000 CAD and that the weights add to 100%.  This DataFrame must be printed to the screen as the second to last output to the screen.  The final output to the screen will be a declaration to be detailed below.


In [None]:
temp = portfolio_df[portfolio_df['Difference of Mean Returns'] == portfolio_df['Difference of Mean Returns'].min()]


# Define columns for the final portfolio DataFrame
port = []

for i in range(len(temp['tickers'].iloc[0])):
    port.append({
        "Ticker": temp['tickers'].iloc[0][i],
        "Price": stock_values.loc[pd.to_datetime(TARGET_DATE).date(), temp['tickers'].iloc[0][i]],
        "Currency": yf.Ticker(temp['tickers'].iloc[0][i]).info['currency'],
        "Shares": temp['shares_purchased'].iloc[0][i],
        "Value": stock_values.loc[pd.to_datetime(TARGET_DATE).date(), temp['tickers'].iloc[0][i]] * temp['shares_purchased'].iloc[0][i],
        "Weight": temp['weights'].iloc[0][i]})

Portfolio_Final = pd.DataFrame(port)

# Reset index to start at 1
Portfolio_Final.index = Portfolio_Final.index + 1

Portfolio_Final

In [None]:
Portfolio_Final.to_csv("Stocks_Group_12.csv", sep='\t', encoding='UTF-8')

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Alex, Derek & Sharuga