In [5]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import random as r
from datetime import datetime
import requests

## Group Assignment
### Team Number: 16
### Team Member Names: Caesar, Griffin, Harjosh
### Team Strategy Chosen: BEAT (Market Beat or Market Meet)

In [6]:
## Read in the CSV file we are given.
tickers_df = pd.read_csv("Tickers.csv", header=None)
tickers_list = tickers_df[0].tolist()
close = [] # Store the closing prices of valid stocks.
close_tickers = [] # Corresponding tickers to above.
## Temporarily assigned values to variables meant to store the first and last days of our data.
first_day = pd.to_datetime('2025-01-01')
last_day = pd.to_datetime('2018-01-01')
## The following loop iterates through the entire list of tickers we are given and adds all the valid
## ones into close and close_tickers to be used later.
for i in range (len(tickers_list)):
    stock = yf.Ticker(tickers_list[i])
    stock_hist = stock.history(period="3mo")
    ## We do not use empty data or data not in US or Canadian currency.
    if not stock_hist.empty and stock.info['currency'] in ["USD", "CAD"]:
        ## Checking monthly volume.
        stock_hist_copy = stock_hist.copy()
        stock_hist_copy.index = stock_hist_copy.index.tz_localize(None)
        stock_hist_copy['Month'] = stock_hist_copy.index.to_period("M")
        grouped = stock_hist_copy.groupby('Month')
        stock_hist.index = pd.to_datetime(stock_hist.index.strftime('%Y-%m-%d'))
        valid = False
        for month, data in grouped:
            if len(data) < 18: # Each month must have at least 18 days.
                continue
            avg_vol = data['Volume'].mean()
            if avg_vol >= 100000: # 100000 stocks also need to be traded.
                valid = True
        if valid: # Once all conditions are met, add stock and its ticker into its respective corresponding list.
            close.append(stock_hist['Close'])
            close_tickers.append(tickers_list[i])
        if stock_hist.index[len(stock_hist) - 1] > last_day: # Find last day of all data.
            last_day = stock_hist.index[len(stock_hist) - 1]
        elif stock_hist.index[0] < first_day: # Find first day of all data.
            first_day = stock_hist.index[0]
date = pd.date_range(start = first_day, end = last_day).strftime('%Y-%m-%d').values # Create index of the portfolio.
portfolio = pd.DataFrame({'Date': date}) # Initialize portfolio.
portfolio['Date'] = pd.to_datetime(portfolio['Date'])
portfolio = portfolio.set_index('Date')
for i in range (len(close)): # Add ticker data into portfolio.
    portfolio.insert(i, close_tickers[i], close[i])
portfolio = portfolio.dropna() # Remove entries with unusable data.

$AGN: possibly delisted; no price data found  (period=3mo) (Yahoo error = "No data found, symbol may be delisted")
$CELG: possibly delisted; no price data found  (period=3mo) (Yahoo error = "No data found, symbol may be delisted")
$MON: possibly delisted; no price data found  (period=3mo) (Yahoo error = "No data found, symbol may be delisted")
$RTN: possibly delisted; no price data found  (period=3mo) (Yahoo error = "No data found, symbol may be delisted")


In [7]:
def mpt(tickers, port):
    top_count = 15 # How many elements you want in the top list.
    top_sharpe = [] # Top N tickers by weight in the optimal portfolio for maximum Sharpe ratio.
    top_vol = [] # Bottom N tickers by volatility in the optimal portfolio for minimized risk.
    num_portfolios = 70000 # How many different randomly generated weighted portfolios we will test.
    num_assets = len(port.columns)
    return_portfolio = port.pct_change(fill_method=None)
    ## Empty lists to be filled during simulation.
    port_sharpe = []
    port_weights = []
    var_matrix = port.cov() # Covariance matrix of all the stocks in the portfolio.
    exp_return = port.mean() # Expected return.
    risk_free_rate = 0 # Assume no risk free rate.
    ## Will store the weights of each stock in the highest earning adjusted for risk portfolio and lowest volatility
    ## portfolio generated in the simulation, as well as their Sharpe ratios and volatilities respectively.
    weight_max_sharpe = 0
    max_sharpe = 0
    weight_min_vol = 0
    min_vol = 100000 # Very high number to ensure any actual stock volatility is lower.
    for i in range(num_portfolios):
        ## Randomly generate weights the normalize them so their sum is 1.
        weights = np.random.random(num_assets)
        weights = weights/np.sum(weights)
        port_weights.append(weights)
        ## Citation for the line of code below:
        port_exp_return = np.dot(weights, exp_return)
        ## Citation for the line of code below:
        port_vol = np.sqrt(np.dot(weights.T, np.dot(var_matrix, weights)))
        sharpe = port_exp_return / port_vol # Calculate Sharpe ratio.
        ## Update best portfolios if needed.
        if sharpe > max_sharpe:
            max_sharpe = sharpe
            weight_max_sharpe = weights.tolist()
        if port_vol < min_vol:
            min_vol = port_vol
            weight_min_vol = weights.tolist()
    ## The following loop returns the top top_count stocks in both of the best portfolios
    ## by how much of the portfolio they comprise by weight.
    for i in range(min(top_count, len(tickers))):
        temp1 = 0
        temp2 = 0
        temp1_index = 0
        temp2_index = 0
        ## Iterate through both lists at the same time since their lengths are the same.
        ## Replace highest weight stock at needed and store the index.
        for j in range(min(len(weight_max_sharpe), len(tickers))):
            if weight_max_sharpe[j] > temp1:
                temp1 = weight_max_sharpe[j]
                temp1_index = j
            if weight_min_vol[j] > temp2:
                temp2 = weight_min_vol[j]
                temp2_index = j
        ## Add the best stocks to their corresponding list and repeat.
        top_sharpe.append(tickers[temp1_index])
        top_vol.append(tickers[temp2_index])
        ## Ensure the stock cannot show up again in the list.
        weight_max_sharpe[weight_max_sharpe.index(temp1)] = -1
        weight_min_vol[weight_min_vol.index(temp2)] = -1
    return top_sharpe # Return the list of best stocks by Sharpe ratio.

**Algorithm Explanation:**
The function above is a simplified implementation of Modern Portfolio Theory. Using historical data for expected returns and covariance, the code estimates what combination of stock weightings would produce a portfolio with an optimal balance of risk and return by looking at its Sharpe ratio. It accounts for diversification inherently due to its use of the covariance matrix of returns for all the stocks. It differs from a more advanced approach by assuming no risk-free rate and using a Monte Carlo simulation approach to create the set of portfolios used rather than using an algorithm to directly compute an efficient frontier.

**Why?**
As our goal is to beat the market, we are not risk-averse in our strategy. Hence, instead of using MPT traditionally and choosing a portfolio with the best return for a specific risk, we choose the portfolio with the best return proportional to risk in general to maximize our potential gains. We decided to use this simplified MPT to ensure our final portfolio would end up being diversified as well as account for historical data, as we do have other approaches that simulate the future instead. The results of running this function are also an ordered list of stocks with a definitive hierarchy relative to each other. This is beneficial in choosing which stocks to put in our final portfolio later, as it gives us an idea of which stocks are definitively the best, as opposed to something like CAPM, which only produces a list of stocks that satisfy a condition, but with no further information on how they perform against each other.

The following is the implementation of the Capital Asset Pricing Model and the reference of the formula is linked below.
https://www.netsuite.com/portal/resource/articles/financial-management/capital-asset-pricing-model-capm.shtml#:~:text=The%20pieces%20of%20the%20CAPM,Ra%20being%20the%20expected%20return.


In [8]:
#This determines how far back data will be called for history
period = 5

#Capital Asset Pricing Model implementation
def capm(filtered_tickers_list):
    #Calculates Rf (risk free rate)
    #US Treasury note
    treasury_bond = yf.Ticker("^TNX")
    #Determines latest possible price
    treasury_bond_info = treasury_bond.history(period="1d")
    #Stores latest possible rate of US Treasury note 
    rf = treasury_bond_info["Close"].iloc[0]
    
    #Calculates Rm (market return)
    #S&P 500
    sp500 = yf.Ticker("^GSPC")
    #Checks history based on how long period is set
    sp500_info = sp500.history(period=(str(period)+"y"))
    #Stores the daily returns of the S&P 500 throughout the period 
    sp500_info['Daily Return'] = sp500_info['Close'].pct_change().dropna()
    #Gets an average per market days in percent form 
    rm = (sp500_info['Daily Return'].mean() * (len(sp500_info)/period)) * 100

    #Will store the chosen stocks
    stocks=[]
    #Calculates Beta (risk/volatility)
    #Goes through all tickers in list
    for i in range(len(filtered_tickers_list)):
        #Stores ticker
        ticker = yf.Ticker(filtered_tickers_list[i])
        #Finds market history values throughout period
        ticker_info = ticker.history(period=(str(period)+"y"))
        #Calculates daily returns of the stock
        ticker_info['Daily Return'] = ticker_info['Close'].pct_change().dropna()
        #Creates a dataframe of returns
        returns = pd.DataFrame()
        #Stores market's daily returns as previously calculated
        returns['Market'] = sp500_info['Daily Return']
        #Stores stock's daily returns
        returns['Stock'] = ticker_info['Daily Return']
        #Drops any missed values
        returns = returns.dropna()
        #Calculates covariance between the stock's returns and the market's
        covariance = np.cov(returns['Stock'], returns['Market'])[0][1]
        #Calculates variance of the market's returns
        market_variance = np.var(returns['Market'])
        #Calculates beta of the stock
        beta = covariance / market_variance
    
        #Stores capm based on formula
        capm = rf + beta * (rm - rf)
        #Checks if CAPM expected return is greater than the market return
        if (capm > rm):
            #Stores the stock into the chosen stock
            stocks.append(str(ticker.ticker))
    #Returns the stocks
    return stocks

**Algorithm Explanation:**
This use of the Capital Asset Pricing Model (CAPM) was chosen to build a list of stocks that would be expected to beat the market. Essentially, what the algorithm does is predict how much extra return someone would expect from a stock, given its level of risk compared to the market. The addition of Rf in the formula adds the minimum return expected for taking no market risk; the baseline return. The 
(Rm−Rf) is essentially the premium/additional return that an investor gets for investing in the market versus a risk-free asset (Rf). The final, and most important aspect, is the multiplication of Beta, which adjusts the market risk premium for the specific stock's level of risk. A higher Beta indicates that the stock is more volatile than the market, so it is expected to earn a higher return to compensate for that increased risk. Beta is calculated by looking at the daily returns of a specific stock and the market index (S&P 500) and examining the covariance of the two returns. It’s then divided by the market variance to create Beta.

**Why?**
When a stock is more volatile than the market, it reacts more strongly to market movements. For example, if the market were to go up 1%, the stock might go up 2%. We depend on this higher sensitivity to market movements to outperform the market. However, the main assumption of this method is that the market is going up, not down. If the market were to decline, the stock would underperform in this scenario and perform even worse than the market. This ties back to the concepts in Chapter 16 readings regarding risk and return. The higher the risk, the higher the expected return. If we wanted guaranteed returns, we could invest in a risk-free asset like government bonds. However, to outperform the market, we need to take on more risk than the market itself. The use of CAPM helps create a risk-adjusted return for a stock, which we compare to the S&P 500's return and store the stock if its expected return is higher.

In [9]:
def monte_carlo_simulation(ticker, filtered_tickers_list):
    num_simulations=50000
    num_days=7
    stock = yf.Ticker(ticker)
    stock_hist = stock.history(period="1y")
    # Get the percentage between the current and a prior element
    daily_returns = stock_hist['Close'].pct_change().dropna()
    mean_return = daily_returns.mean()
    std_dev = daily_returns.std()
    last_price = stock_hist['Close'].iloc[-1]
    # Store all the tryout
    simulations = []
    for simulation in range(num_simulations):
        # get the last market closing price
        prices = [last_price]
        # num_days = 7, we want to estimate the price after 7 days
        for day in range(num_days):
            # here we generate a random daily percentage change for the stock price
            # used a normal distribution with a mean equal to the stock's avg daily return
            # and a std to the volatility
            price_change = np.random.normal(mean_return, std_dev)

            # and then we calculate the new simulated price of the stock for the next day,
            # based on the previous day's price and the pct_change generated by the model
            new_price = prices[-1] * (1 + price_change)

            # Then we added it to the list
            prices.append(new_price)
        simulations.append(prices[-1])
    # Calculate the mean price value over 50000 times
    average_ending_price = np.mean(simulations)
    return average_ending_price, last_price

def mcs(filtered_tickers_list):
    # Store the filtered stock here
    each_stock_MCS = {}
    # Store the potential returns for each stock
    stock_potential_returns = {}
    # Call the function monte_carlo_simulation for each stock and calculate the expected return in
    # percentage.
    for stock in filtered_tickers_list:
        avg_future_price,last_price = monte_carlo_simulation(stock, filtered_tickers_list)
        expected_return_percentage = ((avg_future_price - last_price) / last_price) * 100
        stock_potential_returns[stock] = expected_return_percentage
    # Show the potential Percentage Returns for each stock
    for stock in each_stock_MCS:
        return_percentage = each_stock_MCS[stock]
    stock_list = list(stock_potential_returns.items())
    for i in range(len(stock_list)):
        max_index = i
        for j in range(i + 1, len(stock_list)):
            if stock_list[j][1] > stock_list[max_index][1]:
                max_index = j
        stock_list[i], stock_list[max_index] = stock_list[max_index], stock_list[i]
    
    # Store the top 15 potential returns stock
    top_15_stocks = stock_list[:15]
    
    # print it
    tickers_new = []
    for i in top_15_stocks:
        tickers_new.append(i[0])
    return tickers_new


**Algorithm Explanation:** 
To project stock price trajectories, the common practice is to use a Monte Carlo Simulation in which various scenarios are devised. 
This simulation was selected since it administers a 50,000 trial, 7-day simulation sequence, each set at the current stock value, and 
incorporates an average return randomly picked from a normal distribution derived from statistical market analysis. 
This broadens the scope of possible outcomes that can be considered, therefore revealing potential gains and losses. 
The resulting average price at the end of such simulations provides our predicted value which is a sound value given the chaos that’s existent
in the stock market.This method also integrates the returns and the volatility of each stock such that it is able to eliminate stocks that do 
not have potential return. Charts, confidence intervals and other types of visuals help us give shape to the data presented, thus reinforcing 
the rigorousness of the decision-making process. If nothing else, this enhances the chances that we end up with portfolio that has very high 
returns and moderate risks.

**Why?**
We chose the Monte Carlo Simulation because it provides us with a realistic perspective on how random stock prices are. 
By making 50,000 simulations which will span over a period of the next 7 days we are able to speculate what runs in every single stock. 
Unlike basic predictions, Monte Carlo utilizes the theory that many price changes are wholly random, and that they are reliant on the trends 
that one understands to be true of the market. This helps in estimating the gains but also the risks which is essential in having a comprehensive 
decision making process. It helps in knowing the stocks that are likely to do well and the ones that should be avoided.
In the end, Monte Carlo provides us with an even better view of the scenario, allowing us to look for high returns with a low risk strategy. 
The graphics and the graphs also make the decision making process easier which in turn enables us to formulate a more reliable portfolio which is 
based on evidence.


In [10]:
final_stocks = []
desired_len = 12 # Desired length of final portfolio.
## Make copies of the tickers and portfolio to be modified.
ticker_copy = close_tickers.copy()
portfolio_copy = portfolio.copy()
## Function to find the intersection of multiple lists.
def matches(lists):
    matches = list(set(lists[0]).intersection(lists[1]))
    lists = lists[1:]
    for i in lists:
        matches = list(set(i).intersection(matches))
    return matches
## Function to remove all elements of lst2 present in lst1.
def removeall(lst1, lst2):
    for i in lst1:
        if i in lst2:
            lst1.remove(i)
    return lst1
while len(final_stocks) < desired_len:
    stocks_1 = mcs(ticker_copy) # Monte Carlo Simulation results.
    stocks_2 = capm(ticker_copy) # Capital Asset Pricing Model results.
    stocks_3 = mpt(ticker_copy, portfolio_copy) # Modern Portfolio Theory results.
    stock_list = [stocks_1, stocks_2, stocks_3]
    match = matches(stock_list) # All stocks present in the results of every strategy.
    ticker_copy = removeall(ticker_copy, match) # Remove the tickers of the above stocks from the list of tickers we are using.
    for i in match: # Simultaneously add all stocks in match to our final stocks and remove them from the portfolio copy.
        if i in portfolio_copy.columns:
            final_stocks.append(i)
            portfolio_copy = portfolio_copy.drop(i, axis=1)
    if len(final_stocks) > desired_len: # Remove excess.
        final_stocks = final_stocks[:desired_len]

The code above produces a final list of tickers of the best stocks based on the combined outputs of all our strategies. It runs each of our strategies individually and compares their outputs, putting tickers that are present in all three into our final ticker list, removing them from the pool of tickers, and repeating the process until we have a final list of our desired length. We chose this approach because it allows us to pick stocks that are the best of all worlds. Every strategy covers a different base, and whatever ticker is omnipresent among them is optimal to beat the market as we desire.

In [11]:
#Purchases the ideal stocks from portfolio
def purchase(stocks_list):
    #Will store tickers
    stocks = []
    #Goes through all tickers in list
    for i in stocks_list:
        #Adds their ticker
        stocks.append(yf.Ticker(i))
    #The date as requested on assignment
    start_date = '2023-11-22'
    #Total funds of portfolio
    funds = 1000000
    #Funds allocated for each stock
    funds_stocks = funds/(len(stocks))
    #Will store info of final portfolio
    Portfolio_Final = pd.DataFrame()
    #Value of stock is same as funds allocated
    value = funds_stocks
    #Weight distributed eqaully
    weight = 100/(len(stocks))
    #Exchange rate ticker
    exchange_rate_ticker = yf.Ticker('USDCAD=x')
    #Stores latest possible exchange rate
    exchange_rate = exchange_rate_ticker.history(start=start_date)['Close'].iloc[-1]
    #Goes through all stocks
    for i in stocks:
        #Stores ticker
        ticker = i.ticker
        #Calculates latest price
        price = i.history(start=start_date)['Close'].iloc[-1]
        #Stores currency of stock
        currency = i.info['currency']
        #If currency is in USD
        if (currency == "USD"):
            #Gets price in CAD
            calculation_price = price * exchange_rate
        #If in CAD
        else:
            #Keeps price in CAD
            calculation_price = price
        #Checks if buying shares with individual share fee is greater than flat rate
        if (((funds_stocks/calculation_price)*0.001) > 3.95):
            #Uses flat rate (smaller value)
            shares = (funds_stocks-3.95)/calculation_price
        #Otherwise
        else:
            #Uses individual share fee rate
            shares = (funds_stocks/(calculation_price+0.001))
        #Creates a new row with information of the stock
        new_row = {'Ticker': ticker, 
                   'Price': price, 
                   'Currency': currency,
                   'Shares': shares,
                   'Value': round(value,2),
                   'Weight': str(round(weight,2)) + '%'}
        #Adds the row to the final portfolio
        Portfolio_Final = Portfolio_Final._append(new_row, ignore_index=True)
    #Creates a new row for the final totals
    new_row = {'Value': funds,
               'Weight': str(weight*(len(stocks)))+'%'}
    #Stores the totals
    Portfolio_Final = Portfolio_Final._append(new_row, ignore_index=True)
    #Will make index
    index = []
    #Goes through all indicies except last
    for i in range(len(Portfolio_Final.index)-1):
        #Adds one to index
        index.append(i+1)
    #Adds a total index
    index.append("Total")
    #Assigns the index
    Portfolio_Final.index = index
    #Returns the final portfolio
    return Portfolio_Final

#Creates the final portfolio based on final stocks from previous calculations
Portfolio_Final = purchase(final_stocks)
#Displays final portfolio
Portfolio_Final

Unnamed: 0,Ticker,Price,Currency,Shares,Value,Weight
1,BAC,47.0,USD,1268.256551,83333.33,8.33%
2,AXP,301.299988,USD,197.838785,83333.33,8.33%
3,BK,80.139999,USD,743.803762,83333.33,8.33%
4,USB,52.490002,USD,1135.609657,83333.33,8.33%
5,PYPL,86.769997,USD,686.971027,83333.33,8.33%
6,AMZN,197.119995,USD,302.398287,83333.33,8.33%
7,SHOP.TO,149.479996,CAD,557.484468,83333.33,8.33%
8,AAPL,229.869995,USD,259.315182,83333.33,8.33%
9,C,69.839996,USD,853.498822,83333.33,8.33%
10,QCOM,156.789993,USD,380.181744,83333.33,8.33%


The code above does not perform many calculations; however, some thinking still went into it. The main idea behind this section was to divide and distribute the funds equally among all stocks in the portfolio rather than analyzing different weights for each stock. There are strengths and weaknesses to this strategy. We believed this approach would automatically diversify the portfolio by treating all stocks equally, reducing the risk associated with any single stock. This negates *non-systematic (non-market) risk*. It also reduced the need for extensive analysis, allowing more time for our algorithm simulations to run.

However, a drawback is the missed potential for risk/return optimization and specific industry diversification. Some of our algorithms use risk to assess if a stock is more volatile than the S&P 500 index. For instance, we might find that the tech industry is much more volatile than others. In that case, it would be best to distribute the weight based on industry rather than across all stocks, as more tech stocks might appear in the portfolio. These stocks would likely be correlated due to their complementary nature within the same industry. However, our method prioritized a straightforward approach by diversifying between stocks rather than industries, thus leaving more focus on the algorithms selecting the stocks. This also ties back to Chapter 9, many of the stocks in the S&P500 index are controlled by a few stocks due to the equity and weight distribution of the index. By only investing in these high risk stocks, equally, we plan for all of them to out preform.

In [12]:
#Creates the stock final dataframe
def stocks_final(portfolio):
    #creates a dataframe
    Stocks_Final = pd.DataFrame()
    #will store index
    index = []
    #goes through all indexes in the portfolio except the last (total)
    for i in range(len(portfolio.index) - 1):
        #stores index
        index.append(i+1)
    #assigns index
    Stocks_Final.index = index
    #Keeps tickers
    Stocks_Final['Tickers'] = portfolio['Ticker']
    #Keeps shares bought
    Stocks_Final['Shares'] = portfolio['Shares']
    #retuns dataframe
    return Stocks_Final

#stores stocks final dataframe
Stocks_Final = stocks_final(Portfolio_Final)
#Creates csv file
Stocks_Final.to_csv('Stocks_Group_16.csv', index=False)

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Caesar, Griffin, Harjosh