In [90]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 14
### Team Member Names: Aadya, Furqan, Pranay
### Team Strategy Chosen: Market Beat

In [91]:

input_tickers = pd.read_csv("Tickers_Example.csv", names=['Tickers']) #OR any other CSV file with tickers in it
input_tickers

Unnamed: 0,Tickers
0,AAPL
1,AAPL
2,ABBV
3,ABT
4,ABT
5,ACN
6,HSBC
7,AGN
8,AIG
9,AMZN


In [92]:
## PRELIMINARY DATA PROCESSING 
## AND INPUT PROCESSING

## WILL CSV BE EXACTLY THE WAY IT IS IN EXAMPLE??

## Check if implementation of US and Canadian ticker validation is sufficient/correct
## Should we do it through currency checks, or exchange checks as below?

def validate_ticker(ticker):
    try:
        stock = yf.Ticker(ticker)
        info = stock.info

        if info and 'regularMarketPrice' in info:
            if 'exchange' in info:
                exchange = info['exchange']
                # yfinance uses these exchange codes, so we check against them
                valid_exchanges = ['NMS', 'NGM', 'NYQ','NAD','PCX','BTS','CBQ','ASE', 'TOR', 'TSE', 'Toronto', 'TSXV', 'CNQ', 'CSE','NEO']
                if exchange in valid_exchanges:
                    return True
                else:
                    return False  
            else:
                return False  
        else:
            return False
    except:
        return False

valid_tickers = []
for ticker in input_tickers['Tickers']:
    if validate_ticker(ticker):
        valid_tickers.append(ticker)
        #print(f"✓ Valid: {ticker}")  
    else:
        print(f"✗ Invalid ticker: {ticker}")

valid_tickers_df = pd.DataFrame(valid_tickers, columns=['Ticker'])
valid_tickers_df

# ✗ Invalid ticker: AGN
# ✗ Invalid ticker: MC.PA
# ✗ Invalid ticker: CELG
# ✗ Invalid ticker: 9984.T
# ✗ Invalid ticker: MON
# ✗ Invalid ticker: RELIANCE.NS
# ✗ Invalid ticker: RTN
# ✗ Invalid ticker: INFY.NS
# ✗ Invalid ticker: CPA.AX

✗ Invalid ticker: AGN
✗ Invalid ticker: MC.PA
✗ Invalid ticker: CELG
✗ Invalid ticker: 9984.T
✗ Invalid ticker: MON
✗ Invalid ticker: RELIANCE.NS
✗ Invalid ticker: RTN
✗ Invalid ticker: INFY.NS
✗ Invalid ticker: CPA.AX


Unnamed: 0,Ticker
0,AAPL
1,AAPL
2,ABBV
3,ABT
4,ABT
5,ACN
6,HSBC
7,AIG
8,AMZN
9,AXP


In [93]:
# check for duplicate tickers

def check_duplicates(df):
    duplicates = df.duplicated()

    for index in duplicates.index:
        if duplicates[index] == True:
            print(f"Removing duplicate: {df.loc[index, 'Ticker']}")
            # remove the row from the dataframe
            df.drop(index, inplace=True)
    return df.reset_index().drop(columns=['index'])

check_duplicates(valid_tickers_df)

Removing duplicate: AAPL
Removing duplicate: ABT
Removing duplicate: BA


Unnamed: 0,Ticker
0,AAPL
1,ABBV
2,ABT
3,ACN
4,HSBC
5,AIG
6,AMZN
7,AXP
8,BA
9,BAC


In [94]:
## REMOVING STOCKS WITH AVG TRADE VOLUME < 5000 between Oct 1, 2024 and Sep 30, 2025 (drop months with < 18 trading days).
##CHECK to ensure, and filter out stocks that do not meet this criteria

def filter_by_avg_volume(df, start_date="2024-10-01", end_date="2025-09-30", min_avg_volume=5000, min_trading_days=18):
    filtered_tickers = []
    for ticker in df['Ticker']:
        stock = yf.Ticker(ticker)
        
        hist = stock.history(start=start_date, end=end_date)
        hist.index = hist.index.tz_localize(None)

        monthly_groups = hist.groupby(hist.index.to_period("M"))
        valid = True
        for month, group in monthly_groups:
            trading_days = len(group)
            if trading_days >= min_trading_days:
                avg_volume = group['Volume'].mean()
                if avg_volume < min_avg_volume:
                    print(f"✗ Ticker failed volume filter: {ticker} (Avg Volume: {avg_volume:.2f} in {month})")
                    valid = False
                    break
        if valid:
            print(f"✓ Ticker passed volume filter: {ticker}")
            filtered_tickers.append(ticker)
    return pd.DataFrame(filtered_tickers, columns=['Ticker'])

final_tickers_df = filter_by_avg_volume(valid_tickers_df)
final_tickers_df


✓ Ticker passed volume filter: AAPL
✓ Ticker passed volume filter: ABBV
✓ Ticker passed volume filter: ABT
✓ Ticker passed volume filter: ACN
✓ Ticker passed volume filter: HSBC
✓ Ticker passed volume filter: AIG
✓ Ticker passed volume filter: AMZN
✓ Ticker passed volume filter: AXP
✓ Ticker passed volume filter: BA
✓ Ticker passed volume filter: BAC
✓ Ticker passed volume filter: BB.TO
✓ Ticker passed volume filter: BIIB
✓ Ticker passed volume filter: BK
✓ Ticker passed volume filter: SAP
✓ Ticker passed volume filter: BLK
✓ Ticker passed volume filter: BMY
✓ Ticker passed volume filter: C
✓ Ticker passed volume filter: CAT
✓ Ticker passed volume filter: CL
✓ Ticker passed volume filter: SAN
✓ Ticker passed volume filter: KO
✓ Ticker passed volume filter: LLY
✓ Ticker passed volume filter: LMT
✓ Ticker passed volume filter: MO
✓ Ticker passed volume filter: MRK
✓ Ticker passed volume filter: PEP
✓ Ticker passed volume filter: PFE
✓ Ticker passed volume filter: PG
✓ Ticker passed volum

Unnamed: 0,Ticker
0,AAPL
1,ABBV
2,ABT
3,ACN
4,HSBC
5,AIG
6,AMZN
7,AXP
8,BA
9,BAC


In [95]:
#Downloading history for valid tickers

tickers_list =  valid_tickers_df['Ticker'].tolist()

stock_data= yf.download(tickers_list, period="6mo", interval="1d", group_by='ticker', auto_adjust=True, threads=True)

# Download benchmark data (S&P 500 + TSX average)
sp500_data = yf.download('^GSPC', period='6mo', auto_adjust=True)
tsx_data = yf.download('^GSPTSE', period='6mo', auto_adjust=True)

# checking the 'Close' prices to ensure they are Series
if isinstance(sp500_data, pd.DataFrame):
    sp500 = sp500_data['Close'].squeeze() 
else:
    sp500 = sp500_data

if isinstance(tsx_data, pd.DataFrame):
    tsx = tsx_data['Close'].squeeze()  
else:
    tsx = tsx_data

# Remove timezones
sp500.index = sp500.index.tz_localize(None)
tsx.index = tsx.index.tz_localize(None)


[*********************100%***********************]  40 of 40 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Scenario 1: High Alpha, Low Beta (< 1.0)

 - Stock outperforms with less volatility than market

 - Best case/ideal scenario 

 - Means it is a Defensive stock that somehow beats the market, low risk high return

Scenario 2: High Alpha, High Beta (> 1.5)

 - Stock outperforms but is very volatile

 - Risky but rewarding

 - Also high reward, but higher risk as well. Still viable option to help choose stocks that will beat market

 - Example: Tech stock in a bull market

Scenario 3: Low/Negative Alpha, High Beta

 - Stock underperforms and is volatile

 - Low return relative to market/less than market, AND high volatility so potential further loss on an already underperforming stock.  

 - Worst case 

 - Want to avoid these types of stocks 


WE WANT TO INVEST IN:

Stocks which fall within the first two scenarios, as they are the most ideal to maximize returns, scenario 1 being the most effective. Stocks which fall in scenario 3, will be avoided. 

In [None]:
## CALCULATING ALPHA AND SORTING TICKERS HIGH TO LOW BY ALPHA


#assuming we have a list of valid tickers
#we need a function that simply returns the alpha of a ticker
#then run a for loop that calls that function for every ticker
#while ranking it from best to worst in a list or dictionary maybe
#going to also assume we have the portfolio returns, risk free rate, expected market return, and the portfolio beta


# calculates beta of a stock 
# beta is cov (Rp, Rm) / var (Rm)
def calc_beta(ticker_returns, market_returns):
    covariance = ticker_returns.cov(market_returns)
    variance = market_returns.var()
    beta = covariance / variance
    return beta

#function to calculate alpha of a ticker given the following parameters
#returns the ticker and its alpha in a list
#Good alpha > 0; Means positive returns relative to the market
#Zero alpha (\(\alpha =0\)): The investment's return was in line with the benchmark index, meaning it did not outperform or underperform after adjusting for risk.
#Negative alpha (\(<0\)): The investment's performance was worse than its benchmark index, considering the risk taken. 
def calc_alpha(portfolio_return, risk_free_rate, expected_market_return, portfolio_beta): 
    tick_alpha_calculation = portfolio_return - (risk_free_rate + (portfolio_beta * (expected_market_return-risk_free_rate)))

    return tick_alpha_calculation

# calculating sharpe ratio 
# sharpe ratio of a stock is = (Rp - Rf) / σp
# Good sharpe ratio > 1: indicates good risk-adjusted performance
# Average sharpe ratio ~ 0.5 - 1: indicates moderate risk-adjusted performance
# Poor sharpe ratio < 0.5: indicates poor risk-adjusted performance
# if ticker_returns are daily, then risk free has to be daily as well
def calc_sharpe(ticker_returns, risk_free_rate):
    std = ticker_returns.std()
    mean_return = ticker_returns.mean()
    sharpe_ratio = (mean_return - risk_free_rate) / std
    return sharpe_ratio

# calculating sortino ratio 
# sortino ratio = (Rp - Rf) / σd when σd is the standard deviation of negative asset returns
# sortino ratio focuses only on downside volatility which helps eliminate 
# the risk of downside movements in asset prices
# Intuitively good return per unit of bad volatility
# Good sortino ratio > 0: good risk-adjusted performance with low downside 
# Zero sortino ratio = 0: no excess return relative to downside risk
# Low sortino ratio < 0: poor risk-adjusted performance with high downside risk

def calc_sortino(ticker_returns, risk_free_rate):
    downside_dev = ticker_returns[ticker_returns < 0].std()
    mean_return = ticker_returns.mean()
    sortino_ratio = (mean_return - risk_free_rate) / downside_dev
    return sortino_ratio


##Calculating momentum, shows how much a stock has returned relative to a chosen date/period a while back
#Determining the change in return a given stock has had over a given period of time can aid in picking
#stocks that are trending upwards


#NOTE TO US, momenutum is only needed if we change the period for other data as well
#for now both are based on 6 months

def calc_momentum(ticker_returns, period):
    momentum = (ticker_returns.iloc[-1]/ticker_returns.iloc[-period]) - 1
    return momentum





In [106]:
sp500_aligned, tsx_aligned = sp500.align(tsx, join='inner')

# Create benchmark
benchmark_prices = (sp500_aligned + tsx_aligned) / 2
benchmark_returns = benchmark_prices.pct_change().dropna()

risk_free_rate = 0.04 / 252  # Daily risk-free rate assuming 4% annual
market_actual_return = (benchmark_prices.iloc[-1] / benchmark_prices.iloc[0]) - 1 

## Will build loop though stock_data[] and as it goes through each ticker we can calculate the relevant metrics for those tickers
##Apply created functions for metrics to the tickers while we iterate through the loop

alpha_results = []

for ticker in tickers_list:
    stock_close = stock_data[ticker]['Close'].dropna()
    stock_close.index = stock_close.index.tz_localize(None)
    stock_returns = stock_close.pct_change().dropna()

    beta = calc_beta(stock_returns, benchmark_returns)

    
    stock_actual_return = (stock_close.iloc[-1] / stock_close.iloc[0]) - 1
    alpha = calc_alpha(stock_actual_return, risk_free_rate, market_actual_return, beta)

    sortino = calc_sortino(stock_returns, risk_free_rate)
    momentum = calc_momentum(stock_close, period=63) #3 months
    
    # Append to results list
    alpha_results.append({
        'Ticker': ticker,
        'Alpha': alpha,
        'Beta': beta,
        'Sortino': sortino,
        'Return': stock_actual_return,
        'Momentum': momentum
    })

    print(f"✓ {ticker}: Alpha={alpha:.4f}, Beta={beta:.2f}, Sortino={sortino:.2f}, Momentum={momentum:.2f}")

# Convert to DataFrame
results_df = pd.DataFrame(alpha_results)

# Sort by Alpha (highest first)
results_df = results_df.sort_values('Alpha', ascending=False)

# Display
print("\n=== Top Stocks by Alpha ===")
results_df.reset_index(drop=True, inplace=True)
results_df
    
    

    

✓ AAPL: Alpha=0.1783, Beta=0.80, Sortino=0.24, Momentum=0.18
✓ ABBV: Alpha=0.2386, Beta=0.27, Sortino=0.21, Momentum=0.11
✓ ABT: Alpha=-0.0761, Beta=0.11, Sortino=-0.04, Momentum=-0.04
✓ ACN: Alpha=-0.3604, Beta=0.76, Sortino=-0.17, Momentum=-0.07
✓ HSBC: Alpha=0.0449, Beta=0.81, Sortino=0.11, Momentum=0.06
✓ AIG: Alpha=-0.1469, Beta=0.38, Sortino=-0.07, Momentum=-0.09
✓ AMZN: Alpha=-0.0691, Beta=1.04, Sortino=0.05, Momentum=-0.03
✓ AXP: Alpha=0.0192, Beta=0.98, Sortino=0.12, Momentum=0.08
✓ BA: Alpha=-0.2050, Beta=0.64, Sortino=-0.07, Momentum=-0.19
✓ BAC: Alpha=0.0267, Beta=0.98, Sortino=0.14, Momentum=0.06
✓ BB.TO: Alpha=-0.1455, Beta=1.50, Sortino=0.06, Momentum=0.17
✓ BIIB: Alpha=0.2021, Beta=0.55, Sortino=0.22, Momentum=0.21
✓ BK: Alpha=0.0801, Beta=0.81, Sortino=0.19, Momentum=0.05
✓ SAP: Alpha=-0.3104, Beta=0.69, Sortino=-0.16, Momentum=-0.12
✓ BLK: Alpha=-0.1609, Beta=1.22, Sortino=0.01, Momentum=-0.11
✓ BMY: Alpha=-0.0793, Beta=0.40, Sortino=-0.01, Momentum=-0.03
✓ C: Alpha=0

Unnamed: 0,Ticker,Alpha,Beta,Sortino,Return,Momentum
0,CAT,0.412985,1.169668,0.391015,0.592982,0.273209
1,LLY,0.294539,0.751376,0.129076,0.410222,0.476986
2,ABBV,0.238625,0.268323,0.20763,0.280039,0.113908
3,MRK,0.226842,0.106836,0.179545,0.243427,0.098648
4,BIIB,0.202104,0.546755,0.222946,0.286327,0.205441
5,TD.TO,0.200504,0.627677,0.258332,0.297169,0.129868
6,AAPL,0.178284,0.797135,0.242047,0.301003,0.180279
7,C,0.138534,1.25615,0.254173,0.331827,0.054222
8,SAN,0.122476,1.098022,0.214665,0.291457,0.050051
9,PEP,0.118574,0.134719,0.13394,0.139446,-0.00729


In [107]:
##USING METRICS TO PICK TOP 10 STOCKS TO INVEST IN

results_df['Alpha_rank'] = results_df['Alpha'].rank(ascending=False)
results_df['Sortino_rank'] = results_df['Sortino'].rank(ascending=False)
results_df['Momentum_rank'] = results_df['Momentum'].rank(ascending=False)

##Beta close to 1.15 is ideal, so we rank based on distance from 1.15
##ascending True means closest to 1.15 gets rank 1, lowest distance to greatest distance
results_df['Beta_rank'] = (results_df['Beta'] - 1.15).abs().rank(ascending=True)

# Score (simple sum - equal weight)
results_df['Composite_Score'] = (
    results_df['Alpha_rank'] + 
    results_df['Sortino_rank'] + 
    results_df['Momentum_rank'] + 
    results_df['Beta_rank']
)

# Sort and select
# Create new dataframe with just ranks and composite score
scored_df = results_df[['Ticker', 'Alpha_rank', 'Sortino_rank', 'Momentum_rank', 'Beta_rank', 'Composite_Score']].copy()

# Sort by composite score
scored_df = scored_df.sort_values('Composite_Score')

scored_df.reset_index(drop=True, inplace=True)

scored_df


Unnamed: 0,Ticker,Alpha_rank,Sortino_rank,Momentum_rank,Beta_rank,Composite_Score
0,CAT,1.0,1.0,2.0,1.0,5.0
1,AAPL,7.0,4.0,4.0,14.0,29.0
2,C,8.0,3.0,17.0,6.0,34.0
3,LLY,2.0,14.0,1.0,18.0,35.0
4,TD.TO,6.0,2.0,6.0,22.0,36.0
5,SAN,9.0,7.0,19.0,3.0,38.0
6,BIIB,5.0,6.0,3.0,25.0,39.0
7,RY.TO,11.0,5.0,8.0,19.0,43.0
8,ABBV,3.0,8.0,7.0,30.0,48.0
9,BK,12.0,9.0,18.0,12.0,51.0


In [108]:
## TOP 10 STOCKS TO INVEST IN BASED ON METRICS ##
top_10_stocks = scored_df.head(10)
top_10_stocks


Unnamed: 0,Ticker,Alpha_rank,Sortino_rank,Momentum_rank,Beta_rank,Composite_Score
0,CAT,1.0,1.0,2.0,1.0,5.0
1,AAPL,7.0,4.0,4.0,14.0,29.0
2,C,8.0,3.0,17.0,6.0,34.0
3,LLY,2.0,14.0,1.0,18.0,35.0
4,TD.TO,6.0,2.0,6.0,22.0,36.0
5,SAN,9.0,7.0,19.0,3.0,38.0
6,BIIB,5.0,6.0,3.0,25.0,39.0
7,RY.TO,11.0,5.0,8.0,19.0,43.0
8,ABBV,3.0,8.0,7.0,30.0,48.0
9,BK,12.0,9.0,18.0,12.0,51.0


In [99]:
### OVERALL PORTFOLIO CHECK. MAKING SURE PORTFOLIO MEETS 

### BASIC REQUIREMENTS:
### 1. NO MORE THEN 40% IN A SINGLE SECTOR
### 2. ONE SMALL CAP
### 3. ONE LARGE CAP


## first going to sort the valid tickers into new lists of largecap, small cap, and industry
## which i can then use to check if my portfolio works

#argument should be the list of all valid tickers
def sort_smallcap(tickers):

    smallcap_tickers = []

    for ticker in tickers:

        tick_info = yf.Ticker(ticker).fast_info
        market_cap = tick_info.get("marketCap")

        if market_cap is not None and market_cap < 2000000000:
            smallcap_tickers.append(ticker)

    return smallcap_tickers
     


def sort_largecap(tickers):

    largecap_tickers = []

    for ticker in tickers:

        tick_info = yf.Ticker(ticker).fast_info
        market_cap = tick_info.get("marketCap")
        
        if market_cap is not None and market_cap > 10000000000:
            largecap_tickers.append(ticker)

    return largecap_tickers



#portfolio argument should be a list of just the TEN tickers in the portfolio
################################################# IF WE CHANGE NUMBER OF TICKERS WELL HAVE TO REVISE THIS FUNCTION
#final_tickers argument should be a list of all the valid tickers RANKED from best to worst through all of the analysis above
def check_portfolio(portfolio, final_tickers):
    

    smallcap_tickers = sort_smallcap(final_tickers)
    largecap_tickers = sort_largecap(final_tickers)

    while True:

        is_smallcap = False
        is_largecap = False
        notfortypercent = True 
        seen_industries = {}
        industry_overforty = None



        for ticker in portfolio:

            if ticker in smallcap_tickers:
                is_smallcap = True
        
            if ticker in largecap_tickers:
                is_largecap = True 

            #keeping track of how many times weve seen a certain industry
            industry = yf.Ticker(ticker).info.get("industry")
            seen_industries[industry] += 1

        for industry, amount in seen_industries.items():
            if amount / 10 > 0.4:
                notfortypercent = False
                industry_overforty = industry

        if is_smallcap and is_largecap and industry_overforty:
            print("Portfolio is Valid.")
            return portfolio
        


        #ACTUALLY FIXING THE PORTFOLIO

        #the logic for small cap and large cap is this:
        #if our booleans are true then there is NO small/large cap in our portfolio so
        #we remove the last stock in our actual portfolio, because 1-9 are better then the tenth stock based of our algorithm
        #then we take the first ticker in our sorted list of small/large cap tickers that is NOT in our portfolio, 
        # (technically there shouldnt be a small/large cap in there but this is a good extra safet check)
        #and we put it in to the portfolio back at the 10th position

        if not is_smallcap:
            for tick in smallcap_tickers:
                if tick not in portfolio:
                    portfolio.pop(-1)
                    portfolio.append(tick)
                    break

            continue

        if not is_largecap:
            for tick in largecap_tickers:
                if tick not in portfolio:
                    portfolio.pop(-1)
                    portfolio.append(tick)
                    break

            continue


        #the logic for the industry check is this:
        #if there is an industry worth over forty percent AND we have the industry name, which we should then
        #we take out the last occurene of that industry in our portfolio, again because its ranked high to low, the latter stocks are non-optimal picks
        #so we remove the last occurece, and then we look in the tickers for the first one that is NOT in the portfolio and NOT in the same industry
        #and then we replace it by appending it to the end of the portfolio
        
    
        if not notfortypercent and industry_overforty is not None:

            worstposition = -1 #last index
            removeworst = None
            swapwith = None

            for tick in portfolio:

                tick_industry = yf.Ticker(tick).info.get("industry")

                if tick_industry == industry_overforty:
                    position = final_tickers.index(tick)

                    if position > worstposition:
                        worstposition = position
                        removeworst = tick

        
            for tick in final_tickers:
                if tick not in portfolio:
                    tick_industry = yf.Ticker(tick).info.get("industry")

                    if tick_industry != industry_overforty:
                        swapwith = tick
                        break


            if removeworst != None and swapwith != None:
                portfolio.remove(removeworst)
                portfolio.append(swapwith)

            continue


        else:
            return portfolio
        





## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.