In [4]:
#These are the libraries you can use.  You may add any libraries directly related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 14
### Team Member Names: Aadya, Furqan, Pranay
### Team Strategy Chosen: Market Beat

In [15]:

# Load the ticker list into a single 'Tickers' column. Because the column name
# is supplied explicitly, every row of the file (including the first) is read
# as data. Swap the path for any other CSV file with tickers in it.
input_tickers = pd.read_csv(
    "Tickers_Example.csv",
    header=None,
    names=['Tickers'],
)
input_tickers

Unnamed: 0,Tickers
0,AAPL
1,AAPL
2,ABBV
3,ABT
4,ABT
5,ACN
6,HSBC
7,AGN
8,AIG
9,AMZN


In [None]:
## PRELIMINARY DATA PROCESSING 
## AND INPUT PROCESSING

## WILL CSV BE EXACTLY THE WAY IT IS IN EXAMPLE??

## Check if implementation of US and Canadian ticker validation is sufficient/correct
## Should we do it through currency checks, or exchange checks as below?

def validate_ticker(ticker):
    """Return True when `ticker` is a priced listing on an accepted exchange.

    The ticker is looked up through yfinance. It is accepted only when the
    returned info mapping is non-empty, contains a 'regularMarketPrice' entry,
    and reports an 'exchange' code that appears in the accepted list below
    (the US and Canadian exchange codes this notebook checks against).

    Parameters
    ----------
    ticker : str
        Symbol to look up (e.g. "AAPL", "BB.TO").

    Returns
    -------
    bool
        True if the ticker passes every check, False otherwise. Any ordinary
        error raised during the lookup is treated as "invalid" and yields
        False; KeyboardInterrupt / SystemExit are NOT swallowed, so a long
        validation run can still be interrupted.
    """
    # yfinance uses these exchange codes, so we check against them
    valid_exchanges = {
        'NMS', 'NGM', 'NYQ', 'NAD', 'PCX', 'BTS', 'CBQ', 'ASE',
        'TOR', 'TSE', 'Toronto', 'TSXV', 'CNQ', 'CSE', 'NEO',
    }
    try:
        info = yf.Ticker(ticker).info

        # Delisted / unknown symbols come back empty or without a market price.
        if not info or 'regularMarketPrice' not in info:
            return False
        if 'exchange' not in info:
            return False
        return info['exchange'] in valid_exchanges
    except Exception:
        # Best-effort validation: a failed lookup means "not a usable ticker".
        # `Exception` (rather than a bare `except`) lets Ctrl-C propagate.
        return False

# Run every input ticker through validate_ticker, reporting each rejection
# and collecting the survivors in their original order.
valid_tickers = []
for ticker in input_tickers['Tickers']:
    if not validate_ticker(ticker):
        print(f"✗ Invalid ticker: {ticker}")
        continue
    valid_tickers.append(ticker)

# One-column frame of the tickers that passed validation.
valid_tickers_df = pd.DataFrame(valid_tickers, columns=['Ticker'])
valid_tickers_df

# ✗ Invalid ticker: AGN
# ✗ Invalid ticker: MC.PA
# ✗ Invalid ticker: CELG
# ✗ Invalid ticker: 9984.T
# ✗ Invalid ticker: MON
# ✗ Invalid ticker: RELIANCE.NS
# ✗ Invalid ticker: RTN
# ✗ Invalid ticker: INFY.NS
# ✗ Invalid ticker: CPA.AX

✗ Invalid ticker: AGN
✗ Invalid ticker: MC.PA
✗ Invalid ticker: CELG
✗ Invalid ticker: 9984.T
✗ Invalid ticker: MON
✗ Invalid ticker: RELIANCE.NS
✗ Invalid ticker: RTN
✗ Invalid ticker: INFY.NS
✗ Invalid ticker: CPA.AX


Unnamed: 0,Ticker
0,AAPL
1,AAPL
2,ABBV
3,ABT
4,ABT
5,ACN
6,HSBC
7,AIG
8,AMZN
9,AXP


In [23]:
# check for duplicate tickers

def check_duplicates(df):
    """Remove duplicate rows from `df`, reporting each removed ticker.

    A row is a duplicate when an identical row appears earlier in the frame;
    the first occurrence is kept. One "Removing duplicate: ..." line is
    printed per removed row, using that row's 'Ticker' value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Ticker' column. NOTE: it is modified in place (the
        duplicate rows are dropped from it), which existing call sites that
        ignore the return value depend on. Its index is left with gaps.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the de-duplicated rows with a fresh 0..n-1 index.
    """
    # Positional boolean mask, so this also works if index labels repeat.
    duplicate_mask = df.duplicated().to_numpy()

    for ticker in df.loc[duplicate_mask, 'Ticker']:
        print(f"Removing duplicate: {ticker}")

    # Same rows as the mask above (keep='first'), removed from the caller's frame.
    df.drop_duplicates(inplace=True)

    # drop=True discards the old index outright, so this works even when the
    # index is named or the frame already has a column called 'index'.
    return df.reset_index(drop=True)

# Rebind to the returned frame so later cells use the de-duplicated copy with a
# clean 0..n-1 index, instead of relying only on the helper's in-place mutation.
valid_tickers_df = check_duplicates(valid_tickers_df)
valid_tickers_df

Removing duplicate: AAPL
Removing duplicate: ABT
Removing duplicate: BA


Unnamed: 0,Ticker
0,AAPL
1,ABBV
2,ABT
3,ACN
4,HSBC
5,AIG
6,AMZN
7,AXP
8,BA
9,BAC


In [32]:
## REMOVING STOCKS WITH AVG TRADE VOLUME < 5000 between Oct 1, 2024 and Sep 30, 2025 (drop months with < 18 trading days).
##CHECK to ensure, and filter out stocks that do not meet this criteria

def filter_by_avg_volume(df, start_date="2024-10-01", end_date="2025-09-30", min_avg_volume=5000, min_trading_days=18):
    """Keep only the tickers whose monthly average daily volume is high enough.

    For each ticker, daily history between `start_date` and `end_date`
    (both inclusive) is fetched from yfinance and grouped by calendar month.
    Months with fewer than `min_trading_days` rows are ignored. The ticker is
    rejected as soon as one remaining month has a mean 'Volume' below
    `min_avg_volume`. A ticker with no history at all, or with no month that
    reaches `min_trading_days`, is also rejected because its volume cannot be
    verified. A pass/fail line is printed for every ticker.

    NOTE(review): the check is per month (any single low month fails the
    ticker), not one average over the whole window -- confirm this matches
    the intended rule.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Ticker' column of symbols to check.
    start_date, end_date : str or datetime-like
        First and last calendar day of the window, inclusive.
    min_avg_volume : float
        Minimum acceptable mean daily volume for a counted month.
    min_trading_days : int
        Minimum number of trading days for a month to be counted.

    Returns
    -------
    pandas.DataFrame
        One-column ('Ticker') frame of the tickers that passed, in input order.
    """
    # yfinance treats `end` as exclusive, so request one extra day to make
    # sure `end_date` itself is part of the window.
    fetch_end = (pd.Timestamp(end_date) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")

    filtered_tickers = []
    for ticker in df['Ticker']:
        hist = yf.Ticker(ticker).history(start=start_date, end=fetch_end)

        if hist.empty:
            print(f"✗ Ticker failed volume filter: {ticker} (no price history in window)")
            continue

        # Strip the exchange timezone before converting to monthly periods.
        hist.index = hist.index.tz_localize(None)

        counted_months = 0
        failed = False
        for month, group in hist.groupby(hist.index.to_period("M")):
            if len(group) < min_trading_days:
                continue  # short month: excluded from the check
            counted_months += 1
            avg_volume = group['Volume'].mean()
            if avg_volume < min_avg_volume:
                print(f"✗ Ticker failed volume filter: {ticker} (Avg Volume: {avg_volume:.2f} in {month})")
                failed = True
                break

        if failed:
            continue
        if counted_months == 0:
            # Nothing was actually checked, so the ticker must not pass by default.
            print(f"✗ Ticker failed volume filter: {ticker} (no month with at least {min_trading_days} trading days)")
            continue

        print(f"✓ Ticker passed volume filter: {ticker}")
        filtered_tickers.append(ticker)
    return pd.DataFrame(filtered_tickers, columns=['Ticker'])

# Apply the volume filter (default window and thresholds) to the validated
# tickers, then display the tickers that passed.
final_tickers_df = filter_by_avg_volume(valid_tickers_df)
final_tickers_df


✓ Ticker passed volume filter: AAPL
✓ Ticker passed volume filter: ABBV
✓ Ticker passed volume filter: ABT
✓ Ticker passed volume filter: ACN
✓ Ticker passed volume filter: HSBC
✓ Ticker passed volume filter: AIG
✓ Ticker passed volume filter: AMZN
✓ Ticker passed volume filter: AXP
✓ Ticker passed volume filter: BA
✓ Ticker passed volume filter: BAC
✓ Ticker passed volume filter: BB.TO
✓ Ticker passed volume filter: BIIB
✓ Ticker passed volume filter: BK
✓ Ticker passed volume filter: SAP
✓ Ticker passed volume filter: BLK
✓ Ticker passed volume filter: BMY
✓ Ticker passed volume filter: C
✓ Ticker passed volume filter: CAT
✓ Ticker passed volume filter: CL
✓ Ticker passed volume filter: SAN
✓ Ticker passed volume filter: KO
✓ Ticker passed volume filter: LLY
✓ Ticker passed volume filter: LMT
✓ Ticker passed volume filter: MO
✓ Ticker passed volume filter: MRK
✓ Ticker passed volume filter: PEP
✓ Ticker passed volume filter: PFE
✓ Ticker passed volume filter: PG
✓ Ticker passed volum

Unnamed: 0,Ticker
0,AAPL
1,ABBV
2,ABT
3,ACN
4,HSBC
5,AIG
6,AMZN
7,AXP
8,BA
9,BAC


In [None]:
#Downloading history for valid tickers

# Download from the volume-filtered list (final_tickers_df); using
# valid_tickers_df here would bring back tickers the volume filter rejected.
tickers_list = final_tickers_df['Ticker'].tolist()

# Six months of daily, adjusted prices; columns are grouped per ticker.
stock_data = yf.download(tickers_list, period="6mo", interval="1d", group_by='ticker', auto_adjust=True, threads=True)

# Preview one ticker's frame; fall back to the first ticker when ACN is not
# part of the input so the cell does not raise a KeyError.
preview_ticker = 'ACN' if 'ACN' in tickers_list else tickers_list[0]
stock_data[preview_ticker]

## TODO: build a loop through stock_data[...] so that, as it visits each ticker, we calculate the relevant metrics for that ticker
## Apply the metric functions we create to each ticker while iterating through the loop


#for i in tickers_list:
    
    

[*********************100%***********************]  40 of 40 completed


Price,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-05-13,,,,,
2025-05-14,319.939872,320.497083,316.317972,318.815491,2417100.0
2025-05-15,318.626452,322.387640,318.168749,321.601562,3050400.0
2025-05-16,315.840394,316.477193,308.188660,316.109039,3968000.0
2025-05-19,314.218507,319.293134,313.502089,317.800598,2247300.0
...,...,...,...,...,...
2025-11-07,240.000000,246.550003,239.440002,245.759995,4047400.0
2025-11-10,247.339996,248.259995,240.399994,244.550003,2818600.0
2025-11-11,244.889999,244.889999,238.529999,242.559998,3766700.0
2025-11-12,244.000000,246.970001,242.309998,246.529999,3230700.0


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.