In [1]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime
from scipy.optimize import minimize

## Group Assignment
### Team Number: 11
### Team Member Names: Akram, Annie, Jester
### Team Strategy Chosen: Market Beat

Disclose any use of AI for this assignment below (detail where and how you used it).  Please see the course outline for acceptable uses of AI.


In [2]:
# Function to get the tickers from the provided CSV file. 
def get_tickers():
    tickers = pd.read_csv('Tickers.csv')
    ticker_lst = [tickers.columns[0]] + (list(tickers[tickers.columns[0]]))
    return ticker_lst

In [3]:
# Important Constants: 
amount = 1_000_000 # Initial investment amount of $1,000,000
group = 11
start_date, end_date = '2022-09-30', '2024-09-30' # Start and end date of the simulation
min_stocks, max_stocks = 12, 24
# Reading in CSV file: 
ticker_lst = get_tickers()

# Initializing variable to store the tickers we will use in our portfolio
columns = ['Ticker', 'Price', 'Currency', 'Shares', 'Value', 'Weight']
Portfolio_Final = pd.DataFrame(columns=columns)

In [4]:
exchange_rate = yf.Ticker('CAD=X').fast_info['last_price']
print(f'The current exchange rate for the latest available day:\nUSD -> CAD: ${np.round(exchange_rate, 4)}')

The current exchange rate for the latest available day:
USD -> CAD: $1.3991


In [None]:
# Function to drop months with less than 18 trading days
def drop_short_trading_months(df):
    """
    Drops months with less than 18 trading days from a yfinance history DataFrame.
    Parameters:
        df (pd.DataFrame): A yfinance DataFrame with a DateTimeIndex and stock data.
    Returns:
        pd.DataFrame: Filtered DataFrame with only months having >= 18 trading days.
    """
    # Ensure the index is datetime for proper grouping
    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError("DataFrame index must be a DateTimeIndex.")
    # Group by month and count trading days
    trading_days_per_month = df.groupby(df.index.to_period('M')).size()
    # Identify months with at least 18 trading days
    valid_months = trading_days_per_month[trading_days_per_month >= 18].index
    # Filter the original DataFrame to include only valid months
    filtered_df = df[df.index.to_period('M').isin(valid_months)]
    return filtered_df

In [None]:
# Filtering valid stocks by inputting a list of strings for each ticker. 
def filter_stocks(ticker_lst):
    valid_tickers, invalid_tickers = {}, []
    # Loop through all tickers to check if they are valid
    for ticker in ticker_lst:
        stock = yf.Ticker(ticker)
        try:
            info = stock.fast_info # Get basic stock info

            hist = stock.history(start=start_date, end=end_date) # Get stock history
            hist.index = hist.index.strftime('%Y-%m-%d')

            avg_volume = hist.loc[((hist.index >= '2023-09-30') & (hist.index <= '2024-09-30'))]['Volume'].mean() # Calculate average volume in specified date range.
            currency = info.get("currency")
            hist = drop_short_trading_months(hist) # Drop months with less than 18 trading days
            if ((hist.empty is not None) and # filter for stocks delisted on yfinance
                ( currency == "USD" or currency == "CAD") and # filter for stocks that are not USD
                (avg_volume >= 100_000)): # Filter by volume greater than 100,000
                if currency == "CAD":
                    valid_tickers[ticker] = hist['Close'] # Store the close prices of the stock as a Series
                elif currency == "USD":
                    valid_tickers[ticker] = hist['Close'] * exchange_rate # Convert USD to CAD
            else:
                invalid_tickers.append(ticker)
        except:
            invalid_tickers.append(ticker)
    return [valid_tickers, invalid_tickers]
    # valid_tickers is a dictionary of Series where the key is the name of the ticker. 
    # invalid_tickers is a list of ticker strings which were removed in the filtering process. 

In [6]:
# Loading data into variables
stock_filter = filter_stocks(ticker_lst)
ticker_data = stock_filter[0]
ticker_lst = list(ticker_data.keys()) # Reassign original ticker list

$AGN: possibly delisted; no timezone found
$AXP: possibly delisted; no price data found  (1d 2022-09-30 -> 2024-09-30)
$BA: possibly delisted; no price data found  (1d 2022-09-30 -> 2024-09-30)
Failed to get ticker 'CELG' reason: Expecting value: line 1 column 1 (char 0)
$CELG: possibly delisted; no timezone found
Failed to get ticker 'MON' reason: Expecting value: line 1 column 1 (char 0)
$MON: possibly delisted; no timezone found
Failed to get ticker 'RTN' reason: HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=10)
$RTN: possibly delisted; no timezone found


In [7]:
data = pd.DataFrame()
for ticker in ticker_data:
    data[ticker] = ticker_data[ticker]

# returns = data.pct_change()
# returns.drop(index=returns.index[0], inplace = True)

data.head()

Unnamed: 0_level_0,AAPL,ABBV,ABT,ACN,AIG,AMZN,BAC,BB.TO,BIIB,BK,...,QCOM,RY.TO,SHOP.TO,T.TO,TD.TO,TXN,UNH,UNP,UPS,USB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-09-30,191.021155,172.358436,129.510227,347.198145,63.491175,158.093781,39.947659,6.51,373.549023,50.076821,...,150.797667,113.588982,37.189999,24.323668,76.140366,202.527087,685.884986,260.001393,205.437156,51.025466
2022-10-03,196.895516,177.636677,133.204417,357.440065,65.764456,162.12307,41.124924,6.53,369.771544,51.55885,...,156.456916,114.575356,37.799999,25.03307,77.62326,209.148008,700.10418,267.261407,207.408362,52.607349
2022-10-04,201.940552,182.349869,137.969346,370.151367,69.147627,169.412172,42.831296,6.74,374.178617,53.781882,...,163.330767,117.54364,42.610001,25.316833,79.079208,216.096022,710.507085,274.094442,214.784446,53.948802
2022-10-05,202.355172,184.070749,138.37088,370.191757,68.599353,169.216304,42.222823,6.67,366.120018,53.768892,...,166.694277,117.041306,41.959999,25.104012,78.620842,219.563529,715.803514,267.741886,213.563577,53.404637
2022-10-06,201.014456,180.166613,137.126123,363.620299,67.877264,168.306924,41.614347,6.64,366.917492,52.884878,...,167.214826,113.762505,41.32,24.421211,75.751671,217.914826,705.020317,264.525559,210.600416,51.987261


In [28]:
data[['AAPL']].iloc[0]

AAPL    191.021155
Name: 2022-09-30, dtype: float64

In [8]:
# sharpe ratio optimization

def optimal_sharpe(tickers, start_date, end_date, risk_free_rate, investment):
    # download data
    data = yf.download(tickers, start=start_date, end=end_date)['Close']

    # calculate mean return of stocks and covariance of stocks
    returns = data.pct_change()
    returns.drop(index=returns.index[0], inplace = True)
    mean_returns = returns.mean()
    covariance_matrix = returns.cov()

    def neg_sharpe(weights):
        #alternate
        #portfolio = data/data.iloc[0] # normalize returns
        #portfolio = portfolio*weights*investment
        #portfolio['total'] = portfolio.sum(axis=1)
        #portfolio['daily return'] = portfolio['total'].pct_change(1)

        #er = portfolio['daily return'].mean()
        #std = portfolio['daily return'].std()
        #sr = er/std
        


        # calculate portfolio expected return by weighing each stock's expected return
        num_days = len(returns)
        portfolio_expected_return = np.sum(weights*mean_returns*num_days)

        portfolio_variance = 0
        # calculate portfolio risk (std) by finding the portfolio variance, which is affected by covariance
        for i in range(len(weights)):
            for j in range(len(weights)):
                portfolio_variance += weights[i] * weights[j] * covariance_matrix.iloc[i, j]*num_days
        portfolio_std = np.sqrt(portfolio_variance)
        
        # calculate sharpe ratio
        sharpe = (portfolio_expected_return - risk_free_rate)/portfolio_std
        
        return -sharpe #make sharpe ratio negative for minimize function

    # constraints
    def check_sum(weights): 
        return np.sum(weights)-1 #returns 0 if weights sum up to 1
    constraints = {'type': 'eq', 'fun': check_sum}

    min_weight = 1/(2*len(tickers))
    max_weight = 0.4

    bounds = [(min_weight, max_weight)]*len(tickers)

    # initial guess
    init_guess = [1.0/len(tickers)]*len(tickers)

    results = minimize(neg_sharpe, init_guess, method="SLSQP", bounds=bounds, constraints=constraints)

    return results


tickers = ['aapl','adbe','amd','fi', 'csco', 'ibm', 'intc', 'lrcx', 'msft', 'mu', 'orcl', 'qcom', 'txn', 'nvda', 'fis', 'crm', 'avgo', 'now']

optimal = optimal_sharpe(tickers, "2020-01-01", "2022-01-01", 0, 1000000)

print(optimal.fun)

[*********************100%***********************]  17 of 18 completed

1 Failed download:
['ORCL']: ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=10)"))


nan


  returns = data.pct_change()


In [9]:
# Function to get the total volume for a call or put of a given stock.
# ticker: yfinance Ticker class
# put: Boolean for if you want to calculate put volume. Else, put False for call volume. 
def get_options_vol(ticker, put):
    exps = ticker.options # Expiration dates of available options
    optdata = pd.DataFrame() # Data storage
    for exp in exps:
        chain = pd.DataFrame()
        if put: chain = ticker.option_chain(exp).puts['volume'] # Gets the desired columns
        else: chain = ticker.option_chain(exp).calls['volume'] # If put options are desired then use this data.
        optdata = pd.concat([optdata, chain]) # Add the calls/puts to the main dataframe. 
    return optdata.sum()['volume'] # output total volue of put/call options

# Function to calculate the PCR for each stock. 
def PCR_calc(tickers):
    pcrdata = pd.DataFrame(columns=['Ticker', 'Put Volume', 'Call Volume', 'PCR'])
    for ticker in tickers:
        stock = yf.Ticker(ticker)
        try: 
            # Get the volume for Put and Call options:
            call_options = get_options_vol(stock, False)
            put_options = get_options_vol(stock, True)
            # Calculate PCR Ratio:
            pcr = put_options / call_options
            #print(f"Ticker: {ticker}, PCR: {pcr}")  # Debugging
            pcrdata.loc[len(pcrdata)] = [ticker, put_options, call_options, pcr]
        except Exception as e:
            print(f"Error processing {ticker}: {e}")  # Debugging (output error)
            pass
    return pcrdata

In [10]:
# Load the PCR values for each of the valid stocks into a variable
options_data = PCR_calc(ticker_lst)

[*********************100%***********************]  17 of 18 completed

Error processing BB.TO: 'volume'
Error processing RY.TO: 'volume'
Error processing SHOP.TO: 'volume'
Error processing T.TO: 'volume'
Error processing TD.TO: 'volume'


In [16]:
options_data = options_data.sort_values(by='PCR', ascending=True)
options_data['Rank'] = [i for i in range(len(options_data))]
options_data = options_data.set_index('Rank')

# Display the table of rankings based off PCR. 
# The rankings are based off the stocks with the greatest sentiment for if they will go up or not
# The tickers at the top of the list have a high call rate (meaning the price will go up)

options_data

Unnamed: 0_level_0,Ticker,Put Volume,Call Volume,PCR
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,PYPL,9037.0,24049.0,0.375774
1,KO,10357.0,26155.0,0.395985
2,PEP,4644.0,11474.0,0.404741
3,AMZN,112048.0,233817.0,0.479212
4,MRK,7810.0,15386.0,0.507604
5,PG,4902.0,9367.0,0.523327
6,LLY,24425.0,39986.0,0.610839
7,QCOM,36739.0,54459.0,0.674618
8,UPS,9461.0,13400.0,0.706045
9,UNH,6713.0,9453.0,0.710145


In [12]:
# Code to output final dataframe to a CSV file called Stocks_Group_XX.csv
Stocks_Final = Portfolio_Final[['Ticker', 'Shares']]
Stocks_Final.to_csv(f'Stocks_Group_{group}.csv', index=False)

In [13]:
Portfolio_Final

Unnamed: 0,Ticker,Price,Currency,Shares,Value,Weight


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

---
<p style="color: #004dd3">
Akram Jamil
</p>

<p style="color: #2C8CA9">
Jester Yang
</p>

<p style="color: #3cc19d;">
Annie Wong
</p>

---