In [3]:
import pandas as pd
import yfinance as yf
import sklearn as skl
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns
import matplotlib.pyplot as plt
import scipy as sp

In [1]:
# Ticker symbols of the S&P 100 companies, written out by hand.
tickers = [
    'AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AIG', 'AMD', 'AMGN',
    'AMT', 'AMZN', 'AXP', 'BA', 'BAC', 'BK', 'BKNG', 'BLK',
    'BMY', 'BRK-B', 'C', 'CAT', 'CHTR', 'CL', 'CMCSA', 'COF',
    'COP', 'COST', 'CRM', 'CSCO', 'CVS', 'CVX', 'DHR', 'DIS',
    'DOW', 'DUK', 'EMR', 'EXC', 'F', 'FDX', 'GD', 'GE',
    'GILD', 'GM', 'GOOG', 'GOOGL', 'GS', 'HD', 'HON', 'IBM',
    'INTC', 'JNJ', 'JPM', 'KHC', 'KMI', 'KO', 'LIN', 'LLY',
    'LMT', 'LOW', 'MA', 'MCD', 'MDLZ', 'MDT', 'MET', 'META',
    'MMM', 'MO', 'MRK', 'MS', 'MSFT', 'NEE', 'NFLX', 'NKE',
    'NVDA', 'ORCL', 'PEP', 'PFE', 'PG', 'PM', 'PYPL', 'QCOM',
    'RTX', 'SBUX', 'SLB', 'SO', 'SPG', 'T', 'TGT', 'TMO',
    'TMUS', 'TRV', 'TXN', 'UNH', 'UNP', 'UPS', 'USB', 'V',
    'VZ', 'WBA', 'WFC', 'WMT', 'XOM',
]

# Spotify (SPOT), Monster energy drink (MNST) and Chipotle (CMG) are not part
# of the list above, so they are added manually.  The membership check keeps
# the cell idempotent: re-running it never appends a symbol twice.
for extra_symbol in ('SPOT', 'MNST', 'CMG'):
    if extra_symbol not in tickers:
        tickers.append(extra_symbol)

print(tickers)  # check that the list is right

['AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AIG', 'AMD', 'AMGN', 'AMT', 'AMZN', 'AXP', 'BA', 'BAC', 'BK', 'BKNG', 'BLK', 'BMY', 'BRK-B', 'C', 'CAT', 'CHTR', 'CL', 'CMCSA', 'COF', 'COP', 'COST', 'CRM', 'CSCO', 'CVS', 'CVX', 'DHR', 'DIS', 'DOW', 'DUK', 'EMR', 'EXC', 'F', 'FDX', 'GD', 'GE', 'GILD', 'GM', 'GOOG', 'GOOGL', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'KHC', 'KMI', 'KO', 'LIN', 'LLY', 'LMT', 'LOW', 'MA', 'MCD', 'MDLZ', 'MDT', 'MET', 'META', 'MMM', 'MO', 'MRK', 'MS', 'MSFT', 'NEE', 'NFLX', 'NKE', 'NVDA', 'ORCL', 'PEP', 'PFE', 'PG', 'PM', 'PYPL', 'QCOM', 'RTX', 'SBUX', 'SLB', 'SO', 'SPG', 'T', 'TGT', 'TMO', 'TMUS', 'TRV', 'TXN', 'UNH', 'UNP', 'UPS', 'USB', 'V', 'VZ', 'WBA', 'WFC', 'WMT', 'XOM', 'SPOT', 'MNST', 'CMG']


In [4]:
# Download daily price data for every symbol in `tickers`.
# auto_adjust=True is passed explicitly: it matches the default that yfinance
# now applies on its own (and announces with a notice), and pinning it keeps
# the meaning of the 'Close' column stable across yfinance versions.
# NOTE(review): yfinance documents `end` as exclusive, so 2024-12-31 itself is
# presumably not part of the download -- confirm that this is intended.
data = yf.download(
    tickers,
    start='2024-01-01',
    end='2024-12-31',
    auto_adjust=True,
)

# Keep only the closing prices: one row per date, one column per ticker.
close = data.loc[:, 'Close']

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  104 of 104 completed


In [5]:
# Preview the first five rows of closing prices to confirm the download worked.
close.head(n=5)

Ticker,AAPL,ABBV,ABT,ACN,ADBE,AIG,AMD,AMGN,AMT,AMZN,...,UNH,UNP,UPS,USB,V,VZ,WBA,WFC,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-02,184.532089,152.743408,107.138481,341.431396,580.070007,67.074509,138.580002,286.354218,211.642944,149.929993,...,528.766113,236.856384,148.922592,41.968723,256.474731,35.816391,24.634895,47.823631,52.324097,98.10601
2024-01-03,183.150391,153.355072,106.81662,332.573761,571.789978,66.85041,135.320007,289.531738,207.657898,148.470001,...,531.403381,235.173111,148.170181,40.860004,255.592987,36.074333,23.636557,47.193474,52.327374,98.930267
2024-01-04,180.824371,154.310791,108.240593,331.756866,567.049988,66.869896,136.009995,291.919739,208.402695,144.570007,...,534.726929,234.550385,147.652893,41.299667,257.207886,36.267784,22.42561,47.775154,51.821514,98.067657
2024-01-05,180.098694,154.960693,108.065041,331.294312,564.599976,67.24015,138.580002,291.756042,207.261322,145.240005,...,526.844482,233.431442,149.279999,41.873142,257.28717,37.032383,23.1189,48.395607,51.476608,98.364777
2024-01-08,184.45256,154.282104,109.625542,334.965302,580.549988,66.470413,146.179993,299.343597,209.00238,149.100006,...,526.001404,234.122269,150.408615,42.016506,260.110779,36.940262,23.69202,48.395607,51.982468,96.725845


In [14]:
# Compute daily returns: day-over-day percentage change of each closing price.
# fill_method=None avoids pct_change's deprecated implicit forward-fill of
# missing prices; any gap stays NaN and the affected rows are removed by
# dropna(), together with the first row (which has no previous day).
returns = close.pct_change(fill_method=None).dropna()

# dropna() removes a row if ANY ticker is missing on that day, so a single
# ticker without data would wipe out every row.  Fail early with a clear
# message instead of letting the scaler choke on an empty matrix.
if returns.empty:
    raise ValueError(
        "No complete rows of returns are left after dropna(); "
        "check that every ticker was downloaded successfully."
    )

# returns.T has one row per ticker and one column per trading day, and
# StandardScaler standardises column by column.  Each *day* is therefore
# centred and scaled across tickers before the rows are compared.
# NOTE(review): if the intent was to standardise each ticker's own return
# series instead, the scaler should be fit on `returns` and the result
# transposed afterwards -- confirm which of the two is wanted.
scaler = StandardScaler()
normalized = scaler.fit_transform(returns.T)

# Pairwise cosine similarity between the ticker rows, labelled with the ticker
# symbols on both axes so it can be looked up by name.
similarity = cosine_similarity(normalized)
similarity_df = pd.DataFrame(similarity, index=returns.columns, columns=returns.columns)

In [15]:
# Top 10 similar to SPOT
# SPOT's own entry is removed by label rather than by assuming it sorts into
# position 0; the ten highest remaining similarities are then kept.
top_10 = (
    similarity_df['SPOT']
    .drop(labels='SPOT')
    .sort_values(ascending=False)
    .head(10)
)
print("Top 10 most similar to Spotify (SPOT):")
print(top_10)

Top 10 most similar to Spotify (SPOT):
Ticker
NFLX    0.435518
AMZN    0.348500
META    0.291598
MSFT    0.278631
GE      0.267607
NVDA    0.266242
LLY     0.201316
COST    0.199220
CRM     0.193729
DHR     0.162492
Name: SPOT, dtype: float64


In [16]:
# Top 10 similar to MNST
# MNST's own entry is removed by label rather than by assuming it sorts into
# position 0; the ten highest remaining similarities are then kept.
top_10 = (
    similarity_df['MNST']
    .drop(labels='MNST')
    .sort_values(ascending=False)
    .head(10)
)
print("Top 10 most similar to Monster Energy (MNST):")  # heading typo "Energt" fixed
print(top_10)

Top 10 most similar to Monster Energt (MNST):
Ticker
PEP     0.303100
KO      0.250260
DUK     0.169366
MDLZ    0.125824
KHC     0.119788
TGT     0.113857
V       0.110583
AMT     0.109518
LIN     0.101779
AAPL    0.101081
Name: MNST, dtype: float64


In [17]:
# Top 10 similar to CMG
# CMG's own entry is removed by label rather than by assuming it sorts into
# position 0; the ten highest remaining similarities are then kept.
top_10 = (
    similarity_df['CMG']
    .drop(labels='CMG')
    .sort_values(ascending=False)
    .head(10)
)
print("Top 10 most similar to Chipotle Mexican Grill (CMG):")
print(top_10)

Top 10 most similar to Chipotle Mexican Grill (CMG):
Ticker
COST    0.268785
MSFT    0.249723
AMZN    0.186288
NFLX    0.172900
CRM     0.169626
MA      0.167311
LLY     0.165291
BKNG    0.162473
ORCL    0.153837
ADBE    0.150201
Name: CMG, dtype: float64
