Import the neccessary libraries

In [1]:
import yfinance as yf # Library to download historical stock data
import pandas as pd # Data manipulation and analysis library
import numpy as np # Numerical operations, especially for feature engineering
import matplotlib.pyplot as plt # Plotting library for visualizations
import seaborn as sns # Enhanced data visualization library
from sklearn.preprocessing import StandardScaler # Tool to standardize features
from sklearn.decomposition import PCA # Principal Component Analysis for dimensionality reduction (for 2D plotting)
from sklearn.cluster import KMeans,AgglomerativeClustering,DBSCAN # The three clustering algorithms to compare
from sklearn.metrics import silhouette_score # Evaluation metric for clustering

Data Acquistion

In [2]:
# Define a diverse list of stock tickers from various sectors
TICKERS = ["AAPL","MSFT","GOOGL","AMZN","JPM","JNJ","KO","KO","XOM","TSLA","NFLX"]
# Define the date range for the historical data
START_DATE = "2020-01-01"
END_DATE = None

In [4]:
# Download the historical Adjusted Close prices for the defined tickers
print(f"Downloading data for {len(TICKERS)} stocks from {START_DATE} to {END_DATE}.........")
# The Adj Close price is used as it accounts for dividends and stock splits
stock_data = yf.download(TICKERS,start=START_DATE,end=END_DATE)

Downloading data for 11 stocks from 2020-01-01 to None.........


  stock_data = yf.download(TICKERS,start=START_DATE,end=END_DATE)
[*********************100%***********************]  10 of 10 completed


In [5]:
# Fill any potential missing values (NaN) that may occur due to market closures or data gaps
# ffill (forward fill) propagates the last valid observation forward
stock_data = stock_data.fillna(method="ffill")
print("----- Stock Data Head (Adjusted Close Prices) -----")
stock_data.head()

----- Stock Data Head (Adjusted Close Prices) -----


  stock_data = stock_data.fillna(method="ffill")


Price,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,High,High,High,High,High,High,High,High,High,High,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Open,Open,Open,Open,Open,Open,Open,Open,Open,Open,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,KO,MSFT,NFLX,TSLA,XOM,AAPL,AMZN,GOOGL,JNJ,JPM,KO,MSFT,NFLX,TSLA,XOM,AAPL,AMZN,GOOGL,JNJ,JPM,KO,MSFT,NFLX,TSLA,XOM,AAPL,AMZN,GOOGL,JNJ,JPM,KO,MSFT,NFLX,TSLA,XOM,AAPL,AMZN,GOOGL,JNJ,JPM,KO,MSFT,NFLX,TSLA,XOM
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2
2020-01-02,72.538513,94.900497,67.965233,124.07299,119.573341,46.066406,152.791107,329.809998,28.684,54.131084,72.598892,94.900497,67.965233,124.115492,119.581825,46.435003,152.895746,329.980011,28.713333,54.222699,71.292304,93.207497,66.863336,123.3165,118.02242,45.873727,150.612732,324.779999,28.114,53.627182,71.54589,93.75,66.958679,123.987986,118.471593,46.342853,151.040795,326.100006,28.299999,53.627182,135480400,80580000,27278000,5777000,10803700,11867700,22622100,4485800,142981500,12456400
2020-01-03,71.833298,93.748497,67.60968,122.636505,117.99543,45.815094,150.888596,325.899994,29.534,53.695889,72.594063,94.309998,68.216995,123.562991,118.754544,46.066414,152.153771,329.859985,30.266666,54.489913,71.608692,93.224998,66.904545,121.548519,116.920733,45.312461,150.355893,325.529999,29.128,53.566097,71.765674,93.224998,66.938316,121.973514,117.278965,45.505137,150.60323,326.779999,29.366667,54.467004,146322800,75288000,23408000,5752400,10386800,11354500,21116200,3806900,266677500,17386900
2020-01-06,72.405678,95.143997,69.411766,122.483482,117.901611,45.79834,151.278641,335.829987,30.102667,54.108166,72.444321,95.184502,69.437089,122.568474,117.935736,45.999395,151.345236,336.359985,30.104,54.482272,70.703012,93.0,67.087298,121.420996,116.426032,45.672683,148.881465,321.200012,29.333332,53.619536,70.954188,93.0,67.118577,122.398478,116.477206,45.781588,149.42369,323.119995,29.364668,53.688247,118387200,81236000,46768000,7731300,10259000,14698300,20813700,5663100,151995000,20081900
2020-01-07,72.065155,95.343002,69.277679,123.231506,115.897217,45.446491,149.899338,330.75,31.270666,53.665348,72.671348,95.694504,69.694308,123.631003,117.58603,45.739693,151.887465,336.700012,31.441999,53.840946,71.845377,94.601997,69.101397,120.171551,115.846043,45.36272,149.652016,330.299988,30.224001,53.069831,72.415345,95.224998,69.543349,122.407015,117.091325,45.614037,151.554533,336.470001,30.76,53.825679,108872000,80898000,34330000,7382900,10531300,9973900,21634100,4703200,268231500,17387700
2020-01-08,73.224411,94.598503,69.770782,123.214485,116.801308,45.530258,152.286926,339.26001,32.809334,52.856049,73.526303,95.550003,70.10895,124.081465,117.347187,45.773199,152.962326,342.700012,33.232666,53.665341,71.768086,94.321999,69.154534,122.14349,115.658374,45.362716,150.251234,331.049988,31.215334,52.810236,71.768086,94.902,69.263281,123.137976,115.74366,45.463242,151.183463,331.48999,31.58,53.527913,132079200,70160000,35314000,6605800,9695300,10676000,27746500,7104500,467164500,15137700
