# OPTICS CLUSTERING METHOD

In [8]:
#IMPORTS
import yfinance as yf
import pandas as pd
import pandas_ta_classic as ta
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import OPTICS
from statsmodels.tsa.stattools import coint, grangercausalitytests
import warnings
warnings.filterwarnings("ignore")

In [54]:
# Data
# Ticker universe: the benchmark index first, then the semiconductor names
# grouped by sub-sector. `stocks` keeps exactly this order.
_BENCHMARK_TICKER = "^GSPC"  # S&P for Beta

_TICKER_GROUPS = {
    "Megacap Leaders & Generalists": (
        "NVDA", "TSM", "AVGO", "AMD", "INTC", "MU", "TXN", "QCOM", "ADI", "MCHP",
    ),
    "Equipment & Manufacturing": (
        "ASML", "AMAT", "LRCX", "KLAC", "TER", "ENTG", "NVMI", "TOELY",
    ),
    "Specialized": (
        "ON", "NXPI", "STM", "LSCC", "MPWR", "QRVO", "SWKS", "ALAB", "CRDO",
    ),
    "Intellectual Property & Design Software": (
        "ARM", "SNPS", "CDNS", "CEVA",
    ),
    # "MU" is intentionally listed only once, under the megacap group.
    "Memory & Storage": (
        "WDC", "STX",
    ),
    "Emerging & Mid-Cap": (
        "GFS", "MRVL", "MTSI", "POWI", "SMTC", "VICR", "CAMT",
    ),
}

# Flatten the groups (dicts preserve insertion order) behind the benchmark.
stocks = [_BENCHMARK_TICKER] + [
    symbol for members in _TICKER_GROUPS.values() for symbol in members
]

def fetch_data(stocks, period="252d", interval="1h"):
    """Download close prices for ``stocks`` and return them as one wide frame.

    Parameters
    ----------
    stocks : list of str
        Ticker symbols handed to ``yf.download``.
    period : str, default "252d"
        Look-back window handed to ``yf.download``.
    interval : str, default "1h"
        Bar size handed to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        One column per ticker for which a 'Close' series could be read, in
        the order of ``stocks``, forward-filled along the time axis. An empty
        DataFrame is returned when no ticker yielded a series.

    Notes
    -----
    Extraction stays best-effort: a ticker that cannot be read is skipped,
    but it is reported on stdout instead of being dropped silently.
    """
    data = yf.download(
        tickers=stocks,
        period=period,
        interval=interval,
        group_by='ticker',
        auto_adjust=True,
        threads=True,
    )

    price_series_list = []
    skipped = []  # (ticker, reason) pairs, reported once after the loop
    for s in stocks:
        try:
            if s not in data:
                skipped.append((s, "not in download result"))
                continue
            # rename() returns a new Series, so the downloaded frame is not
            # mutated the way assigning to `.name` on a slice would.
            price_series_list.append(data[s]['Close'].rename(s))
        except Exception as exc:
            skipped.append((s, repr(exc)))

    if skipped:
        # print (not warnings.warn): the notebook globally ignores warnings.
        print(f"fetch_data: skipped {len(skipped)} ticker(s): {skipped}")

    if not price_series_list:
        return pd.DataFrame()

    # Forward-fill so a ticker missing a bar carries its last known price.
    return pd.concat(price_series_list, axis=1).ffill()

# Download close prices for the whole universe; `df` is the wide price frame
# (one column per ticker that fetch_data could read) used by the cells below.
df = fetch_data(stocks)

[*********************100%***********************]  41 of 41 completed


In [55]:
# Defensive: if `df` still carries MultiIndex columns, reduce it to the
# 'Close' prices, whichever level the field name lives on.
if isinstance(df.columns, pd.MultiIndex):
    if 'Close' in df.columns.get_level_values(0):
        df = df['Close']
    elif 'Close' in df.columns.get_level_values(1):
        df = df.xs('Close', axis=1, level=1)

BENCHMARK = '^GSPC'
BARS_PER_DAY = 7              # presumably 7 hourly bars per session -- TODO confirm
TRADING_DAYS_PER_MONTH = 21
TRADING_DAYS_PER_YEAR = 252

# A ticker with no prices at all yields an all-NaN return column, and the
# row-wise dropna() below would then discard every row for every ticker.
# Drop such columns first (the benchmark is kept so a missing benchmark is
# not masked).
empty_tickers = df.columns[df.isna().all().to_numpy()].difference([BENCHMARK])
if len(empty_tickers):
    print(f"Dropping tickers with no price data: {list(empty_tickers)}")

# fill_method=None: prices are already forward-filled by fetch_data, so the
# deprecated implicit padding inside pct_change is not needed.
returns_df = df.drop(columns=empty_tickers).pct_change(fill_method=None).dropna()
market_returns = returns_df[BENCHMARK]
window = TRADING_DAYS_PER_MONTH * BARS_PER_DAY

# Annualised rolling volatility of the hourly returns.
rolling_vol = returns_df.rolling(window=window).std() * np.sqrt(TRADING_DAYS_PER_YEAR * BARS_PER_DAY)

# Rolling beta = cov(asset, market) / var(market) over the same window.
rolling_cov = returns_df.rolling(window=window).cov(market_returns)
rolling_m_var = market_returns.rolling(window=window).var()
rolling_beta = rolling_cov.divide(rolling_m_var, axis=0)

# Per-ticker fundamentals snapshot (one row per ticker, benchmark excluded).
# Collection is best-effort: a ticker whose lookup fails is skipped, but it is
# reported afterwards instead of disappearing silently.
fundamental_list = []
failed_fundamentals = []
for ticker in stocks:
    if ticker == '^GSPC':
        continue
    try:
        t = yf.Ticker(ticker)
        info = t.info
        try:
            opts = t.options
            if opts:
                # Unweighted mean of call and put implied vols over every
                # strike of the first listed expiry.
                # NOTE(review): illiquid far-from-the-money strikes may skew
                # this average -- confirm it is the intended IV measure.
                chain = t.option_chain(opts[0])
                iv = (chain.calls['impliedVolatility'].mean() + chain.puts['impliedVolatility'].mean()) / 2
            else:
                iv = np.nan
        except Exception:
            # `except Exception` (not a bare except) so Ctrl-C / kernel
            # interrupt still stops this long network loop.
            iv = np.nan

        fundamental_list.append({
            'Ticker': ticker,
            'PE': info.get('trailingPE', np.nan),
            'Market_Cap': info.get('marketCap', np.nan),
            'ROE': info.get('returnOnEquity', np.nan),
            'Implied_Vol': iv
        })
    except Exception as exc:
        failed_fundamentals.append((ticker, repr(exc)))
        continue

if failed_fundamentals:
    print(f"No fundamentals for {len(failed_fundamentals)} ticker(s): {failed_fundamentals}")

# Explicit columns keep set_index('Ticker') valid even when every lookup
# failed and fundamental_list is empty.
f_df = pd.DataFrame(
    fundamental_list,
    columns=['Ticker', 'PE', 'Market_Cap', 'ROE', 'Implied_Vol'],
).set_index('Ticker')

# Assemble one long frame indexed by (Datetime, Ticker): per-bar price,
# return, rolling beta and rolling vol, plus the static fundamentals
# broadcast onto every bar of the ticker.
fundamental_cols = ['PE', 'Market_Cap', 'ROE', 'Implied_Vol']

all_hourly_data = []
for ticker in stocks:
    if ticker == '^GSPC' or ticker not in df.columns:
        continue

    ticker_price = df[ticker]
    # Fall back to NaN when a derived frame has no column for this ticker.
    ticker_returns = returns_df[ticker] if ticker in returns_df.columns else np.nan
    ticker_beta = rolling_beta[ticker] if ticker in rolling_beta.columns else np.nan
    ticker_vol = rolling_vol[ticker] if ticker in rolling_vol.columns else np.nan

    # Aligned on the full price index, so bars without a return/beta/vol
    # value are NaN.
    temp_df = pd.DataFrame({
        'Price': ticker_price,
        'Returns': ticker_returns,
        'Rolling_Beta': ticker_beta,
        'Rolling_Vol': ticker_vol
    }, index=df.index)

    temp_df['Ticker'] = ticker

    # Always create the fundamental columns (NaN when unavailable) so every
    # per-ticker frame has the same schema and column order.
    has_fundamentals = ticker in f_df.index
    for col in fundamental_cols:
        temp_df[col] = f_df.loc[ticker, col] if has_fundamentals else np.nan

    all_hourly_data.append(temp_df)

if all_hourly_data:
    master_df = (
        pd.concat(all_hourly_data)
        # Name the time axis explicitly so set_index below does not depend on
        # whatever name the price index happens to carry.
        .rename_axis('Datetime')
        .reset_index()
        .set_index(['Datetime', 'Ticker'])
    )
    print("Master DataFrame Created Successfully!")
    print(master_df.head())
else:
    # Define master_df explicitly so later cells never pick up a stale frame
    # left in the kernel by a previous run.
    master_df = pd.DataFrame()
    print("Master DataFrame is empty: no ticker data was available.")

Master DataFrame Created Successfully!
                                       Price   Returns  Rolling_Beta  Rolling_Vol        PE     Market_Cap      ROE  Implied_Vol
Datetime                  Ticker                                                                                                
2025-01-30 14:30:00+00:00 NVDA    119.570000       NaN           NaN          NaN  47.66334  4653442400256  1.07359     1.507859
2025-01-30 15:30:00+00:00 NVDA    119.054703 -0.004310           NaN          NaN  47.66334  4653442400256  1.07359     1.507859
2025-01-30 16:30:00+00:00 NVDA    119.290001  0.001976           NaN          NaN  47.66334  4653442400256  1.07359     1.507859
2025-01-30 17:30:00+00:00 NVDA    119.474998  0.001551           NaN          NaN  47.66334  4653442400256  1.07359     1.507859
2025-01-30 18:30:00+00:00 NVDA    121.455002  0.016573           NaN          NaN  47.66334  4653442400256  1.07359     1.507859


In [None]:

def _history_summary(ticker, prices):
    """Summarise the span of non-missing observations in one price column.

    Returns a dict with the ticker, the first and last timestamps that hold
    a value, the number of such rows, and the whole days between first and
    last (0 when the column holds no values).
    """
    observed = prices.dropna().index

    if observed.empty:
        first_seen = last_seen = span = None
        n_rows = 0
    else:
        first_seen, last_seen = observed[0], observed[-1]
        span = last_seen - first_seen
        n_rows = len(observed)

    return {
        'Ticker': ticker,
        'Start Date': first_seen,
        'End Date': last_seen,
        'Total Hours': n_rows,
        'Days of Data': span.days if span else 0
    }


# Data-coverage audit, one row per column of `df`.
# NOTE: fetch_data forward-fills prices, so after the first observation no
# row counts as missing here; only late-starting tickers show fewer rows.
audit_list = []
for ticker in df.columns:
    audit_list.append(_history_summary(ticker, df[ticker]))

audit_df = pd.DataFrame(audit_list).sort_values('Total Hours', ascending=False)

report_columns = ['Ticker', 'Start Date', 'Total Hours', 'Days of Data']
print(f"Total Tickers: {len(audit_df)}")
print(audit_df.head(10)[report_columns])



Total Tickers: 41

--- Stocks with FULL History (Top 10) ---
   Ticker                Start Date  Total Hours  Days of Data
0   ^GSPC 2025-01-30 14:30:00+00:00         1755           365
21    STM 2025-01-30 14:30:00+00:00         1755           365
23   MPWR 2025-01-30 14:30:00+00:00         1755           365
24   QRVO 2025-01-30 14:30:00+00:00         1755           365
25   SWKS 2025-01-30 14:30:00+00:00         1755           365
26   ALAB 2025-01-30 14:30:00+00:00         1755           365
27   CRDO 2025-01-30 14:30:00+00:00         1755           365
28    ARM 2025-01-30 14:30:00+00:00         1755           365
29   SNPS 2025-01-30 14:30:00+00:00         1755           365
30   CDNS 2025-01-30 14:30:00+00:00         1755           365

--- Stocks with SHORTEST History (Bottom 10) ---
   Ticker                Start Date  Total Hours  Days of Data
10   MCHP 2025-01-30 14:30:00+00:00         1755           365
11   ASML 2025-01-30 14:30:00+00:00         1755           365
12   AM