# **imports and variables:**

In [1]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

!pip install plotly
import plotly.graph_objects as go



In [2]:
sp100 = [
    "AAPL", "ABBV", "ABT", "ACN", "ADBE", "AIG", "AMD", "AMGN", "AMT", "AMZN",
    "ANET", "APA", "APD", "AVGO", "AXP", "BA", "BAC", "BIIB", "BK", "BKNG",
    "BLK", "BMY", "BSX", "C", "CAT", "CHTR", "CL", "CMCSA", "COF", "COP",
    "COST", "CRM", "CSCO", "CVS", "CVX", "DHR", "DIS", "DOW", "DUK", "EMR",
    "EXC", "F", "FDX", "GD", "GE", "GILD", "GM", "GOOG", "GOOGL", "GS",
    "HON", "IBM", "INTC", "JNJ", "JPM", "KO", "LIN", "LLY", "LMT", "LOW",
    "MA", "MAR", "MCD", "MDLZ", "MDT", "MET", "MMM", "MO", "MRK", "MS",
    "MSFT", "NEE", "NFLX", "NKE", "NVDA", "ORCL", "PEP", "PFE", "PG", "PM",
    "PYPL", "QCOM", "RTX", "SBUX", "SCHW", "SO", "SPG", "T", "TGT", "TMO",
    "TMUS", "TSLA", "TXN", "UNH", "UNP", "UPS", "USB", "V", "VZ", "WBA",
    "WFC", "WMT", "XOM"
]

# **Functions:**

In [3]:
#takes a df and creates new columns that checks if the current 'High' price is higher than certain SMAs
def sma_filtering(df):
  filt20 = df["SMA20"] < df["High"]
  filt50 = df["SMA50"] < df["High"]
  filt100 = df["SMA100"] < df["High"]
  df["SMA20_Above"] = filt20
  df["SMA50_Above"] = filt50
  df["SMA100_Above"] = filt100
  df["above_avg"] = filt20 & filt50 & filt100
  return df

In [4]:
#takes as an input a stock name and period and returns the stock's df.
def stock_df(stock, period):
  dat = yf.Ticker(stock)
  df = dat.history(period=period).reset_index()
  df = df.loc[:, ['Date', 'Open', 'High', 'Low', 'Close']]
  return df

In [5]:
# takes a given stock and iterates over 'bunch-size" stocks at a time,
# applying 'func' to each bunch independantly
def iter(stock, bunch_size, func=None):
  i=0

  #taking the stock and converting it to a df.
  if type(stock) == str:
    cur_stock_df = stock_df(stock, '1y')
  elif type(stock) == pd.DataFrame:
    cur_stock_df = stock
  else:
    return 'not a valid dataFrame'

  #iterating while the bunch size equals the 'cur_bunch'
  #if they are not equal, it means we are on the last portion and we need to stop after this one.
  while True:
    cur_bunch = cur_stock_df.iloc[i:i+bunch_size, :]
    if len(cur_bunch) < bunch_size:
      break
    #if the user entered a function, apply it for each bunch.
    if func==bunch_mean:
      func(cur_stock_df, cur_bunch, bunch_size, i+bunch_size-1)
    elif func==weighted_bunch_mean:
      func(cur_stock_df, cur_bunch, bunch_size, i+bunch_size-1)
    i+=1
  return cur_stock_df

In [6]:
def bunch_mean(df, bunch, bunch_size, new_col_index):
  #creates a new column if not already exists.
  new_col = f"SMA{bunch_size}"
  if new_col not in df.columns:
        df[new_col] = None

  #calculates the mean of the current bunch.
  mean = bunch['High'].mean()

  #inputs the calculated mean in the corresponding column.
  df.loc[ new_col_index, f"SMA{bunch_size}"] = mean

In [7]:
def weighted_bunch_mean(df, bunch, bunch_size, new_col_index):
  weighted_mean=0
  index_sum=0
  new_col = f"WMA{bunch_size}"
  if new_col not in df.columns:
        df[new_col] = None

  #calculates the mean of the current bunch.
  for index, row in bunch.iterrows():
    index_sum += (index+1)
    weighted_mean += (index+1) * row['High']


  #inputs the calculated mean in the corresponding column.
  df.loc[ new_col_index, f"WMA{bunch_size}"] = (weighted_mean / index_sum)


In [8]:
# calculates WMA for a given window size using your weighted_bunch_mean function
def calc_wma_column(df, size):
    col = f"WMA{size}"
    df[col] = None

    for i in range(len(df)):
        # not enough rows to calculate WMA
        if i + 1 < size:
            continue

        # take the last 'size' rows
        bunch = df.iloc[i + 1 - size : i + 1]

        # use your function to place the result
        weighted_bunch_mean(df, bunch, size, i)

    return col

# Hull Moving Average
def hull_moving_average(df, n):
    half = int(n / 2)
    sqrt_n = int(np.sqrt(n))

    # calculate WMA(n)
    col_full = calc_wma_column(df, n)

    # calculate WMA(n/2)
    col_half = calc_wma_column(df, half)

    # create temporary values: 2*WMA(n/2) - WMA(n)
    df["HMA_temp"] = 2 * df[col_half] - df[col_full]

    # calculate WMA on the temporary values using sqrt(n)
    out_col = f"HMA{n}"
    df[out_col] = None

    for i in range(len(df)):
        # not enough rows for sqrt(n)
        if i + 1 < sqrt_n:
            continue

        # take the last sqrt(n) values
        bunch = df["HMA_temp"].iloc[i + 1 - sqrt_n : i + 1]

        # skip if there are missing values
        if bunch.isna().any():
            continue

        # compute WMA (same style as your function)
        weighted_mean = 0
        index_sum = 0

        for idx, val in enumerate(bunch):
            weight = idx + 1
            weighted_mean += weight * val
            index_sum += weight

        df.loc[i, out_col] = weighted_mean / index_sum

    return df


In [9]:
#finds the highest high price in the last n days for each row
def highest_high(df, window):
    df = df.copy()
    df[f'HH{window}'] = df['High'].rolling(window=window).max()
    return df

In [10]:
#finds the lowest low price in the last n days for each row
def lowest_low(df, window):
    df = df.copy()
    df[f'LL{window}'] = df['Low'].rolling(window=window).min()
    return df

# **Code:**

In [11]:
top10 = []
top10_dfs = []
i=0

for stock in sp100:
  if len(top10) == 10:
    break

  df = iter(stock, 20, bunch_mean)
  df = iter(df, 50, bunch_mean)
  df = iter(df, 100, bunch_mean)
  sma_filtering(df)

  df = iter(df, 20, weighted_bunch_mean)
  df = iter(df, 50, weighted_bunch_mean)
  df = iter(df, 100, weighted_bunch_mean)

  df = hull_moving_average(df, 20)

  df = highest_high(df,20)
  df = lowest_low(df,20)

  if(df.iloc[-1,-1]):
    top10.append(stock)
    top10_dfs.append(df)
  i+=1

top10

['AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AIG', 'AMD', 'AMGN', 'AMT', 'AMZN']

In [12]:
top10_dfs[0]

Unnamed: 0,Date,Open,High,Low,Close,SMA20,SMA50,SMA100,SMA20_Above,SMA50_Above,SMA100_Above,above_avg,WMA20,WMA50,WMA100,WMA10,HMA_temp,HMA20,HH20,LL20
0,2024-11-20 00:00:00-05:00,227.033178,228.894754,224.872950,227.968948,,,,False,False,False,False,,,,,,,,
1,2024-11-21 00:00:00-05:00,227.849499,229.123735,224.693774,227.491119,,,,False,False,False,False,,,,,,,,
2,2024-11-22 00:00:00-05:00,227.033189,229.681216,227.033189,228.835037,,,,False,False,False,False,,,,,,,,
3,2024-11-25 00:00:00-05:00,230.417893,232.199827,228.705636,231.821533,,,,False,False,False,False,,,,,,,,
4,2024-11-26 00:00:00-05:00,232.279457,234.509377,232.279457,234.001663,,,,False,False,False,False,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,2025-11-13 00:00:00-05:00,274.109985,276.700012,272.089996,272.950012,269.508658,257.359085,238.33744,True,True,True,True,269.63467,258.111394,241.522168,273.766302,277.897934,278.462633,277.051436,247.030527
246,2025-11-14 00:00:00-05:00,271.049988,275.959991,269.600006,272.410004,270.649927,258.056559,239.064619,True,True,True,True,270.752116,258.809151,242.237779,273.668679,276.585243,277.637059,277.051436,255.182612
247,2025-11-17 00:00:00-05:00,268.820007,270.489990,265.730011,267.459991,270.968228,258.66801,239.747377,False,True,True,False,271.055305,259.403103,242.889165,273.646074,276.236844,276.918677,277.051436,255.182612
248,2025-11-18 00:00:00-05:00,269.989990,270.709991,265.320007,267.440002,271.252073,259.311235,240.426546,False,True,True,False,271.325264,260.023631,243.535867,273.583523,275.841782,276.314608,277.051436,255.182612


In [13]:
#check if the code works correctly
df_copy = df.copy()
df_copy = df_copy.head(40)
df_copy = highest_high(df_copy, 20)
df_copy = lowest_low(df_copy, 20)
print(df_copy)


                        Date        Open        High         Low       Close  \
0  2024-11-20 00:00:00-05:00  202.979996  203.130005  199.449997  202.880005   
1  2024-11-21 00:00:00-05:00  203.490005  203.490005  195.750000  198.380005   
2  2024-11-22 00:00:00-05:00  198.250000  199.259995  196.750000  197.119995   
3  2024-11-25 00:00:00-05:00  199.279999  201.949997  199.000000  201.449997   
4  2024-11-26 00:00:00-05:00  201.899994  208.000000  201.789993  207.860001   
5  2024-11-27 00:00:00-05:00  206.979996  207.639999  205.050003  205.740005   
6  2024-11-29 00:00:00-05:00  205.830002  208.199997  204.589996  207.889999   
7  2024-12-02 00:00:00-05:00  209.960007  212.990005  209.509995  210.710007   
8  2024-12-03 00:00:00-05:00  210.309998  214.020004  209.649994  213.440002   
9  2024-12-04 00:00:00-05:00  215.960007  220.000000  215.750000  218.160004   
10 2024-12-05 00:00:00-05:00  218.029999  222.149994  217.300003  220.550003   
11 2024-12-06 00:00:00-05:00  220.750000

In [14]:
i=0
for df in top10_dfs:

  df['Date'] = pd.to_datetime(df['Date'])
  fig = go.Figure(data = go.Scatter(x=df['Date'], y=df['High'],mode='lines', name='Price', line=dict(color='white')))
  fig.add_scatter(x=df['Date'], y=df['SMA20'], mode='lines', name='SMA 20',  line=dict(color='green'))
  fig.add_scatter(x=df['Date'], y=df['SMA50'], mode='lines', name='SMA 50',  line=dict(color='green'))
  fig.add_scatter(x=df['Date'], y=df['SMA100'], mode='lines', name='SMA 100',  line=dict(color='green'))

  fig.add_scatter(x=df['Date'], y=df['WMA20'], mode='lines', name='WMA 20', line=dict(color='blue'))
  fig.add_scatter(x=df['Date'], y=df['WMA50'], mode='lines', name='WMA 50', line=dict(color='blue'))
  fig.add_scatter(x=df['Date'], y=df['WMA100'], mode='lines', name='WMA 100', line=dict(color='blue'))

  #add markers
  #above_sma = df['above_avg'] == True
  #fig.add_scatter(
  #      x=df['Date'][above_sma],
  #      y=df['High'][above_sma],
  #      mode='markers',
  #      marker=dict(color='red', size=8, symbol='triangle-up'),
  #      name='Price > SMA20/50/100'
  #  )

  fig.update_layout(
  title=f"{top10[i]} Stock",
  xaxis_title = "Date",
  yaxis_title = "Price",
  template="plotly_dark"
  )
  #
  fig.update_xaxes(tickformat="%m-%Y")
  fig.show()
  i+=1

In [15]:
i = 0
for df in top10_dfs:
    df['Date'] = pd.to_datetime(df['Date'])

    fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['High'], mode='lines', name='Price'))
    fig.add_scatter(x=df['Date'], y=df['SMA20'], mode='lines', name='SMA 20')
    fig.add_scatter(x=df['Date'], y=df['SMA50'], mode='lines', name='SMA 50')
    fig.add_scatter(x=df['Date'], y=df['SMA100'], mode='lines', name='SMA 100')


    high_above_sma = df['High'].where(df['above_avg'], other=np.nan)

    fig.add_scatter(
        x=df['Date'],
        y=high_above_sma,
        mode='lines',
        line=dict(color='white', width=3, dash='dot'),
        name='Above all SMAs'
    )

    fig.update_layout(
        title=f"{top10[i]} Stock",
        xaxis_title="Date",
        yaxis_title="Price",
        template="plotly_dark"
    )
    fig.update_xaxes(tickformat="%m-%Y")
    fig.show()
    i += 1


In [16]:
i = 0
for df in top10_dfs:
    # Convert 'Date' column to datetime
    df['Date'] = pd.to_datetime(df['Date'])

    # Compute HMA, assuming the function returns a df with 'HMA20' column
    df = hull_moving_average(df, 20)

    # Drop rows where HMA20 or High is NaN (common in the first few rows)
    df = df.dropna(subset=['HMA20', 'High']).copy()

    # Create a figure with the stock price line
    fig = go.Figure(data=go.Scatter(x=df['Date'],y=df['High'],mode='lines',name='Price'))

    # Add the HMA20 line
    fig.add_scatter(x=df['Date'],y=df['HMA20'],mode='lines',name='HMA 20',line=dict(width=3))

    # Highlight where the price is above HMA20
    high_above_hma = df['High'].where(df['High'] > df['HMA20'])

    fig.add_scatter(x=df['Date'],y=high_above_hma,mode='lines',line=dict(color='white', width=3, dash='dot'),name='Price > HMA')

    # Update figure layout
    fig.update_layout(
        title=f"{top10[i]} Stock (HMA 20)",
        xaxis_title="Date",
        yaxis_title="Price",
        template="plotly_dark"
    )

    # Format x-axis to show month-year
    fig.update_xaxes(tickformat="%m-%Y")

    # Show the figure
    fig.show()

    i += 1
