In [94]:
# Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

# Analysis parameters: instrument, sample period, and SMA lookback (in rows).
ticker = 'SPY'
start_date = '2000-01-01'
end_date = '2025-07-31'
window = 5

# Download historical prices for the single ticker from Yahoo Finance.
data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# Fail loudly here: an empty download would otherwise flow through every later
# cell and produce empty tables instead of an error.
if data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r} between {start_date} and {end_date}"
    )

# Turn the 'Date' index into an ordinary column.
data = data.reset_index()

# The recorded run returned two-level columns (Price, Ticker). Drop the ticker
# level only when the columns really are a MultiIndex, so a flat column layout
# does not make droplevel() raise.
if isinstance(data.columns, pd.MultiIndex):
    data = data.droplevel(level='Ticker', axis=1)

# Keep the ticker as a regular column right after 'Date'.
data.insert(1, 'Ticker', ticker)

# The DataFrame is now a flat table: Date, Ticker, then the price/volume columns.
display(data)


[*********************100%***********************]  1 of 1 completed


Price,Date,Ticker,Close,High,Low,Open,Volume
0,2000-01-03,SPY,92.142509,93.924380,91.152581,93.924380,8164300
1,2000-01-04,SPY,88.539177,91.271379,88.469882,90.934803,8089800
2,2000-01-05,SPY,88.697563,89.667693,86.955290,88.657966,12177900
3,2000-01-06,SPY,87.272087,89.647915,87.272087,88.460001,6227200
4,2000-01-07,SPY,92.340508,92.340508,88.737169,88.895557,8066500
...,...,...,...,...,...,...,...
6427,2025-07-24,SPY,634.419983,636.150024,633.989990,634.599976,71307100
6428,2025-07-25,SPY,637.099976,637.580017,634.840027,635.090027,56865400
6429,2025-07-28,SPY,636.940002,638.039978,635.539978,637.479980,54917100
6430,2025-07-29,SPY,635.260010,638.669983,634.340027,638.349976,60556300


In [95]:
def calculate_sma(df, lookback, price_col='Close'):
    """
    Add a simple moving average and a price-vs-average sign to a copy of ``df``.

    Args:
        df (pd.DataFrame): Input frame containing the ``price_col`` column.
        lookback (int): Number of rows in each rolling window.
        price_col (str): Name of the column to average. Defaults to 'Close'.

    Returns:
        pd.DataFrame: A copy of ``df`` with two additional columns:
            'SMA'      - rolling mean of ``price_col`` over ``lookback`` rows;
                         NaN until a full window is available.
            'SMA_sign' - ``np.sign(price - SMA)``: 1.0 when the price is above
                         its average, -1.0 when below, 0.0 when equal, and NaN
                         wherever 'SMA' is NaN.

    Raises:
        KeyError: If ``price_col`` is not a column of ``df``.
    """
    if price_col not in df.columns:
        raise KeyError(f"column {price_col!r} not found in DataFrame")

    # Work on a copy so the caller's frame is left untouched.
    df_sma = df.copy()
    prices = df_sma[price_col]

    # Simple moving average over the trailing `lookback` rows.
    df_sma['SMA'] = prices.rolling(window=lookback).mean()

    # Sign of (price - SMA); note the operand order: positive means price > SMA.
    df_sma['SMA_sign'] = np.sign(prices - df_sma['SMA'])

    return df_sma



In [68]:
# Add the 'SMA' and 'SMA_sign' columns using the configured lookback.
df_with_sma = calculate_sma(df=data, lookback=window)

# Label the table with the lookback actually used, so the heading always
# matches the `window` setting instead of a fixed string.
print(f"DataFrame with {window}-day Simple Moving Average:")
display(df_with_sma)

DataFrame with 5-day Simple Moving Average:


Price,Date,Ticker,Close,High,Low,Open,Volume,SMA,SMA_sign
0,2000-01-03,SPY,92.142509,93.924380,91.152581,93.924380,8164300,,
1,2000-01-04,SPY,88.539177,91.271379,88.469882,90.934803,8089800,,
2,2000-01-05,SPY,88.697563,89.667693,86.955290,88.657966,12177900,,
3,2000-01-06,SPY,87.272087,89.647915,87.272087,88.460001,6227200,,
4,2000-01-07,SPY,92.340508,92.340508,88.737169,88.895557,8066500,89.798369,1.0
...,...,...,...,...,...,...,...,...,...
6427,2025-07-24,SPY,634.419983,636.150024,633.989990,634.599976,71307100,630.768005,1.0
6428,2025-07-25,SPY,637.099976,637.580017,634.840027,635.090027,56865400,632.671997,1.0
6429,2025-07-28,SPY,636.940002,638.039978,635.539978,637.479980,54917100,634.305994,1.0
6430,2025-07-29,SPY,635.260010,638.669983,634.340027,638.349976,60556300,635.585999,-1.0


In [85]:
# Input: `df_with_sma`, which carries 'Close' and 'SMA_sign' columns.

# 1. Current sign and the sign on the previous row, aligned row-for-row.
current_sign = df_with_sma['SMA_sign']
previous_sign = current_sign.shift(1)

# 2. A row is a flip when the sign differs from the previous row's sign.
# `NaN != x` evaluates to True, so `!=` alone would mark every row adjacent to
# a missing sign as a change. Require both the previous and the current sign
# to be present; this also keeps a missing sign in the middle of the series
# from being reported as a flip.
sign_changed_mask = (
    (current_sign != previous_sign)
    & previous_sign.notna()
    & current_sign.notna()
)

# 3. Keep only the flip rows; copy so the new column does not touch df_with_sma.
sign_changes_only = df_with_sma[sign_changed_mask].copy()

# 4. Close at the NEXT flip minus close at this flip. Rows here are consecutive
# flips, not consecutive trading days, and the last row has no successor (NaN).
# Summed over all flips these differences telescope to
# (close at last flip - close at first flip).
sign_changes_only['Next_Close_Diff'] = (
    sign_changes_only['Close'].shift(-1) - sign_changes_only['Close']
)

display(sign_changes_only)

Price,Date,Ticker,Close,High,Low,Open,Volume,SMA,SMA_sign,Next_Close_Diff
7,2000-01-12,SPY,90.637840,91.607970,90.519049,91.607970,6907700,90.891267,-1.0,1.227516
8,2000-01-13,SPY,91.865356,92.340522,90.776435,91.528781,5158300,91.809921,1.0,-0.158371
12,2000-01-20,SPY,91.706985,93.112684,91.113028,93.112684,5800100,92.439508,-1.0,-2.415428
20,2000-02-01,SPY,89.291557,89.766723,87.767067,88.539212,8419900,88.372887,1.0,0.217735
26,2000-02-09,SPY,89.509293,91.528746,89.499393,91.528746,8511500,90.439848,-1.0,-0.128670
...,...,...,...,...,...,...,...,...,...,...
6415,2025-07-08,SPY,620.340027,622.109985,619.520020,621.349976,59024600,620.892017,-1.0,3.719971
6416,2025-07-09,SPY,624.059998,624.719971,620.909973,622.770020,66113300,622.174011,1.0,-1.919983
6420,2025-07-15,SPY,622.140015,627.859985,622.059998,627.520020,74317300,624.090002,-1.0,2.079956
6421,2025-07-16,SPY,624.219971,624.729980,618.049988,623.739990,88987500,624.121997,1.0,11.040039


In [86]:
# Input: `sign_changes_only`, with 'Close' and 'SMA_sign' columns.

# 1. Select the flips where the sign became +1 (close moved above its SMA).
# Copy so the column overwrite below never touches `sign_changes_only`.
is_upward_flip = sign_changes_only['SMA_sign'].eq(1)
df_positive_sign = sign_changes_only.loc[is_upward_flip].copy()

# 2. Overwrite 'Next_Close_Diff' with the close at the next row minus the close
# at this row. Within this filtered frame the "next row" is the next upward
# flip, not the next trading day and not the next flip of either direction.
upward_closes = df_positive_sign['Close']
df_positive_sign['Next_Close_Diff'] = upward_closes.shift(-1) - upward_closes

# 3. The final row has no successor, so its difference is NaN; drop it.
df_positive_sign = df_positive_sign.dropna(subset=['Next_Close_Diff']).copy()

# 4. Show the upward flips together with their differences.
display(df_positive_sign)



Price,Date,Ticker,Close,High,Low,Open,Volume,SMA,SMA_sign,Next_Close_Diff
8,2000-01-13,SPY,91.865356,92.340522,90.776435,91.528781,5158300,91.809921,1.0,-2.573799
20,2000-02-01,SPY,89.291557,89.766723,87.767067,88.539212,8419900,88.372887,1.0,0.089066
30,2000-02-15,SPY,89.380623,89.469716,87.301774,88.222407,11078300,88.964851,1.0,-3.138077
38,2000-02-28,SPY,86.242546,86.598920,84.084503,84.500272,13397800,85.504068,1.0,3.009407
46,2000-03-09,SPY,89.251953,89.251953,86.242570,86.955319,5500900,88.149168,1.0,-0.673180
...,...,...,...,...,...,...,...,...,...,...
6387,2025-05-27,SPY,589.407593,589.567094,576.725054,584.342549,72588500,584.085291,1.0,7.966431
6395,2025-06-06,SPY,597.374023,599.059045,595.100715,596.895397,66588700,593.629077,1.0,3.529541
6401,2025-06-16,SPY,600.903564,602.668367,598.450794,598.630317,79984100,599.800818,1.0,-0.753540
6405,2025-06-23,SPY,600.150024,600.539978,591.890015,595.039978,87426000,597.356287,1.0,23.909973


In [87]:
# Parse the flip dates once and reuse them for both columns below.
flip_dates = pd.to_datetime(sign_changes_only['Date'])
sign_changes_only['Date'] = flip_dates

# Calendar year in which each flip occurred.
sign_changes_only['Year'] = flip_dates.dt.year

# Total 'Next_Close_Diff' per year, attributed to the year of the flip row.
diffs_grouped_by_year = sign_changes_only['Next_Close_Diff'].groupby(sign_changes_only['Year'])
yearly_next_close_diff_sum = diffs_grouped_by_year.sum()

# Show the per-year totals.
print("Sum of Next_Close_Diff by Year:")
display(yearly_next_close_diff_sum)

Sum of Next_Close_Diff by Year:


Unnamed: 0_level_0,Next_Close_Diff
Year,Unnamed: 1_level_1
2000,-4.208557
2001,-11.519096
2002,-14.923824
2003,15.250355
2004,5.950394
2005,6.777824
2006,12.120247
2007,1.666885
2008,-34.883057
2009,18.646591


In [90]:
# Parse the trading dates of the full price table once and reuse them.
# Note: this adds a 'Year' column to `data` itself.
trade_dates = pd.to_datetime(data['Date'])
data['Date'] = trade_dates
data['Year'] = trade_dates.dt.year

# First and last close recorded in each calendar year.
closes_by_year = data.groupby('Year')['Close']
yearly_price_change = closes_by_year.agg(['first', 'last'])

# Within-year move: last close of the year minus first close of the same year.
yearly_price_change['Yearly_Change'] = yearly_price_change['last'].sub(yearly_price_change['first'])

# Show only the within-year change.
print("Difference between the last and first close price of each year:")
display(yearly_price_change.loc[:, ['Yearly_Change']])

Difference between the last and first close price of each year:


Unnamed: 0_level_0,Yearly_Change
Year,Unnamed: 1_level_1
2000,-8.154053
2001,-8.355255
2002,-16.794495
2003,14.507236
2004,8.002754
2005,4.370438
2006,12.176834
2007,5.330338
2008,-37.821556
2009,15.531616


In [92]:
# Attach the per-year flip totals to the per-year price changes. Both are
# indexed by 'Year'; a left join keeps every year present in yearly_price_change.
comparison_df = yearly_price_change.join(yearly_next_close_diff_sum, how='left')

# Show the two series side by side, leaving out the helper 'first'/'last' columns.
print("Comparison of Yearly Change and Sum of Next_Close_Diff at Sign Changes:")
display(comparison_df.loc[:, ['Yearly_Change', 'Next_Close_Diff']])

Comparison of Yearly Change and Sum of Next_Close_Diff at Sign Changes:


Unnamed: 0_level_0,Yearly_Change,Next_Close_Diff
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,-8.154053,-4.208557
2001,-8.355255,-11.519096
2002,-16.794495,-14.923824
2003,14.507236,15.250355
2004,8.002754,5.950394
2005,4.370438,6.777824
2006,12.176834,12.120247
2007,5.330338,1.666885
2008,-37.821556,-34.883057
2009,15.531616,18.646591


In [96]:
# Column totals over all years for the two compared measures.
# 'Next_Close_Diff' was built as next-flip close minus this-flip close, so its
# total over every flip collapses to (last flip close - first flip close).
grand_totals = comparison_df.loc[:, ['Yearly_Change', 'Next_Close_Diff']].sum(axis=0)

# Show the totals.
print("Grand Totals for Yearly_Change and Next_Close_Diff:")
display(grand_totals)

Grand Totals for Yearly_Change and Next_Close_Diff:


Unnamed: 0,0
Yearly_Change,537.774868
Next_Close_Diff,544.622169
