In [26]:
# Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

# Analysis configuration: the instrument to download and the date window.
ticker = 'SPY'
start_date = '2000-01-01'
end_date = '2025-07-31'
# NOTE(review): `window` is not referenced by any cell visible in this notebook.
window = 5
# [shortest, longest] SMA lookback; the sweep cell iterates over both ends inclusively.
lookback_range = [3,21]

# Download historical data from Yahoo Finance for the single ticker above.
# The result is indexed by 'Date'.
data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# Move the 'Date' index into an ordinary column.
data = data.reset_index()

# Depending on the installed yfinance release, a single-ticker download may come
# back with (Price, Ticker) MultiIndex columns or with already-flat columns.
# Only drop the 'Ticker' level when it is actually there, so the cell runs in
# both cases instead of raising on flat columns.
if isinstance(data.columns, pd.MultiIndex) and 'Ticker' in data.columns.names:
    data = data.droplevel(level='Ticker', axis=1)

# Add the ticker symbol as a regular column at position 1 (right after 'Date').
data.insert(1, 'Ticker', ticker)

# The DataFrame is now a flat table: Date, Ticker, then the price/volume columns.
display(data)

[*********************100%***********************]  1 of 1 completed


Price,Date,Ticker,Close,High,Low,Open,Volume
0,2000-01-03,SPY,92.142532,93.924403,91.152604,93.924403,8164300
1,2000-01-04,SPY,88.539192,91.271394,88.469897,90.934819,8089800
2,2000-01-05,SPY,88.697578,89.667708,86.955305,88.657981,12177900
3,2000-01-06,SPY,87.272087,89.647915,87.272087,88.460001,6227200
4,2000-01-07,SPY,92.340553,92.340553,88.737213,88.895601,8066500
...,...,...,...,...,...,...,...
6427,2025-07-24,SPY,634.419983,636.150024,633.989990,634.599976,71307100
6428,2025-07-25,SPY,637.099976,637.580017,634.840027,635.090027,56865400
6429,2025-07-28,SPY,636.940002,638.039978,635.539978,637.479980,54917100
6430,2025-07-29,SPY,635.260010,638.669983,634.340027,638.349976,60556300


In [5]:
def calculate_sma(df, lookback):
    """
    Add a simple moving average of 'Close' and the side of it the price sits on.

    Args:
        df (pd.DataFrame): Input frame; must contain a 'Close' column.
            It is not modified.
        lookback (int): Rolling window length, in rows, for the average.

    Returns:
        pd.DataFrame: A copy of ``df`` with two extra columns:
            'SMA'      - rolling mean of 'Close' over ``lookback`` rows
                         (NaN until a full window is available);
            'SMA_sign' - np.sign(Close - SMA): 1.0 when Close is above the
                         average, -1.0 when below, 0.0 when equal, and NaN
                         wherever 'SMA' is NaN.
    """
    close = df['Close']
    moving_average = close.rolling(window=lookback).mean()

    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    return df.assign(
        SMA=moving_average,
        SMA_sign=np.sign(close - moving_average),
    )



In [13]:
def analyze_sma_changes(df, lookback):
    """
    Sum, per calendar year, the Close moves between consecutive SMA sign changes.

    A row counts as a "sign change" when its 'SMA_sign' differs from the
    previous row's 'SMA_sign' and that previous value is not NaN.

    Note that 'Next_Close_Diff' is computed AFTER filtering down to the
    sign-change rows, so for each such row it is the Close at the following
    sign-change row minus the Close at this one (not the next trading row).
    The final sign-change row has no successor and contributes NaN, which the
    yearly sum skips. Each difference is attributed to the year of the row it
    starts from.

    Args:
        df (pd.DataFrame): Price history with 'Date' and 'Close' columns.
        lookback (int): Number of periods for the moving average.

    Returns:
        pd.DataFrame: One row per year with columns
            ['Year', 'Lookback', 'Next_Close_Diff'], where 'Lookback' repeats
            the ``lookback`` argument.
    """
    # calculate_sma supplies the 'SMA_sign' column this function reads.
    with_sma = calculate_sma(df=df.copy(), lookback=lookback)

    sign_now = with_sma['SMA_sign']
    sign_before = sign_now.shift(1)

    # NaN != NaN evaluates True, so the notna() guard is what keeps the warm-up
    # rows (no previous sign yet) from being counted as changes.
    is_sign_change = sign_now.ne(sign_before) & sign_before.notna()
    changes = with_sma.loc[is_sign_change].copy()

    # shift(-1) runs on the filtered frame: "next" means the next sign change.
    changes['Next_Close_Diff'] = changes['Close'].shift(-1) - changes['Close']

    changes['Date'] = pd.to_datetime(changes['Date'])
    changes['Year'] = changes['Date'].dt.year

    per_year = changes.groupby('Year')['Next_Close_Diff'].sum().reset_index()
    per_year['Lookback'] = lookback

    return per_year[['Year', 'Lookback', 'Next_Close_Diff']]

In [25]:
# Run the sign-change analysis once for every lookback between the two bounds in
# lookback_range (both ends included) and keep each per-year table.
# NOTE(review): the "'list' object is not callable" error recorded under this
# cell would occur if the name `range` had been rebound to a list earlier in the
# kernel session - presumably stale state; confirm with Restart & Run All.
shortest_lookback, longest_lookback = lookback_range[0], lookback_range[1]
results_list = [
    analyze_sma_changes(df=data.copy(), lookback=lookback_value)
    for lookback_value in range(shortest_lookback, longest_lookback + 1)
]

# Stack the per-lookback tables into one long DataFrame with a fresh row index.
all_sma_results = pd.concat(results_list, ignore_index=True)

# Show the combined table.
print("DataFrame with Simple Moving Average analysis for different lookback values:")
display(all_sma_results)

TypeError: 'list' object is not callable

In [None]:
# Attach the per-year sum of Next_Close_Diff to the per-year price change,
# matching rows on the index of yearly_price_change.
# NOTE(review): in the cells visible here, yearly_price_change is only created in
# a LATER cell, and yearly_next_close_diff_sum only exists as a local variable
# inside analyze_sma_changes - this cell appears to rely on leftover kernel
# state; confirm where both inputs come from before trusting the output.
comparison_df = yearly_price_change.join(yearly_next_close_diff_sum, how='left')

# Show only the two columns being compared (this leaves out 'first' and 'last').
compared_columns = ['Yearly_Change', 'Next_Close_Diff']
print("Comparison of Yearly Change and Sum of Next_Close_Diff at Sign Changes:")
display(comparison_df[compared_columns])

Comparison of Yearly Change and Sum of Next_Close_Diff at Sign Changes:


Unnamed: 0_level_0,Yearly_Change,Next_Close_Diff
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,-8.154053,-4.208557
2001,-8.355255,-11.519096
2002,-16.794495,-14.923824
2003,14.507236,15.250355
2004,8.002754,5.950394
2005,4.370438,6.777824
2006,12.176834,12.120247
2007,5.330338,1.666885
2008,-37.821556,-34.883057
2009,15.531616,18.646591


In [None]:
# Parse 'Date' as datetime and tag every row of `data` with its calendar year.
# Both assignments write into `data` itself, so later cells see the extra column.
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year

# For each year, take the 'first' and 'last' aggregates of the Close column.
close_by_year = data.groupby('Year')['Close']
yearly_price_change = close_by_year.agg(['first', 'last'])

# Yearly_Change = last Close of the year minus the first Close of that same year
# (i.e. measured within the year, not against the previous year's final close).
yearly_price_change['Yearly_Change'] = yearly_price_change['last'].sub(yearly_price_change['first'])

# Show just the change column.
print("Difference between the last and first close price of each year:")
display(yearly_price_change[['Yearly_Change']])

Difference between the last and first close price of each year:


Unnamed: 0_level_0,Yearly_Change
Year,Unnamed: 1_level_1
2000,-8.154053
2001,-8.355255
2002,-16.794495
2003,14.507236
2004,8.002754
2005,4.370438
2006,12.176834
2007,5.330338
2008,-37.821556
2009,15.531616


In [None]:
# Total each of the two compared columns across all years in comparison_df.
columns_to_total = ['Yearly_Change', 'Next_Close_Diff']
grand_totals = comparison_df[columns_to_total].sum(axis=0)

# Show the two totals side by side (one entry per column).
print("Grand Totals for Yearly_Change and Next_Close_Diff:")
display(grand_totals)

Grand Totals for Yearly_Change and Next_Close_Diff:


Unnamed: 0,0
Yearly_Change,537.774868
Next_Close_Diff,544.622169
