In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

# Analysis parameters.
ticker = 'SVXY'
start_date = '2000-01-01'
end_date = '2025-07-31'  # yfinance documents `end` as exclusive
window = 5
lookback_range = [3,21]  # [min, max] SMA lookbacks; both ends are swept inclusively further down

# Download historical data from Yahoo Finance for a single ticker.
# auto_adjust=True asks yfinance for adjusted OHLC prices.
data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# A failed or empty download yields an empty frame; stop here with a clear
# message instead of letting every later cell silently produce empty tables.
if data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r} between {start_date} and {end_date}"
    )

# Use reset_index() to convert the 'Date' index into a regular column.
data = data.reset_index()

# Depending on the yfinance version/options, columns are either a
# (Price, Ticker) MultiIndex or already flat. Only drop the 'Ticker' level
# when it actually exists, so the cell works in both cases.
if isinstance(data.columns, pd.MultiIndex):
    data = data.droplevel(level='Ticker', axis=1)

# Add the ticker symbol as a plain column at position 1 (right after 'Date').
data.insert(1, 'Ticker', ticker)

# The DataFrame is now a flat table with no MultiIndex.
display(data)

[*********************100%***********************]  1 of 1 completed


Price,Date,Ticker,Close,High,Low,Open,Volume
0,2011-10-04,SVXY,10.525000,10.525000,9.825000,9.872500,81200
1,2011-10-05,SVXY,11.347500,11.410000,10.862500,10.882500,35600
2,2011-10-06,SVXY,11.582500,11.582500,11.200000,11.357500,22400
3,2011-10-07,SVXY,11.672500,11.797500,11.195000,11.797500,72800
4,2011-10-10,SVXY,12.150000,12.150000,11.850000,11.875000,60000
...,...,...,...,...,...,...,...
3470,2025-07-24,SVXY,45.230000,45.480000,45.150002,45.320000,1075200
3471,2025-07-25,SVXY,45.430000,45.470001,45.139999,45.220001,667900
3472,2025-07-28,SVXY,46.040001,46.060001,45.750000,45.900002,979300
3473,2025-07-29,SVXY,45.500000,46.450001,45.380001,46.389999,1543100


In [2]:
def calculate_sma(df, lookback):
    """
    Add a simple moving average of 'Close' and the side of it the price is on.

    Args:
        df (pd.DataFrame): Input frame; must contain a 'Close' column.
        lookback (int): Window length handed to ``rolling`` for the average.

    Returns:
        pd.DataFrame: A copy of ``df`` with two extra columns:
            'SMA'      - rolling mean of 'Close' over ``lookback`` rows.
            'SMA_sign' - ``np.sign(Close - SMA)``: 1 when the close is above
                         the average, -1 when below, 0 when equal, and NaN
                         wherever 'SMA' itself is NaN.
    """
    close = df['Close']
    sma = close.rolling(window=lookback).mean()

    # Attach the new columns to a copy so the caller's frame is left untouched.
    result = df.copy()
    result['SMA'] = sma
    result['SMA_sign'] = np.sign(close - sma)
    return result



In [3]:
def analyze_sma_changes(df, lookback):
    """
    Sum, per calendar year, the close change that follows each upward SMA crossing.

    A "crossing" is a row whose 'SMA_sign' differs from the previous row's
    sign; rows whose previous sign is NaN are ignored. For each crossing,
    'Next_Close_Diff' is the 'Close' at the *next crossing* minus the 'Close'
    at this crossing (the last crossing therefore gets NaN). Only crossings
    with ``SMA_sign == 1`` are kept, and their 'Next_Close_Diff' values are
    summed by the year of the crossing's 'Date'.

    Args:
        df (pd.DataFrame): Price history with at least 'Date' and 'Close' columns.
        lookback (int): Number of periods for the moving average.

    Returns:
        pd.DataFrame: Columns ['Year', 'Lookback', 'Next_Close_Diff'], one row
        per year that contains at least one upward crossing; 'Lookback' holds
        the ``lookback`` argument on every row.
    """
    # Work on a private copy enriched with 'SMA' and 'SMA_sign'.
    enriched = calculate_sma(df=df.copy(), lookback=lookback)

    sign = enriched['SMA_sign']
    prior_sign = sign.shift(1)

    # A crossing needs a known previous sign that differs from the current one.
    is_crossing = prior_sign.notna() & (sign != prior_sign)
    crossings = enriched[is_crossing].copy()

    # shift(-1) runs on the already-filtered rows, so the difference spans from
    # one crossing to the next crossing, not to the next row of the full data.
    crossings['Next_Close_Diff'] = crossings['Close'].shift(-1) - crossings['Close']

    # Derive the calendar year of each crossing from its (datetime) 'Date'.
    crossings['Date'] = pd.to_datetime(crossings['Date'])
    crossings['Year'] = crossings['Date'].dt.year

    # Keep only crossings where the close moved above the average.
    upward = crossings[crossings['SMA_sign'] == 1]

    yearly = (
        upward.groupby('Year')['Next_Close_Diff']
        .sum()
        .reset_index()
    )
    yearly['Lookback'] = lookback

    return yearly[['Year', 'Lookback', 'Next_Close_Diff']]

In [4]:
# Run the SMA analysis once for every lookback between lookback_range[0] and
# lookback_range[1] (both included), handing each run its own copy of `data`.
results_list = [
    analyze_sma_changes(df=data.copy(), lookback=lookback_value)
    for lookback_value in range(lookback_range[0], lookback_range[1] + 1)
]

# Stack the per-lookback tables into one long DataFrame with a fresh 0..n-1 index.
all_sma_results = pd.concat(results_list, ignore_index=True)

# Show the combined table.
print("DataFrame with Simple Moving Average analysis for different lookback values:")
display(all_sma_results)

DataFrame with Simple Moving Average analysis for different lookback values:


Unnamed: 0,Year,Lookback,Next_Close_Diff
0,2011,3,-1.892500
1,2012,3,22.095003
2,2013,3,12.109985
3,2014,3,3.470028
4,2015,3,-3.569988
...,...,...,...
280,2021,21,-1.315002
281,2022,21,0.535000
282,2023,21,14.179993
283,2024,21,-4.554996


In [5]:
# Reshape the long results into a grid: one row per Year, one column per
# Lookback, cells holding the summed Next_Close_Diff.
pivoted_sma_results = pd.pivot(
    all_sma_results,
    index='Year',
    columns='Lookback',
    values='Next_Close_Diff',
)

# Show the reshaped table.
print("Pivoted DataFrame with Lookback as columns, Year as rows, and Next_Close_Diff as values:")
display(pivoted_sma_results)

Pivoted DataFrame with Lookback as columns, Year as rows, and Next_Close_Diff as values:


Lookback,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2011,-1.8925,-0.397498,-0.137499,-0.987499,-0.514999,-2.0775,-1.825,-0.6425,3.934999,4.7375,4.7375,5.58,3.027501,4.650001,4.650001,4.650001,4.650001,4.650001,4.650001
2012,22.095003,20.707497,24.775002,12.899994,11.482496,12.319998,9.507496,11.942495,6.82,5.157499,6.672499,8.112499,8.067503,7.532503,6.625002,6.050003,3.270004,3.51,3.390001
2013,12.109985,8.554985,9.644989,10.649994,9.154999,18.244999,17.949993,15.594997,17.200001,15.920002,16.075005,16.945004,14.954998,12.614998,15.715,13.484997,11.164993,6.159996,6.834995
2014,3.470028,22.590019,26.610008,8.749996,-0.335003,-0.514999,4.194992,4.280006,-4.780003,-0.38501,7.954998,8.564995,10.155006,20.010002,24.73,14.040001,15.570007,11.670006,13.590008
2015,-3.569988,-7.17001,1.769993,-25.16,-30.460003,-27.740005,-14.580009,-1.070007,-2.23,-3.110001,-9.349998,-6.699997,-11.130001,-11.610001,-12.690002,-11.480007,-12.859993,-5.999996,-9.330002
2016,22.559986,27.139992,23.329998,31.269993,28.409988,25.449989,32.389988,13.349991,15.57999,18.539993,17.269993,15.429996,15.639999,20.259998,14.969994,14.529991,17.769993,17.619991,15.389996
2017,102.820007,84.980011,110.030006,102.939995,89.450005,116.650017,112.31002,106.750031,84.050018,88.390022,91.070015,98.650017,98.370018,100.210014,97.460014,100.180016,100.180016,104.000015,111.170013
2018,-25.35998,-34.32498,-50.484961,-18.259989,-12.784994,-8.999994,-8.174995,-7.024994,-8.849995,-4.910004,-6.635004,-8.849998,-5.870001,-5.98,-5.66,-5.539999,-6.899988,-6.819988,-7.214993
2019,4.910006,2.615,2.355,3.919998,3.220003,5.795,6.384998,5.689997,6.279997,6.484995,5.139996,5.759995,4.539995,5.094995,5.999996,6.514999,8.375,8.285,6.844999
2020,3.339997,4.704993,2.844994,2.059999,0.364999,0.304997,-4.935006,-3.445001,-2.835003,-2.405003,-1.250005,-3.000002,-3.199999,-2.229997,-3.14,-2.555003,-3.605002,-4.165002,-4.785003


In [6]:
# Make sure 'Date' is datetime and derive the calendar year from it.
# Note: both assignments write back onto `data` itself.
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year

# For each year, take the last close minus the first close and keep the result
# as a one-column frame ('Yearly_Change') indexed by Year.
close_by_year = data.groupby('Year')['Close']
yearly_price_change = (close_by_year.last() - close_by_year.first()).to_frame('Yearly_Change')

# Show the result.
print("Difference between the last and first close price of each year:")
display(yearly_price_change)

Difference between the last and first close price of each year:


Unnamed: 0_level_0,Yearly_Change
Year,Unnamed: 1_level_1
2011,2.545
2012,19.002499
2013,30.740002
2014,-4.954998
2015,-11.82
2016,43.720005
2017,158.970016
2018,-244.010004
2019,11.120001
2020,-12.605001


In [7]:
# Left-join the Year x Lookback table (pivoted_sma_results) onto the yearly
# price change; rows are matched on the shared 'Year' index.
comparison_df = yearly_price_change.join(pivoted_sma_results, how='left')

# Show 'Yearly_Change' side by side with one column per lookback.
display(comparison_df)

Unnamed: 0_level_0,Yearly_Change,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011,2.545,-1.8925,-0.397498,-0.137499,-0.987499,-0.514999,-2.0775,-1.825,-0.6425,3.934999,4.7375,4.7375,5.58,3.027501,4.650001,4.650001,4.650001,4.650001,4.650001,4.650001
2012,19.002499,22.095003,20.707497,24.775002,12.899994,11.482496,12.319998,9.507496,11.942495,6.82,5.157499,6.672499,8.112499,8.067503,7.532503,6.625002,6.050003,3.270004,3.51,3.390001
2013,30.740002,12.109985,8.554985,9.644989,10.649994,9.154999,18.244999,17.949993,15.594997,17.200001,15.920002,16.075005,16.945004,14.954998,12.614998,15.715,13.484997,11.164993,6.159996,6.834995
2014,-4.954998,3.470028,22.590019,26.610008,8.749996,-0.335003,-0.514999,4.194992,4.280006,-4.780003,-0.38501,7.954998,8.564995,10.155006,20.010002,24.73,14.040001,15.570007,11.670006,13.590008
2015,-11.82,-3.569988,-7.17001,1.769993,-25.16,-30.460003,-27.740005,-14.580009,-1.070007,-2.23,-3.110001,-9.349998,-6.699997,-11.130001,-11.610001,-12.690002,-11.480007,-12.859993,-5.999996,-9.330002
2016,43.720005,22.559986,27.139992,23.329998,31.269993,28.409988,25.449989,32.389988,13.349991,15.57999,18.539993,17.269993,15.429996,15.639999,20.259998,14.969994,14.529991,17.769993,17.619991,15.389996
2017,158.970016,102.820007,84.980011,110.030006,102.939995,89.450005,116.650017,112.31002,106.750031,84.050018,88.390022,91.070015,98.650017,98.370018,100.210014,97.460014,100.180016,100.180016,104.000015,111.170013
2018,-244.010004,-25.35998,-34.32498,-50.484961,-18.259989,-12.784994,-8.999994,-8.174995,-7.024994,-8.849995,-4.910004,-6.635004,-8.849998,-5.870001,-5.98,-5.66,-5.539999,-6.899988,-6.819988,-7.214993
2019,11.120001,4.910006,2.615,2.355,3.919998,3.220003,5.795,6.384998,5.689997,6.279997,6.484995,5.139996,5.759995,4.539995,5.094995,5.999996,6.514999,8.375,8.285,6.844999
2020,-12.605001,3.339997,4.704993,2.844994,2.059999,0.364999,0.304997,-4.935006,-3.445001,-2.835003,-2.405003,-1.250005,-3.000002,-3.199999,-2.229997,-3.14,-2.555003,-3.605002,-4.165002,-4.785003


In [8]:
def _mean_abs_deviation(column):
    """Return the mean absolute distance of a column's values from the column mean."""
    return (column - column.mean()).abs().mean()


# Per-column statistics of comparison_df: total, standard deviation
# (pandas' default `std`), and mean absolute deviation around the mean.
grand_totals = comparison_df.sum()
standard_deviations = comparison_df.std()
mean_absolute_deviations = comparison_df.apply(_mean_abs_deviation)

# One row per comparison_df column, one column per metric.
summary_df = pd.DataFrame({
    'Grand Total': grand_totals,
    'Standard Deviation': standard_deviations,
    'Mean Absolute Deviation': mean_absolute_deviations
})

# Show the summary table.
print("Grand Totals, Standard Deviations, and Mean Absolute Deviations for each column:")
display(summary_df)

Grand Totals, Standard Deviations, and Mean Absolute Deviations for each column:


Unnamed: 0,Grand Total,Standard Deviation,Mean Absolute Deviation
Yearly_Change,18.247524,79.776338,38.610735
3,152.257554,28.326138,16.44233
4,143.430031,25.565103,16.243003
5,163.41754,32.803034,18.825669
6,144.682486,28.877993,16.184332
7,122.482488,25.854288,14.872666
8,153.71751,31.907573,17.983445
9,164.967491,30.226249,17.30649
10,157.535035,27.558283,14.470112
11,124.52503,22.336305,12.715112
