In [48]:
# Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

# Analysis configuration: the instrument, the download window and the SMA parameters.
ticker = 'SVXY'
start_date = '2000-01-01'
end_date = '2025-07-31'  # yfinance treats `end` as exclusive: the last bar requested is the day before
window = 5
lookback_range = [3,21]  # [first, last] SMA lookback; the sweep cell iterates this range inclusively

# Download daily bars for the single ticker. With auto_adjust=True the OHLC
# values come back already adjusted, and the frame is indexed by 'Date'.
data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# yfinance reports a failed download by returning an empty frame rather than raising.
# Stop here with a clear message instead of letting every later cell run on nothing.
if data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r} between {start_date} and {end_date}."
    )

# Use reset_index() to convert the 'Date' index into an ordinary column.
data = data.reset_index()

# Depending on the yfinance version (and its multi_level_index option) the columns are
# either a (Price, Ticker) MultiIndex or already flat. Only strip the 'Ticker' level
# when it is actually there; an unconditional droplevel() fails on flat columns.
if isinstance(data.columns, pd.MultiIndex):
    data = data.droplevel(level='Ticker', axis=1)

# Keep the symbol as a regular column, placed right after 'Date'.
data.insert(1, 'Ticker', ticker)

# The DataFrame is now a flat table: Date, Ticker, then the price/volume columns.
display(data)

[*********************100%***********************]  1 of 1 completed


Price,Date,Ticker,Close,High,Low,Open,Volume
0,2011-10-04,SVXY,10.525000,10.525000,9.825000,9.872500,81200
1,2011-10-05,SVXY,11.347500,11.410000,10.862500,10.882500,35600
2,2011-10-06,SVXY,11.582500,11.582500,11.200000,11.357500,22400
3,2011-10-07,SVXY,11.672500,11.797500,11.195000,11.797500,72800
4,2011-10-10,SVXY,12.150000,12.150000,11.850000,11.875000,60000
...,...,...,...,...,...,...,...
3470,2025-07-24,SVXY,45.230000,45.480000,45.150002,45.320000,1075200
3471,2025-07-25,SVXY,45.430000,45.470001,45.139999,45.220001,667900
3472,2025-07-28,SVXY,46.040001,46.060001,45.750000,45.900002,979300
3473,2025-07-29,SVXY,45.500000,46.450001,45.380001,46.389999,1543100


In [49]:
def calculate_sma(df, lookback):
    """
    Add a simple moving average of 'Close' and the side of it the close sits on.

    Args:
        df (pd.DataFrame): Price data containing a 'Close' column. It is not
            modified; the function works on a copy.
        lookback (int): Number of rows in the rolling window.

    Returns:
        pd.DataFrame: Copy of ``df`` with two extra columns:
            'SMA'      - rolling mean of 'Close' over ``lookback`` rows
                         (NaN until a full window is available).
            'SMA_sign' - np.sign(Close - SMA): 1.0 above the average,
                         -1.0 below it, 0.0 when equal, NaN where SMA is NaN.
    """
    # Work on a private copy so the caller's frame keeps its original columns.
    result = df.copy()

    close = result['Close']
    moving_average = close.rolling(window=lookback).mean()

    result['SMA'] = moving_average
    # Only the direction of the gap is kept, not its size.
    result['SMA_sign'] = np.sign(close - moving_average)

    return result



In [50]:
def analyze_sma_changes(df, lookback):
    """
    Sum, per calendar year, the close-to-close moves between SMA sign changes.

    A "sign change" is any row whose 'SMA_sign' differs from the previous
    row's (defined) 'SMA_sign'; moving to or from 0 counts as a change.

    Args:
        df (pd.DataFrame): Historical price data with 'Date' and 'Close' columns.
        lookback (int): Number of periods for the moving average.

    Returns:
        pd.DataFrame: Columns 'Year', 'Lookback', 'Next_Close_Diff', with one row
                      per year that contains at least one sign change.
    """
    # Reuse calculate_sma to obtain the 'SMA' and 'SMA_sign' columns.
    enriched = calculate_sma(df=df.copy(), lookback=lookback)

    current_sign = enriched['SMA_sign']
    prior_sign = current_sign.shift(1)

    # A row qualifies only when the prior sign exists (this skips the first row
    # after the NaN warm-up) and the sign is different from it.
    has_prior = ~prior_sign.isna()
    sign_flipped = has_prior & (current_sign != prior_sign)

    # Keep the sign-change rows only; copy so new columns can be added safely.
    crossings = enriched[sign_flipped].copy()

    # NOTE(review): the shift runs on the *filtered* rows, so this is the move to the
    # close at the next sign change, not to the next trading day's close. Every move is
    # added with the same sign, so a year's total telescopes to (close at the first
    # crossing after the year's last one) - (close at the year's first crossing).
    # Confirm that this is the intended metric.
    crossing_close = crossings['Close']
    crossings.loc[:, 'Next_Close_Diff'] = crossing_close.shift(-1) - crossing_close

    # Attribute each move to the calendar year of the crossing it starts from.
    crossings['Date'] = pd.to_datetime(crossings['Date'])
    crossings['Year'] = crossings['Date'].dt.year

    yearly_totals = crossings.groupby('Year')['Next_Close_Diff'].sum().reset_index()

    # Tag the rows with the lookback so results for several lookbacks can be stacked.
    yearly_totals['Lookback'] = lookback

    return yearly_totals[['Year', 'Lookback', 'Next_Close_Diff']]

In [51]:
# Run the sign-change analysis once per lookback and collect the per-year tables.
results_list = []

# lookback_range holds [first, last]; the +1 makes the last lookback inclusive.
first_lookback, last_lookback = lookback_range[0], lookback_range[1]
for lookback_value in range(first_lookback, last_lookback + 1):
    # Each call gets its own copy of the price data.
    df_sma_result = analyze_sma_changes(df=data.copy(), lookback=lookback_value)
    results_list.append(df_sma_result)

# Stack the per-lookback tables into one long table with a fresh 0..n-1 index.
all_sma_results = pd.concat(results_list, ignore_index=True)

# Show the long-format result (columns: Year, Lookback, Next_Close_Diff).
print("DataFrame with Simple Moving Average analysis for different lookback values:")
display(all_sma_results)

DataFrame with Simple Moving Average analysis for different lookback values:


Unnamed: 0,Year,Lookback,Next_Close_Diff
0,2011,3,1.317500
1,2012,3,30.417500
2,2013,3,23.169998
3,2014,3,-8.989998
4,2015,3,-9.459999
...,...,...,...
280,2021,21,9.679998
281,2022,21,2.215000
282,2023,21,19.335001
283,2024,21,-0.014999


In [52]:
# Reshape long -> wide: one row per Year, one column per Lookback,
# each cell holding that year's summed Next_Close_Diff.
pivoted_sma_results = all_sma_results.pivot(
    values='Next_Close_Diff',
    index='Year',
    columns='Lookback',
)

# Show the wide table.
print("Pivoted DataFrame with Lookback as columns, Year as rows, and Next_Close_Diff as values:")
display(pivoted_sma_results)

Pivoted DataFrame with Lookback as columns, Year as rows, and Next_Close_Diff as values:


Lookback,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2011,1.3175,1.3175,1.3175,1.3175,1.3175,1.3175,1.3175,1.7425,5.6675,5.184999,5.184999,5.282499,3.690001,4.8975,4.8975,4.8975,4.8975,4.8975,4.650001
2012,30.4175,30.4175,29.177502,23.0075,23.0075,23.0075,23.0075,23.0075,19.0825,19.0825,19.0825,19.0825,20.674999,20.674999,20.674999,20.674999,20.674999,20.674999,20.674999
2013,23.169998,23.169998,24.409996,30.579998,32.155003,29.384998,29.384998,29.384998,29.384998,31.035,31.035,31.035,31.035,26.66,26.66,26.66,26.66,26.66,26.66
2014,-8.989998,-5.369999,-5.369999,-5.369999,-6.945004,-4.174999,-9.624996,-9.624996,-9.624996,-11.274998,-7.595001,-7.595001,-7.595001,-3.220001,-3.220001,-3.220001,-3.220001,-3.220001,-2.16
2015,-9.459999,-18.93,-18.93,-23.75,-23.75,-23.75,-18.300003,-18.560001,-18.870003,-18.870003,-21.049999,-21.049999,-21.049999,-21.049999,-21.049999,-20.609997,-20.609997,-23.739998,-24.799999
2016,48.589996,54.439999,54.439999,59.259998,59.259998,59.259998,59.259998,59.519997,59.829998,59.829998,58.329998,58.329998,58.329998,58.329998,58.329998,57.889996,57.889996,61.019997,61.019997
2017,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014
2018,-244.200005,-244.200005,-244.200005,-244.200005,-244.200005,-243.665003,-243.665003,-243.665003,-243.665003,-237.650011,-237.650011,-237.650011,-237.650011,-237.650011,-237.425011,-237.425011,-237.425011,-237.425011,-237.425011
2019,11.510002,11.510002,11.510002,11.510002,12.365002,11.83,11.83,11.83,11.83,11.495001,10.640001,10.640001,10.640001,10.640001,11.43,11.43,11.43,11.43,9.65
2020,-12.645,-12.645,-12.645,-12.645,-13.5,-13.5,-13.5,-13.5,-13.5,-13.5,-12.645,-12.645,-12.645,-12.645,-13.66,-13.66,-13.66,-13.66,-11.879999


In [53]:
# Make sure 'Date' is datetime and tag every row with its calendar year.
# Both columns are written back onto `data` itself.
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year

# Per-year price change: the last close of the year minus the first close of that
# same year. Only the 'Yearly_Change' column is kept; the index is Year.
yearly_price_change = (
    data.groupby('Year')['Close']
    .agg(['first', 'last'])
    .assign(Yearly_Change=lambda closes: closes['last'] - closes['first'])
    .drop(columns=['first', 'last'])
)

# Show the per-year change.
print("Difference between the last and first close price of each year:")
display(yearly_price_change)

Difference between the last and first close price of each year:


Unnamed: 0_level_0,Yearly_Change
Year,Unnamed: 1_level_1
2011,2.545
2012,19.002499
2013,30.740002
2014,-4.954998
2015,-11.82
2016,43.720005
2017,158.970016
2018,-244.010004
2019,11.120001
2020,-12.605001


In [54]:
# Put the yearly close-to-close change next to the SMA sign-change totals.
# Both frames are indexed by Year; the left join keeps every year of
# yearly_price_change and adds one column per lookback from pivoted_sma_results.
comparison_df = yearly_price_change.join(pivoted_sma_results, how='left')

# Show the side-by-side table: 'Yearly_Change' followed by the lookback columns.
display(comparison_df)

Unnamed: 0_level_0,Yearly_Change,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011,2.545,1.3175,1.3175,1.3175,1.3175,1.3175,1.3175,1.3175,1.7425,5.6675,5.184999,5.184999,5.282499,3.690001,4.8975,4.8975,4.8975,4.8975,4.8975,4.650001
2012,19.002499,30.4175,30.4175,29.177502,23.0075,23.0075,23.0075,23.0075,23.0075,19.0825,19.0825,19.0825,19.0825,20.674999,20.674999,20.674999,20.674999,20.674999,20.674999,20.674999
2013,30.740002,23.169998,23.169998,24.409996,30.579998,32.155003,29.384998,29.384998,29.384998,29.384998,31.035,31.035,31.035,31.035,26.66,26.66,26.66,26.66,26.66,26.66
2014,-4.954998,-8.989998,-5.369999,-5.369999,-5.369999,-6.945004,-4.174999,-9.624996,-9.624996,-9.624996,-11.274998,-7.595001,-7.595001,-7.595001,-3.220001,-3.220001,-3.220001,-3.220001,-3.220001,-2.16
2015,-11.82,-9.459999,-18.93,-18.93,-23.75,-23.75,-23.75,-18.300003,-18.560001,-18.870003,-18.870003,-21.049999,-21.049999,-21.049999,-21.049999,-21.049999,-20.609997,-20.609997,-23.739998,-24.799999
2016,43.720005,48.589996,54.439999,54.439999,59.259998,59.259998,59.259998,59.259998,59.519997,59.829998,59.829998,58.329998,58.329998,58.329998,58.329998,58.329998,57.889996,57.889996,61.019997,61.019997
2017,158.970016,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,167.710007,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014,162.030014
2018,-244.010004,-244.200005,-244.200005,-244.200005,-244.200005,-244.200005,-243.665003,-243.665003,-243.665003,-243.665003,-237.650011,-237.650011,-237.650011,-237.650011,-237.650011,-237.425011,-237.425011,-237.425011,-237.425011,-237.425011
2019,11.120001,11.510002,11.510002,11.510002,11.510002,12.365002,11.83,11.83,11.83,11.83,11.495001,10.640001,10.640001,10.640001,10.640001,11.43,11.43,11.43,11.43,9.65
2020,-12.605001,-12.645,-12.645,-12.645,-12.645,-13.5,-13.5,-13.5,-13.5,-13.5,-13.5,-12.645,-12.645,-12.645,-12.645,-13.66,-13.66,-13.66,-13.66,-11.879999


In [55]:
# Column-wise statistics over the years in comparison_df (one value per column).
# .std() is the sample standard deviation (pandas default ddof=1).
grand_totals = comparison_df.sum(axis=0)
standard_deviations = comparison_df.std(axis=0)

# Mean absolute deviation around each column's own mean.
mean_absolute_deviations = comparison_df.apply(
    lambda column: (column - column.mean()).abs().mean(),
    axis=0,
)

# One row per comparison_df column, one column per metric.
summary_df = pd.DataFrame(
    {
        'Grand Total': grand_totals,
        'Standard Deviation': standard_deviations,
        'Mean Absolute Deviation': mean_absolute_deviations,
    }
)

# Show the summary table.
print("Grand Totals, Standard Deviations, and Mean Absolute Deviations for each column:")
display(summary_df)

Grand Totals, Standard Deviations, and Mean Absolute Deviations for each column:


Unnamed: 0,Grand Total,Standard Deviation,Mean Absolute Deviation
Yearly_Change,18.247524,79.776338,38.610735
3,33.095,81.299535,39.574422
4,33.095,81.659159,40.357089
5,33.025001,81.637014,40.242111
6,33.025001,81.97364,40.884778
7,31.604999,82.055645,41.310468
8,31.604999,81.841952,40.8698
9,29.815001,81.768537,40.847489
10,30.240002,81.807679,41.0086
11,30.240002,81.776704,41.013467
