In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

# Analysis configuration.
ticker = 'UVXY'
start_date = '2000-01-01'
end_date = '2025-07-31'  # yfinance treats `end` as exclusive: the last row is the session before this date
window = 5  # NOTE(review): not referenced by any cell visible in this notebook section — confirm before removing
lookback_range = [3,21]  # [min, max] SMA lookbacks; the sweep cell adds 1 so the max is included

# Download adjusted daily OHLCV bars for the single ticker from Yahoo Finance.
# The result is indexed by 'Date'. Depending on the yfinance version/configuration the
# columns are either a two-level MultiIndex (Price, Ticker) or a flat Index.
data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# Fail fast: a failed/empty download would otherwise flow silently through every
# later cell and produce empty result tables.
if data is None or data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r} between {start_date} and {end_date}"
    )

# Turn the 'Date' index into an ordinary column.
data = data.reset_index()

# Flatten the columns down to the 'Price' level. The guard keeps the cell working
# when the columns are already flat, where an unconditional droplevel would raise.
if isinstance(data.columns, pd.MultiIndex) and 'Ticker' in data.columns.names:
    data = data.droplevel(level='Ticker', axis=1)

# Carry the ticker as a regular column, placed right after 'Date'.
data.insert(1, 'Ticker', ticker)

# The DataFrame is now a flat table: Date, Ticker, then the price/volume columns.
display(data)

[*********************100%***********************]  1 of 1 completed


Price,Date,Ticker,Close,High,Low,Open,Volume
0,2011-10-04,UVXY,1.029000e+11,1.224000e+11,1.029000e+11,1.213500e+11,0
1,2011-10-05,UVXY,9.069000e+10,9.945000e+10,9.069000e+10,9.873000e+10,0
2,2011-10-06,UVXY,8.700000e+10,9.390000e+10,8.700000e+10,9.057000e+10,0
3,2011-10-07,UVXY,8.826000e+10,9.288000e+10,8.424000e+10,8.529000e+10,0
4,2011-10-10,UVXY,7.797000e+10,8.280000e+10,7.797000e+10,8.274000e+10,0
...,...,...,...,...,...,...,...
3470,2025-07-24,UVXY,1.562000e+01,1.570000e+01,1.535000e+01,1.555000e+01,14276400
3471,2025-07-25,UVXY,1.543000e+01,1.573000e+01,1.536000e+01,1.566000e+01,16113600
3472,2025-07-28,UVXY,1.479000e+01,1.512000e+01,1.476000e+01,1.493000e+01,20124700
3473,2025-07-29,UVXY,1.530000e+01,1.542000e+01,1.440000e+01,1.452000e+01,23927000


In [3]:
def calculate_sma(df, lookback):
    """
    Add a simple moving average of 'Close' and the side of it the close sits on.

    Args:
        df (pd.DataFrame): Input frame; must contain a 'Close' column.
            It is not modified.
        lookback (int): Rolling window length, in rows, for the average.

    Returns:
        pd.DataFrame: A new frame with every input column plus
            'SMA'      - rolling mean of 'Close' over `lookback` rows
                         (NaN until a full window is available), and
            'SMA_sign' - sign of (Close - SMA): 1.0 above, -1.0 below,
                         0.0 when equal, NaN where 'SMA' is NaN.
    """
    close = df['Close']
    moving_average = close.rolling(window=lookback).mean()

    # assign() works on a copy, so the caller's frame is left untouched.
    return df.assign(
        SMA=moving_average,
        SMA_sign=np.sign(close - moving_average),
    )



In [29]:
def analyze_sma_changes(df, lookback):
    """
    Sum, per calendar year, the returns measured from each downward SMA cross.

    Rows where 'SMA_sign' differs from the previous row's value are kept as
    "sign-change" rows. For each of them, 'Next_Close_Return' is
    (Close - Close at the NEXT sign-change row) / Close, i.e. it is positive
    when the price is lower at the following sign change. Only sign-change
    rows whose 'SMA_sign' is -1 contribute to the yearly sums, and each one is
    attributed to the year of its own 'Date'.

    Args:
        df (pd.DataFrame): Price history with 'Date' and 'Close' columns.
        lookback (int): Number of periods for the moving average.

    Returns:
        pd.DataFrame: Columns 'Year', 'Lookback', 'Next_Close_Return', one row
            per year that has at least one qualifying sign change.
    """
    with_sma = calculate_sma(df=df.copy(), lookback=lookback)

    # A flip is a row whose sign differs from the previous row's sign; rows
    # whose previous sign is NaN (warm-up of the rolling window) are excluded.
    sign = with_sma['SMA_sign']
    prior_sign = sign.shift(1)
    is_flip = sign.ne(prior_sign) & prior_sign.notna()
    flips = with_sma[is_flip].copy()

    # The shift runs over the flip rows only, so "next" means the next flip,
    # not the next trading day. The last flip has no successor and gets NaN.
    close_at_next_flip = flips['Close'].shift(-1)
    flips['Next_Close_Return'] = (flips['Close'] - close_at_next_flip) / flips['Close']

    flips['Date'] = pd.to_datetime(flips['Date'])
    flips['Year'] = flips['Date'].dt.year

    # Keep only flips where the sign became -1 (close moved below its SMA).
    below_sma_flips = flips[flips['SMA_sign'] == -1]

    yearly_totals = (
        below_sma_flips
        .groupby('Year')['Next_Close_Return']
        .sum()
        .reset_index()
    )
    yearly_totals['Lookback'] = lookback

    return yearly_totals[['Year', 'Lookback', 'Next_Close_Return']]

In [31]:
# Collect one per-year result frame for every lookback in the configured range.
results_list = []

# lookback_range holds [min, max]; the +1 makes the upper bound inclusive.
for lookback_value in range(lookback_range[0], lookback_range[1] + 1):
    # Each call gets its own copy of `data`, so the shared frame is never touched here.
    df_sma_result = analyze_sma_changes(df=data.copy(), lookback=lookback_value)
    # Keep the (Year, Lookback, Next_Close_Return) frame for this lookback.
    results_list.append(df_sma_result)

# Stack the per-lookback frames into one long table with a fresh 0..n-1 index.
all_sma_results = pd.concat(results_list, ignore_index=True)

# Show the combined long-format table.
print("DataFrame with Simple Moving Average analysis for different lookback values:")
display(all_sma_results)

DataFrame with Simple Moving Average analysis for different lookback values:


Unnamed: 0,Year,Lookback,Next_Close_Return
0,2011,3,0.096987
1,2012,3,1.892482
2,2013,3,0.203290
3,2014,3,0.395458
4,2015,3,0.228710
...,...,...,...
280,2021,21,0.201505
281,2022,21,-0.193160
282,2023,21,0.714460
283,2024,21,0.057626


In [33]:
# Reshape the long results into a grid: one row per Year, one column per Lookback,
# cells holding the summed Next_Close_Return.
pivoted_sma_results = (
    all_sma_results
    .set_index(['Year', 'Lookback'])['Next_Close_Return']
    .unstack('Lookback')
)

# Show the wide-format grid.
print("Pivoted DataFrame with Lookback as columns, Year as rows, and Next_Close_Return as values:")
display(pivoted_sma_results)

Pivoted DataFrame with Lookback as columns, Year as rows, and Next_Close_Return as values:


Lookback,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2011,0.096987,0.198595,-0.013966,-0.147626,-0.165954,-0.152441,-0.152441,0.193414,0.120775,0.201187,0.405883,0.343664,0.343664,0.343664,0.343664,0.462725,0.462725,0.365853,0.528915
2012,1.892482,2.058626,2.396299,2.381653,2.418303,2.364548,2.267811,1.73075,1.804663,1.405704,1.341566,1.447009,1.431146,1.431146,1.523173,1.587548,1.601911,1.448964,1.377382
2013,0.20329,0.120577,0.299553,0.254565,0.405335,0.412996,0.525814,0.030029,0.0463,-0.001555,0.316179,0.433482,0.282059,0.284373,0.308145,0.279298,0.201632,0.299487,0.302102
2014,0.395458,0.534357,0.935753,0.706487,0.384097,0.282546,0.237408,0.305029,0.192714,0.430622,0.467255,-0.013365,0.215409,0.215409,0.076339,-0.058169,-0.058169,0.027553,0.024574
2015,0.22871,0.241716,-0.032483,-0.39106,0.232561,0.195622,0.435841,0.30713,0.274107,-0.109924,-0.023532,0.401389,0.014843,-0.104406,0.012734,-0.072448,-0.129008,-0.179472,-0.00244
2016,1.075154,1.210715,1.018655,1.224011,1.461827,1.617067,1.476589,1.310427,1.122286,0.796251,0.752653,0.751999,0.859085,1.05919,1.032603,1.007821,0.937017,1.113866,1.103824
2017,1.210178,1.348694,1.360306,1.533616,1.527557,1.314757,1.377987,1.253439,1.182418,1.204729,1.028474,1.0059,1.067162,1.193623,1.263302,1.38976,1.422393,1.040717,0.979636
2018,-0.460092,-0.133553,-0.022895,-0.507526,0.5676,0.690205,0.490175,0.273597,0.205216,0.032521,-0.000599,-0.116651,-0.15925,-0.104315,-0.10831,-0.43037,-0.445335,-0.346766,-0.269659
2019,0.688441,0.433312,0.325821,0.426269,0.208814,0.524777,0.706166,0.688407,0.810429,0.688745,0.8114,0.749763,0.903648,0.8975,0.862084,0.722789,0.696407,0.693469,0.754359
2020,0.641397,0.999356,0.969514,0.712596,0.846736,0.428102,0.830977,0.784702,0.824414,0.52993,0.28558,0.269799,0.042031,0.042031,0.32451,0.298697,0.374672,0.272537,0.354342


In [34]:
# Normalise 'Date' to datetime and tag every row with its calendar year.
# Both columns are written onto the shared `data` frame.
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year

# Per year: take the first and last 'Close' in row order, express the move as
# (first - last) / first (positive when the price fell over the year), and keep
# only that column.
yearly_price_change = (
    data.groupby('Year')['Close']
    .agg(['first', 'last'])
    .assign(Yearly_Return=lambda closes: (closes['first'] - closes['last']) / closes['first'])
    .drop(columns=['first', 'last'])
)

# Show the per-year benchmark.
print("Difference between the last and first close price of each year:")
display(yearly_price_change)

Difference between the last and first close price of each year:


Unnamed: 0_level_0,Yearly_Return
Year,Unnamed: 1_level_1
2011,0.645481
2012,0.967984
2013,0.895582
2014,0.639478
2015,0.766954
2016,0.945244
2017,0.931933
2018,-0.729735
2019,0.834574
2020,0.122012


In [35]:
# Join the per-lookback yearly sums (pivoted_sma_results) onto the yearly benchmark
# (yearly_price_change). Both frames are indexed by 'Year', so join() aligns on that index.
comparison_df = yearly_price_change.join(pivoted_sma_results)

# Display the combined table: 'Yearly_Return' followed by one column per lookback value.
display(comparison_df)

Unnamed: 0_level_0,Yearly_Return,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011,0.645481,0.096987,0.198595,-0.013966,-0.147626,-0.165954,-0.152441,-0.152441,0.193414,0.120775,0.201187,0.405883,0.343664,0.343664,0.343664,0.343664,0.462725,0.462725,0.365853,0.528915
2012,0.967984,1.892482,2.058626,2.396299,2.381653,2.418303,2.364548,2.267811,1.73075,1.804663,1.405704,1.341566,1.447009,1.431146,1.431146,1.523173,1.587548,1.601911,1.448964,1.377382
2013,0.895582,0.20329,0.120577,0.299553,0.254565,0.405335,0.412996,0.525814,0.030029,0.0463,-0.001555,0.316179,0.433482,0.282059,0.284373,0.308145,0.279298,0.201632,0.299487,0.302102
2014,0.639478,0.395458,0.534357,0.935753,0.706487,0.384097,0.282546,0.237408,0.305029,0.192714,0.430622,0.467255,-0.013365,0.215409,0.215409,0.076339,-0.058169,-0.058169,0.027553,0.024574
2015,0.766954,0.22871,0.241716,-0.032483,-0.39106,0.232561,0.195622,0.435841,0.30713,0.274107,-0.109924,-0.023532,0.401389,0.014843,-0.104406,0.012734,-0.072448,-0.129008,-0.179472,-0.00244
2016,0.945244,1.075154,1.210715,1.018655,1.224011,1.461827,1.617067,1.476589,1.310427,1.122286,0.796251,0.752653,0.751999,0.859085,1.05919,1.032603,1.007821,0.937017,1.113866,1.103824
2017,0.931933,1.210178,1.348694,1.360306,1.533616,1.527557,1.314757,1.377987,1.253439,1.182418,1.204729,1.028474,1.0059,1.067162,1.193623,1.263302,1.38976,1.422393,1.040717,0.979636
2018,-0.729735,-0.460092,-0.133553,-0.022895,-0.507526,0.5676,0.690205,0.490175,0.273597,0.205216,0.032521,-0.000599,-0.116651,-0.15925,-0.104315,-0.10831,-0.43037,-0.445335,-0.346766,-0.269659
2019,0.834574,0.688441,0.433312,0.325821,0.426269,0.208814,0.524777,0.706166,0.688407,0.810429,0.688745,0.8114,0.749763,0.903648,0.8975,0.862084,0.722789,0.696407,0.693469,0.754359
2020,0.122012,0.641397,0.999356,0.969514,0.712596,0.846736,0.428102,0.830977,0.784702,0.824414,0.52993,0.28558,0.269799,0.042031,0.042031,0.32451,0.298697,0.374672,0.272537,0.354342


In [36]:
# Per-column total and standard deviation across the yearly rows of comparison_df.
grand_totals = comparison_df.sum()
standard_deviations = comparison_df.std()

# Mean absolute deviation: average distance of each yearly value from its column mean.
mean_absolute_deviations = comparison_df.apply(
    lambda column: column.sub(column.mean()).abs().mean()
)

# One row per comparison_df column, one column per statistic.
summary_df = grand_totals.to_frame(name='Grand Total').assign(**{
    'Standard Deviation': standard_deviations,
    'Mean Absolute Deviation': mean_absolute_deviations,
})

# Show the summary table.
print("Grand Totals, Standard Deviations, and Mean Absolute Deviations for each column:")
display(summary_df)

Grand Totals, Standard Deviations, and Mean Absolute Deviations for each column:


Unnamed: 0,Grand Total,Standard Deviation,Mean Absolute Deviation
Yearly_Return,8.998879,0.450653,0.320047
3,7.732131,0.64479,0.490267
4,8.550052,0.621851,0.44629
5,8.731197,0.689069,0.523127
6,8.245029,0.759917,0.560025
7,9.880824,0.691809,0.495226
8,9.385812,0.663373,0.464492
9,9.054376,0.671594,0.492069
10,7.099074,0.580352,0.469476
11,7.192576,0.567818,0.47909
