In [1]:
import yfinance as yf

# Choose an interval: '1d' (daily), '1wk' (weekly), '1mo' (monthly), etc.
interval = '1d'  # Change this to '1wk' or '1mo' as needed
symbol = "^GSPC"
# Download historical data for S&P 500 (^GSPC) with the chosen interval
sp500 = yf.download(symbol, period="1y", interval=interval)

print(f"Downloaded S&P 500 data with interval: {interval}")
print(sp500)


  sp500 = yf.download(symbol, period="1y", interval=interval)
[*********************100%***********************]  1 of 1 completed

Downloaded S&P 500 data with interval: 1d
Price             Close         High          Low         Open      Volume
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC
Date                                                                      
2024-11-29  6032.379883  6044.169922  6003.979980  6003.979980  2444420000
2024-12-02  6047.149902  6053.580078  6035.330078  6040.109863  4412470000
2024-12-03  6049.879883  6052.069824  6033.390137  6042.970215  4095000000
2024-12-04  6086.490234  6089.839844  6061.060059  6069.390137  4003390000
2024-12-05  6075.109863  6094.549805  6072.899902  6089.029785  4212020000
...                 ...          ...          ...          ...         ...
2025-11-21  6602.990234  6660.049805  6521.919922  6555.770020  5929930000
2025-11-24  6705.120117  6715.750000  6630.700195  6636.540039  6039740000
2025-11-25  6765.879883  6776.399902  6659.979980  6697.029785  5003330000
2025-11-26  6812.609863  6831.439941  6783.870117  6793.54




In [8]:
import plotly.graph_objects as go

# Extract OHLC data from the MultiIndex DataFrame
# The data has columns like ('Close', '^GSPC'), ('High', '^GSPC'), etc.
ohlc_data = sp500[['Open', 'High', 'Low', 'Close']]

# Create candlestick chart
fig = go.Figure(data=[go.Candlestick(
    x=ohlc_data.index,
    open=ohlc_data[('Open', '^GSPC')],
    high=ohlc_data[('High', '^GSPC')],
    low=ohlc_data[('Low', '^GSPC')],
    close=ohlc_data[('Close', '^GSPC')],
    name='S&P 500'
)])

# Update layout
fig.update_layout(
    title='S&P 500 Candlestick Chart (1 Year)',
    xaxis_title='Date',
    yaxis_title='Price',
    xaxis_rangeslider_visible=False,  # Hide the range slider for cleaner look
    height=600
)

# Show the chart
fig.show()


In [9]:
sp500

Price,Close,High,Low,Open,Volume
Ticker,^GSPC,^GSPC,^GSPC,^GSPC,^GSPC
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-11-27,5998.740234,6020.160156,5984.870117,6014.109863,3363340000
2024-11-29,6032.379883,6044.169922,6003.979980,6003.979980,2444420000
2024-12-02,6047.149902,6053.580078,6035.330078,6040.109863,4412470000
2024-12-03,6049.879883,6052.069824,6033.390137,6042.970215,4095000000
2024-12-04,6086.490234,6089.839844,6061.060059,6069.390137,4003390000
...,...,...,...,...,...
2025-11-20,6538.759766,6770.350098,6534.049805,6737.930176,5596080000
2025-11-21,6602.990234,6660.049805,6521.919922,6555.770020,5929930000
2025-11-24,6705.120117,6715.750000,6630.700195,6636.540039,6039740000
2025-11-25,6765.879883,6776.399902,6659.979980,6697.029785,5003330000


In [10]:
import pandas as pd

# Extract High, Low, and Close from SP500 (handling MultiIndex structure)
if isinstance(sp500.columns, pd.MultiIndex):
    high = sp500[('High', '^GSPC')]
    low = sp500[('Low', '^GSPC')]
    close = sp500[('Close', '^GSPC')]
else:
    high = sp500['High']
    low = sp500['Low']
    close = sp500['Close']

# Calculate H+L+Close for each bar
hlc = high + low + close

# Calculate the dots value for each date
# Formula: [(H+L+Close Bar -2) + (H+L+Close Bar -1) + (H+L+Close Bar current)] ÷ 9
dots_values = []
for i in range(len(hlc)):
    if i < 2:
        # For first two bars, we don't have enough previous data, set to NaN
        dots_values.append(pd.NA)
    else:
        # Current bar (i), previous bar (i-1), previous of previous bar (i-2)
        current = hlc.iloc[i]
        prev_1 = hlc.iloc[i-1]
        prev_2 = hlc.iloc[i-2]
        # Sum and divide by 9
        dots_value = (current + prev_1 + prev_2) / 9
        dots_values.append(dots_value)

# Create DataFrame with same index as SP500
dots = pd.DataFrame({
    'dots': dots_values
}, index=sp500.index)

print("Dots DataFrame created:")

print(f"\nShape: {dots.shape}")
print(f"\nFirst few rows with values:")
print(dots[dots['dots'].notna()].head(10))

dots

Dots DataFrame created:

Shape: (250, 1)

First few rows with values:
                   dots
Date                   
2024-12-02  6024.484484
2024-12-03  6039.103299
2024-12-04  6056.532227
2024-12-05  6068.365506
2024-12-06  6083.352214
2024-12-09  6078.085503
2024-12-10  6065.601128
2024-12-11  6061.902181
2024-12-12  6061.034451
2024-12-13  6064.949978


Unnamed: 0_level_0,dots
Date,Unnamed: 1_level_1
2024-11-27,
2024-11-29,
2024-12-02,6024.484484
2024-12-03,6039.103299
2024-12-04,6056.532227
...,...
2025-11-20,6626.315484
2025-11-21,6618.169976
2025-11-24,6631.07666
2025-11-25,6670.976671


In [17]:
dots_valid = dots[dots['dots'].notna()].copy()

# Calculate x positions: place dots between current date and next date
if len(dots_valid) > 0:
    # Get all dates from the original data index
    all_dates = sp500.index
    dots_x_positions = []
    last_gap = None

    for i, date in enumerate(dots_valid.index):
        # Find the index of this date in the full data
        try:
            current_idx = all_dates.get_loc(date)
            current_date = all_dates[current_idx]

            # If there's a next date, calculate gap and add gap/2 to current date
            if current_idx < len(all_dates) - 1:
                next_date = all_dates[current_idx + 1]
                # Calculate gap between current and next date
                gap = pd.Timestamp(next_date) - \
                    pd.Timestamp(current_date)
                last_gap = gap  # Store for last index
                # Add gap/2 to current date
                x_position = pd.Timestamp(current_date) + gap / 2
                dots_x_positions.append(x_position)
            else:
                # For the last index, use last date + lastgap
                if last_gap is not None:
                    x_position = pd.Timestamp(
                        current_date) + last_gap/2
                else:
                    # Fallback if no previous gap (shouldn't happen normally)
                    x_position = current_date
                dots_x_positions.append(x_position)
        except (KeyError, IndexError):
            # Fallback: use the date itself
            dots_x_positions.append(date)

    dots_valid['x_position'] = dots_x_positions

dots_valid

Unnamed: 0_level_0,dots,x_position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-12-02,6024.484484,2024-12-02 12:00:00
2024-12-03,6039.103299,2024-12-03 12:00:00
2024-12-04,6056.532227,2024-12-04 12:00:00
2024-12-05,6068.365506,2024-12-05 12:00:00
2024-12-06,6083.352214,2024-12-07 12:00:00
...,...,...
2025-11-20,6626.315484,2025-11-20 12:00:00
2025-11-21,6618.169976,2025-11-22 12:00:00
2025-11-24,6631.07666,2025-11-24 12:00:00
2025-11-25,6670.976671,2025-11-25 12:00:00
