# Distribution of annual returns for major indices

Different tickers are available:
- S&P 500 index with dividends reinvested, data available since 1988 (_^SP500TR_)
- S&P 500 price index (without dividends), data available since 1927 (_^GSPC_)
- Nasdaq 100 index, data available since 1985 (_^NDX_)

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import ipywidgets as widgets
from ipywidgets import interact

In [None]:
# Index to analyse and the label used in the chart title
# TODO: add support for Shiller TR monthly data since 1871 (!), see http://www.econ.yale.edu/~shiller/data.htm
ticker, title = '^SP500TR', 'S&P 500 Annualized Total Returns'

# Download the price history from Yahoo Finance, reduce the column labels
# to their first level, then keep only the closing prices
df = yf.download(ticker)
df.columns = df.columns.get_level_values(0)
df = df.loc[:, 'Close']

In [3]:
# First and last calendar years present in the price index
data_min_year = df.index.year.min()
data_max_year = df.index.year.max()

In [4]:
def compute_future_returns(series, period_years=1.5, dates_per_year=256):
    """Return the annualized forward return for each entry of ``series``.

    Every price is compared with the price found
    ``int(period_years * dates_per_year)`` rows further down the series, and
    the resulting total return is converted to a compound annual rate.

    Args:
        series: pandas Series of prices, assumed to be in chronological
            order with one row per observation date.
        period_years: Length of the holding period, in years.
        dates_per_year: Number of rows assumed to make up one year.

    Returns:
        A Series of annualized returns with NaN entries dropped (in
        particular the last rows, for which no price exists far enough
        ahead).

    Raises:
        ValueError: If the period covers less than one row, in which case
            every price would be compared with itself.
    """
    delta = int(period_years * dates_per_year)
    if delta == 0:
        raise ValueError(
            'period_years * dates_per_year must span at least one row, '
            f'got period_years={period_years}, dates_per_year={dates_per_year}'
        )

    # Total return between each row and the row `delta` positions later
    total_returns = series.shift(-delta) / series - 1

    # Convert the total return over the period into a compound annual rate
    annualized_returns = (total_returns + 1) ** (1 / period_years) - 1

    # Drop NaN values (those dates for which we can't calculate future returns)
    return annualized_returns.dropna()

In [5]:
def bucketize(axis_max):
    """Snap ``axis_max`` up to one of a few fixed axis limits.

    Returns the smallest of 0.15, 0.3 and 0.5 that is strictly greater than
    ``axis_max``; when none of them is, ``axis_max`` is returned unchanged.
    """
    thresholds = (0.15, 0.3, 0.5)
    return next((limit for limit in thresholds if axis_max < limit), axis_max)

In [12]:
@interact(
    period_years=widgets.IntSlider(value=10, min=1, max=min(50, data_max_year - data_min_year), description='Period years'),
    year_interval=widgets.IntRangeSlider(value=[1950, data_max_year], min=data_min_year, max=data_max_year, description='Year interval')
)
def plot_returns_distribution(year_interval, period_years):
    """Plot a histogram of annualized forward returns for the chosen window.

    Prices from the module-level ``df`` are restricted to the selected
    years, converted to annualized ``period_years`` forward returns with
    ``compute_future_returns``, and drawn as a histogram on an x-axis that
    is symmetric around zero. The median and the number of datapoints are
    displayed below the chart.

    Args:
        year_interval: ``(start_year, end_year)`` pair selecting the data.
        period_years: Holding period, in years, of each return.
    """
    start_year, end_year = year_interval
    if end_year - start_year < period_years:
        display('Please select a year interval larger than period_years')
        return

    # The slice ends on the first day of the year after end_year so that
    # the whole of end_year is part of the selection.
    start_date = f'{start_year}-01-01'
    end_date = f'{end_year+1}-01-01'
    data = df.loc[start_date:end_date]
    returns = compute_future_returns(data, period_years)

    # The selection can hold fewer rows than the look-ahead needs (e.g. the
    # last year is still incomplete); there is then nothing to plot and the
    # axis-limit computation below would fail on an empty series.
    if returns.empty:
        display('Not enough data in the selected interval to compute returns')
        return

    # Plot results
    plt.figure(figsize=(10, 5))
    plt.hist(returns, bins=50, edgecolor='k', alpha=0.7)
    plt.title(f'Distribution of {title} over {period_years} Years')
    plt.xlabel('Annual Return')
    plt.ylabel('Frequency')
    plt.grid(True)
    ax = plt.gca()
    # Returns are fractions; show the tick labels as whole percentages
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x*100:.0f}%'))

    # Symmetric x-axis: widest absolute return, snapped up to a fixed limit
    axis_max = bucketize(returns.abs().max())
    plt.xlim(-axis_max, axis_max)
    plt.show()

    display(f'Median: {returns.median()*100:.1f}%')
    display(f'Nb of datapoints: {returns.shape[0]}')

interactive(children=(IntSlider(value=10, description='Period years', max=36, min=1), IntRangeSlider(value=(19…

_Data from Yahoo Finance_