## Pull data with yfinance

In [7]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd
from dateutil.relativedelta import relativedelta
import numpy as np

In [2]:
# function to load datasets from yfinance, and create a single table which will be the basis of the data
def _download_open_prices(ticker, price_column, start, end, interval):
    """Download one ticker from yfinance and keep only its opening prices.

    Returns a two-column DataFrame: ``date`` and ``price_column`` (the "Open"
    field renamed).
    """
    raw = yf.download(tickers=ticker, interval=interval, start=start, end=end).reset_index()

    # The second column level is dropped so that "Date"/"Open" can be selected
    # by plain name; only do so when the columns really are a MultiIndex,
    # otherwise droplevel(1) raises on a flat index.
    if isinstance(raw.columns, pd.MultiIndex):
        raw.columns = raw.columns.droplevel(1)

    opens = raw[["Date", "Open"]].copy()
    opens.columns = ["date", price_column]
    return opens


def load_yf_data(equity, bond):
    """Build the base price table for one equity ticker and one bond ticker.

    Both tickers are downloaded at daily interval from 1950-01-01 up to today.

    Parameters
    ----------
    equity : str
        yfinance ticker whose opening price becomes ``ety_open_price``.
    bond : str
        yfinance ticker whose opening price becomes ``bnd_open_price``.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``ety_open_price`` and ``bnd_open_price``. The two
        series are outer-merged on ``date``, so a date present for only one
        ticker has NaN in the other price column.
    """
    window = dict(
        start=datetime(year=1950, month=1, day=1),
        end=datetime.today(),
        interval="1d",
    )

    ety = _download_open_prices(equity, "ety_open_price", **window)
    bnd = _download_open_prices(bond, "bnd_open_price", **window)

    return ety.merge(bnd, on="date", how="outer")
    

In [3]:
# fetch the equity (^GSPC) and bond (^FVX) series into a single table
fund_data = load_yf_data(equity="^GSPC", bond="^FVX")

  ety = yf.download(tickers=equity, interval=interval, start=start_date, end=end_date).reset_index()
[*********************100%***********************]  1 of 1 completed
  bnd = yf.download(tickers=bond, interval=interval, start=start_date, end=end_date).reset_index()
[*********************100%***********************]  1 of 1 completed


In [2]:
# save down the data and reload for working offline
# NOTE: this needs `fund_data` from the download cell above; on a fresh kernel
# where that cell has not been run, this line raises NameError.
fund_data.to_csv("fund_data.csv", index=False)

NameError: name 'fund_data' is not defined

In [2]:
# reload the saved table; the CSV stores dates as text, so convert them back
fund_data = pd.read_csv("fund_data.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)
fund_data.dtypes

date              datetime64[ns]
ety_open_price           float64
bnd_open_price           float64
dtype: object

## Create pension

In [3]:
# create pension class
class Pension:
    """Pension pot funded by monthly contributions, with a de-risking schedule.

    Attributes set by ``__init__``:
        start, retire -- first contribution date and retirement date
        cont          -- the ``contributions`` value passed in
        cont_dates    -- one date per month from ``start`` up to and
                         including ``retire``
    """

    def __init__(self, start, retire, contributions):
        """Store the inputs and pre-compute the monthly contribution dates."""
        self.start = start
        self.retire = retire
        self.cont = contributions

        # create all dates on which contributions will be made
        self.cont_dates = self._contribution_dates()

    def _contribution_dates(self):
        """Return every monthly date from ``self.start`` through ``self.retire``."""
        dates = []
        months_elapsed = 0
        current_date = self.start

        while current_date <= self.retire:
            dates.append(current_date)
            months_elapsed += 1
            # Offset from the start date every time instead of adding one month
            # to the previous date: once a short month clamps day 31 to 28/30,
            # chained additions would stay on the clamped day for good.
            current_date = self.start + relativedelta(months=months_elapsed)

        return dates

    def load_data(self, data):
        """Attach the fund price table as ``self.funds``."""
        self.funds = data

    def derisk_strategy(self, target_weight, derisk_years):
        """Store the de-risking inputs and derive the schedule from them.

        Sets ``derisk_years``, ``derisk_target``, ``start_derisk_date``
        (``retire`` minus ``derisk_years`` years), ``derisk_months``
        (``derisk_years * 12``) and ``derisk_perc_change``, which is
        ``(1 - target_weight) / derisk_months`` rounded to 6 decimal places.

        NOTE(review): ``target_weight`` is presumably the equity weight to hold
        at retirement -- confirm against the intended strategy.

        Raises ZeroDivisionError when ``derisk_years`` is 0.
        """
        self.derisk_years = derisk_years
        self.derisk_target = target_weight
        self.start_derisk_date = self.retire - relativedelta(years=self.derisk_years)
        self.derisk_months = self.derisk_years * 12
        self.derisk_perc_change = round((1 - self.derisk_target) / self.derisk_months, 6)

    def accumulate(self):
        """Return a DataFrame with a single ``date`` column of contribution dates.

        The dates are recomputed from the current ``start``/``retire`` values
        rather than read from ``cont_dates``.
        """
        return pd.DataFrame(self._contribution_dates(), columns=["date"])

In [4]:
# worked example: contributions of 750 from July 1990 to July 2025,
# with a 12-year de-risking window and a target weight of 0.2
start_date = datetime(1990, 7, 13)
retire_date = datetime(2025, 7, 13)
pension = Pension(start=start_date, retire=retire_date, contributions=750)
pension.derisk_strategy(target_weight=0.2, derisk_years=12)
pension.derisk_perc_change

0.005556

In [6]:
# function to return the price of the equity fund or bond fund on a specific date
def get_price(date, col, data=None):
    """Return the price in ``col`` on ``date``, or on the next date that has one.

    Parameters
    ----------
    date : datetime-like
        Requested date; compared against the ``date`` column of ``data``.
    col : str
        Name of the price column to read (e.g. ``"ety_open_price"``).
    data : pandas.DataFrame, optional
        Table with a ``date`` column and the ``col`` column. When omitted, the
        module-level ``fund_data`` is looked up at call time.

    Returns
    -------
    tuple
        ``(price, price_date)``: the price and the date it was taken from.
        NOTE(review): the original returned an undefined name ``m`` as the
        second element, so every call raised NameError. The matched date is
        returned in its place -- confirm this is the value that was intended.

    Raises
    ------
    ValueError
        If no row on or after ``date`` holds a non-null price in ``col``.
    """
    if data is None:
        data = fund_data

    # Not all days have prices: a date can be missing from the table, or be
    # present with NaN in this column. Keep only usable rows at or after the
    # requested date.
    has_price = data[col].notna() & (data["date"] >= date)
    if not has_price.any():
        raise ValueError(f"no {col} price available on or after {date}")

    candidates = data.loc[has_price, ["date", col]]
    # pick the earliest qualifying date without assuming the table is sorted
    first = candidates["date"].to_numpy().argmin()

    price = candidates[col].iloc[[first]].item()
    price_date = candidates["date"].iloc[first]
    return price, price_date

Unnamed: 0,date,ety_open_price,bnd_open_price
0,1950-01-03,16.660000,
1,1950-01-04,16.850000,
2,1950-01-05,16.930000,
3,1950-01-06,16.980000,
4,1950-01-09,17.080000,
...,...,...,...
19140,2025-12-18,6778.060059,3.663
19141,2025-12-19,6792.620117,3.686
19142,2025-12-22,6865.209961,3.711
19143,2025-12-23,6872.410156,3.702


### Solve with Algebra

In [5]:
# sample holdings and prices used to exercise the solver below
curr_e, curr_b = 415427.764508, 0
price_e, price_b = 1679.589966, 1.459

In [8]:
import numpy as np

def calculate_units_to_buy(curr_e, curr_b, price_e, price_b, contribution, target_bond_ratio):
    """Solve for the equity and bond units to buy with one contribution.

    With T = ``target_bond_ratio`` and unknowns (dqe, dqb), the 2x2 system is:

        price_e * dqe + price_b * dqb = contribution
        -T * dqe + (1 - T) * dqb      = T * curr_e - (1 - T) * curr_b

    Returns
    -------
    numpy.ndarray
        ``[dqe, dqb]`` when both are non-negative.
    str
        ``"Boundary Hit: Use Catch-up Logic"`` when either solution is negative.
    None
        When the coefficient matrix is singular.

    NOTE(review): the second equation balances ``curr_e``/``curr_b`` and the
    purchased units directly, without multiplying by the prices -- confirm
    whether the target ratio is meant to be by unit count or by market value.
    """
    equity_ratio = 1 - target_bond_ratio

    coefficients = np.array([
        [price_e, price_b],
        [-target_bond_ratio, equity_ratio],
    ])
    constants = np.array([
        contribution,
        target_bond_ratio * curr_e - equity_ratio * curr_b,
    ])

    try:
        purchases = np.linalg.solve(coefficients, constants)
    except np.linalg.LinAlgError:
        return None

    # a negative purchase means the contribution alone cannot reach the target
    if (purchases < 0).any():
        return "Boundary Hit: Use Catch-up Logic"

    return purchases  # [delta_qe, delta_qb]

In [10]:
# monthly de-risking step; the next cell passes it to calculate_units_to_buy as target_bond_ratio
pension.derisk_perc_change

0.005556

In [11]:
# run the solver on the sample values with the monthly de-risking step as the bond target
calculate_units_to_buy(
    curr_e=curr_e,
    curr_b=curr_b,
    price_e=price_e,
    price_b=price_b,
    contribution=750,
    target_bond_ratio=pension.derisk_perc_change,
)

'Boundary Hit: Use Catch-up Logic'