In [19]:
import pandas as pd

In [20]:
# Read price and dividends data into dfs and parse date columns as pandas datetime dtypes
spy_price_data = pd.read_csv(r'..\0_Raw_Data\SPY_price_history.csv', parse_dates = ['Date'])
spy_dividend_data = pd.read_csv(r'..\0_Raw_Data\SPY_dividends_history.csv', parse_dates = ['Pay Date'])

In [21]:
# Left join dfs on Date and Pay Date
merged_raw_data = spy_price_data.merge(spy_dividend_data, how='left', left_on=['Date'], right_on = ['Pay Date'])

In [22]:
# Drop rows in df that contain leap days of February 29. If we don't these will cause errors in the get_yield function below
merged_raw_data = merged_raw_data[~((merged_raw_data['Date'].dt.month == 2) & (merged_raw_data['Date'].dt.day ==29))]

In [23]:
# Convert 'Date' col from pandas datetime type to just a simple date using <dt.date> method
merged_raw_data['Date'] = pd.to_datetime(merged_raw_data['Date']).dt.date

In [24]:
# Write Lump Sum Investing (LSI) function -lsi_get_yield- to calculate yield of principal invested over a delta time horizon
# where the principal is invested in its entirity at the start of the delta time horizon

def lsi_get_yield(current_date, dataframe, price_col, delta):
    """
    current_date: column name of date data (str)
    dataframe: dataframe name (df)
    price_col: column name of price data (str)
    delta: investment delta in years (int)
    """
    current_price = dataframe.loc[dataframe['Date'] == current_date, price_col]
    current_price = current_price.reset_index()
    
    past_date = current_date - pd.Timedelta(days=delta*365)
    past_price = dataframe.loc[dataframe["Date"] == past_date, price_col]
    past_price = past_price.reset_index()

    return (current_price/past_price)[price_col].values[0]

In [25]:
# Pass the f
merged_raw_data['Yield'] = merged_raw_data.apply(lambda row: lsi_get_yield(row['Date'], dataframe=merged_raw_data, price_col='Close', delta=5), axis=1, result_type='expand')

In [26]:
# Write Dollar Cost Averaging (DCA) function to calculate yield of a principal invested over a delta time horizon
# where the principal is invested piecemeal at evenly seperated intervals over the length of the full delta time horizon

# Currently were using native type hints but I'd like to migrate to 
def dca_get_yield(start_date: pd.datetime, end_date: pd.datetime, period_size: pd.Timedelta, inv_per_period: float):
    assert end_date - start_date > period_size, 'Please reconsider your life choices, programmatically this behavior is moronical'
    
    investment_times = pd.date_range(start_date, end_date, period_size) # pd.date_range is like linspace for dates
    yield_percs = [get_yield(end_date, dataframe, column, price_col, delta) for delta in investment_times]
    total = sum([inv_per_period * perc for perc in yield_percs])
    return total

  def dca_get_yield(start_date: pd.datetime, end_date: pd.datetime, period_size: pd.Timedelta, inv_per_period: float):
