In [6]:
import pandas as pd

In [7]:
# Read price and dividends data into dfs and parse date columns as pandas datetime dtypes
spy_price_data = pd.read_csv(r'..\1_raw_data\SPY_price_history.csv', parse_dates = ['Date'])
spy_dividend_data = pd.read_csv(r'..\1_raw_data\SPY_dividends_history.csv', parse_dates = ['Pay Date'])

In [8]:
# Left join dfs on Date and Pay Date
merged_raw_data = spy_price_data.merge(spy_dividend_data, how='left', left_on=['Date'], right_on = ['Pay Date'])

In [9]:
# Drop rows in df that contain leap days of February 29. If we don't these will cause errors in the get_yield function below
merged_raw_data = merged_raw_data[~((merged_raw_data['Date'].dt.month == 2) & (merged_raw_data['Date'].dt.day ==29))]

In [10]:
# Convert 'Date' col from pandas datetime type to just a simple date using <dt.date> method
merged_raw_data['Date'] = pd.to_datetime(merged_raw_data['Date']).dt.date

In [26]:
# Write Lump Sum Investing (LSI) function "lsi_get_yield" to calculate yield of principal invested over a delta time horizon
# where the principal is invested in its entirity at the start of the delta time horizon

def lsi_get_yield(dataframe: str, date_col: str,  price_col: str, current_date: str, delta: int):
    """
    dataframe: dataframe to operate over (str)
    date_col: col name of date data (str)
    price_col: column name of price data (str)
    current_date:
    delta: investment time horizon in years (int)
    """
    current_price = dataframe.loc[dataframe[date_col] == current_date, price_col]
    current_price = current_price.reset_index()
    
    past_date = current_date - pd.Timedelta(days=delta*365)
    past_price = dataframe.loc[dataframe[date_col] == past_date, price_col]
    past_price = past_price.reset_index()

    return (current_price/past_price)[price_col].values[0]

In [27]:
# Pass lsi_get_yield function to a lambda function to the apply function to get 'Yield" column
merged_raw_data['lsi_yield'] = merged_raw_data.apply(lambda row: lsi_get_yield(dataframe=merged_raw_data, date_col='Date', current_date=row['Date'], price_col='Close', delta=5), axis=1, result_type='expand')

In [29]:
# Write Dollar Cost Averaging (DCA) function "dca_get_yield" to calculate yield of a principal invested in equal parts
# at evenly seperated intervals over the length of the delta time horizon
def dca_get_yield(dataframe: str, start_date: pd.datetime, date_col: str, price_col: str, delta: int, period_size: pd.Timedelta):
    """
    dataframe: dataframe to operate over (str)
    end_date: (str)
    price_col: (str)
    delta: investment delta in years (int)
    period_size: length of time in days between principal investments
    """
    investment_times = pd.date_range(start_date, start_date + (pd.Timedelta(days=delta*365)), period_size) # pd.date_range is like np.linspace func but for dates
    
    test_df = [dataframe.apply(lambda row: lsi_get_yield(dataframe=dataframe, date_col=date_col, current_date=row[x], price_col=price_col, delta=delta), axis=1, result_type='expand') for x in investment_times]    
    
    
    return test_df

  def dca_get_yield(dataframe: str, start_date: pd.datetime, date_col: str, price_col: str, delta: int, period_size: pd.Timedelta):


In [None]:
dca_get_yield(merged_raw_data, 'Date', )

In [16]:
# on to a lambda function to the apply function to get 'Yield" column
merged_raw_data['dca_yield'] = merged_raw_data.apply(lambda row: dca_get_yield(merged_raw_data, row['Date'], 'Close', 5, 14), axis=1, result_type='expand')

ValueError: Wrong number of items passed 14, placement implies 1