In [50]:
import pandas as pd
import datetime

In [203]:
# Load the test data: a CSV exported from Google Finance via Google Sheets.
vwuax = (
    pd.read_csv("VWUAX.csv")
    # Turn the CSV's date strings into pandas datetime objects.
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    # Index by Date, but keep Date available as a regular column as well.
    .set_index("Date", drop=False)
)
vwuax

Unnamed: 0_level_0,Date,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2002-09-03,2002-09-03,31.82
2002-09-04,2002-09-04,32.57
2002-09-05,2002-09-05,31.96
2002-09-06,2002-09-06,32.74
2002-09-09,2002-09-09,33.04
...,...,...
2021-09-20,2021-09-20,191.99
2021-09-21,2021-09-21,192.97
2021-09-22,2021-09-22,195.00
2021-09-23,2021-09-23,197.19


In [204]:
# Cross-check against data pulled directly from NASDAQ.
nasdaq = (
    pd.read_csv("NASDAQ_VWUAX.csv")
    # Turn the CSV's date strings into pandas datetime objects.
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    # Index by Date, but keep Date available as a regular column as well.
    .set_index("Date", drop=False)
    # Keep only the relevant columns and rename them so both data sets
    # use the same column names.
    .loc[:, ["Date", "Close/Last"]]
    .rename(columns={"Close/Last": "Close"})
    # Sort oldest-first: the % change is computed against the row above,
    # so a different row order gives a different result.
    .sort_index()
)
# Display the whole frame (Date and Close), as the VWUAX cell does, rather
# than only the Close column.
nasdaq

Unnamed: 0_level_0,Date,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-09-26,2011-09-26,45.05
2011-09-27,2011-09-27,45.62
2011-09-28,2011-09-28,44.50
2011-09-29,2011-09-29,44.19
2011-09-30,2011-09-30,42.82
...,...,...
2021-09-20,2021-09-20,191.99
2021-09-21,2021-09-21,192.97
2021-09-22,2021-09-22,195.00
2021-09-23,2021-09-23,197.19


In [205]:
def get_returns(df, input_date, filter_on, date_col_name="Date"):
    """Filter ``df`` by date and append a ``"% Change"`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data with a datetime column named ``date_col_name`` and a
        ``"Close"`` column.
    input_date : datetime-like
        Date whose ``month`` (and, for ``"y"``, ``day``) selects the rows.
    filter_on : str
        Passed through to ``_date_filter``: ``"m"`` keeps rows in the same
        month, ``"y"`` keeps rows on the same month and day.
    date_col_name : str, default "Date"
        Name of the datetime column to filter on.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the filtered rows plus a ``"% Change"`` column:
        the fractional change in ``"Close"`` relative to the previous
        remaining row (``NaN`` for the first row). ``df`` is not modified.
    """
    filtered = _date_filter(df, input_date, filter_on, date_col_name)
    change = _calculate_change(filtered)
    # Write straight to the final column name instead of going through a
    # temporary "e" column that is renamed afterwards: that detour silently
    # overwrote any existing "e" column in the input. The values are assigned
    # positionally (.values) so the result does not depend on index alignment.
    return filtered.assign(**{"% Change": pd.Series(change).values})
    
def _date_filter(df, input_datetime_obj, filter_on="y", date_col_name="Date"):
    if filter_on == "m":
        return df.loc[df[date_col_name].dt.month == input_datetime_obj.month]
    if filter_on == "y":
        df = df.loc[df[date_col_name].dt.month == input_datetime_obj.month] 
        return df.loc[df[date_col_name].dt.day == input_datetime_obj.day]

def _calculate_change(df):
    return df["Close"].pct_change()



In [208]:
# Reference date for the analysis: 1 September 2021.
date = datetime.datetime(year=2021, month=9, day=1)

In [209]:
# Rows of the Google Finance data that fall on the same month and day as `date`.
# Note: consecutive rows can be several years apart (e.g. 2011-09-01 is followed
# by 2015-09-01), so "% Change" is not always a one-year change.
get_returns(vwuax, date, filter_on="y")

Unnamed: 0_level_0,Date,Close,% Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-09-01,2004-09-01,37.37,
2005-09-01,2005-09-01,43.48,0.1635
2006-09-01,2006-09-01,44.37,0.020469
2009-09-01,2009-09-01,37.69,-0.150552
2010-09-01,2010-09-01,39.39,0.045105
2011-09-01,2011-09-01,46.39,0.17771
2015-09-01,2015-09-01,77.89,0.679026
2016-09-01,2016-09-01,78.64,0.009629
2017-09-01,2017-09-01,92.22,0.172686
2020-09-01,2020-09-01,164.34,0.782043


In [210]:
# Rows of the NASDAQ data that fall on the same month and day as `date`.
# Note: consecutive rows can be several years apart (e.g. 2017-09-01 is followed
# by 2020-09-01), so "% Change" is not always a one-year change.
get_returns(nasdaq, date, filter_on="y")

Unnamed: 0_level_0,Date,Close,% Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-09-01,2014-09-01,80.37,
2015-09-01,2015-09-01,77.89,-0.030857
2016-09-01,2016-09-01,78.64,0.009629
2017-09-01,2017-09-01,92.22,0.172686
2020-09-01,2020-09-01,164.34,0.782043
2021-09-01,2021-09-01,198.97,0.210722
