In [2]:
#install yahoofinancials
# !{sys.executable} -m pip install yahoofinancials

In [3]:

import pandas as pd
import yfinance as yf
from yahoofinancials import YahooFinancials
import datetime

In [11]:
class Security:
    """Price and dividend history for one ticker, bucketed into fixed intervals.

    Construction downloads the full history through ``yfinance``, keeps the
    rows dated after ``analysis_start_date``, trims the frame to the ``Date``,
    ``Close`` and ``Dividends`` columns, and pre-computes two derived values.

    Instance attributes set by ``__init__``:
        analysis_start_date: lower bound (exclusive) applied to ``Date``.
        freq: pandas offset alias used to bucket the data (default ``"M"``).
        ticker: upper-cased ticker symbol.
        yahoo_finance_data: frame returned by ``yfinance`` plus a ``Date`` column.
        data: filtered frame with columns ``Date``, ``Close``, ``Dividends``.
        historical_data_in_interval: per-interval ``Close`` / ``Dividends`` frame.
        intervals_per_year: number of interval rows found in one calendar year.
    """

    def __init__(self, ticker, analysis_start_date, freq = "M"):
        """Download, filter and bucket the history for ``ticker``.

        Args:
            ticker: ticker symbol; stored upper-cased.
            analysis_start_date: only rows with ``Date`` strictly greater than
                this value are kept.
            freq: pandas offset alias that defines the interval length.
        """
        self.analysis_start_date = analysis_start_date
        self.freq = freq
        self.ticker = ticker.upper()
        self.yahoo_finance_data = self._get_yahoo_finance_data()

        # Each step reads ``self.data`` as left by the previous one, so the
        # order of these three assignments matters.
        self.data = self._revise_date_cols()
        self.data = self._filter_dates()
        self.data = self._clear_extra_columns()

        # NOTE: these two assignments deliberately replace the bound methods of
        # the same name with their results on this instance. After __init__ both
        # names are plain values (a DataFrame and an int) and are no longer
        # callable through the instance; other methods rely on that.
        self.historical_data_in_interval = self.historical_data_in_interval()
        self.intervals_per_year = self.intervals_per_year()

    def _get_yahoo_finance_data(self):
        """Fetch the maximum available history for ``self.ticker`` via yfinance."""
        return yf.Ticker(self.ticker).history(period = "max")

    def _revise_date_cols(self):
        """Expose the frame's index as a regular ``Date`` column.

        The column is added to ``self.yahoo_finance_data`` itself (no copy) and
        that same frame is returned. The index is left untouched: the original
        code called ``reset_index(drop=True)`` but discarded the result, so the
        call had no effect and has been removed.
        """
        df = self.yahoo_finance_data
        df["Date"] = df.index
        return df

    def _filter_dates(self):
        """Keep rows dated strictly after ``analysis_start_date``, sorted by index."""
        return self.data[self.data["Date"] > self.analysis_start_date].sort_index()

    def _clear_extra_columns(self):
        """Reduce ``self.data`` to the ``Date``, ``Close`` and ``Dividends`` columns."""
        return self.data[["Date", "Close", "Dividends"]]

    def total_returns(self):
        """Return the capital-gains returns.

        TODO: dividend income is not included yet; see
        ``dividend_income_returns``.
        """
        return self.capital_gains_returns()

    def capital_gains_returns(self):
        """Percent change of ``Close`` between consecutive intervals.

        Returns:
            A Series aligned with ``historical_data_in_interval``; the first
            entry is NaN because it has no predecessor.
        """
        # An unreachable second ``return`` that referenced an undefined name
        # (plus commented-out scratch code) used to follow this line.
        return self.historical_data_in_interval["Close"].pct_change() * 100

    def dividend_income_returns(self):
        """Not implemented yet; returns ``None``."""
        pass

    def intervals_per_year(self):
        """Count how many interval rows fall inside one calendar year.

        The year before the latest year in the data is tried first. If it has
        no rows, the latest year itself is used instead.

        Raises:
            AssertionError: if more than 24 intervals are found in the year.
        """
        data = self.historical_data_in_interval
        latest_year = data.index.max().year

        intervals_per_year = len(data[data.index.year == latest_year - 1])

        if intervals_per_year == 0:
            # No rows in the previous year: fall back to the latest
            # (possibly partial) year.
            intervals_per_year = len(data[data.index.year == latest_year])

        assert intervals_per_year <= 24, "Choose another interval greater than W (week) like M (month) or Q (quarter)"

        return intervals_per_year

    def historical_data_in_interval(self):
        """
        Calculates based on end of frequency.  note, "M" means month end
        possibilities for freq => https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

        Returns a frame indexed by the last trading date of each interval
        (index name ``Date``) with columns ``Close`` (last close of the
        interval) and ``Dividends`` (sum of dividends over the interval).
        """
        dividends = self._total_dividends_in_interval(self.data[["Dividends", "Date"]])
        closing_prices = self._closing_prices_in_interval(self.data[["Close", "Date"]])

        # Both frames are indexed by "Date" (last trading date of the interval),
        # so this is an inner join on that index level.
        return closing_prices.merge(dividends, on = "Date")

    def _closing_prices_in_interval(self, closing_prices):
        """Return the last row of each ``self.freq`` interval, indexed by its date.

        Args:
            closing_prices: frame with a ``Close`` and a ``Date`` column.

        Returns:
            Frame with a single ``Close`` column whose index (named ``Date``)
            holds the actual date of the last row in each interval.
        """
        last_rows = closing_prices.groupby(pd.Grouper(key = "Date", freq = self.freq)).nth(-1)

        # Whatever index ``nth`` hands back (it differs between pandas
        # versions) is discarded; the "Date" column carries the real trading date.
        return last_rows.reset_index(drop = True).set_index("Date")

    def _total_dividends_in_interval(self, dividends):
        """Sum dividends per ``self.freq`` interval, indexed by the last trading date.

        Args:
            dividends: frame with a ``Dividends`` and a ``Date`` column.

        Returns:
            Frame with a single ``Dividends`` column (the interval total) whose
            index (named ``Date``) holds the last date seen in each interval.
            Intervals that contain no rows are omitted.
        """
        grouped = dividends.groupby(pd.Grouper(key = "Date", freq = self.freq))

        # Both aggregations come from the same groupby and therefore share the
        # same interval labels. Building the result this way avoids merging an
        # ``nth`` result on its index, which silently drops intervals on pandas
        # versions where ``nth`` returns the original row index.
        per_interval = pd.DataFrame({
            "Trading_Date": grouped["Date"].last(),
            "Dividends": grouped["Dividends"].sum(),
        })

        # Empty intervals have no last date (NaT); drop them.
        per_interval = per_interval.dropna(subset = ["Trading_Date"])

        return per_interval.set_index("Trading_Date").rename_axis("Date")



In [12]:
# Build a Security for VTI bucketed with freq "Y" and display its
# interval-over-interval capital-gains returns (in percent).
# NOTE(review): `calculations_as_of` is not defined in any visible cell, so this
# cell fails on a fresh kernel — define the analysis start date before this cell.
VTI = Security("VTI", calculations_as_of, "Y")
VTI.capital_gains_returns()

Date
2001-12-31          NaN
2002-12-31   -20.479572
2003-12-31    30.743036
2004-12-31    12.781561
2005-12-30     6.304048
2006-12-29    15.696619
2007-12-31     5.369882
2008-12-31   -36.986041
2009-12-31    28.904665
2010-12-31    17.428284
2011-12-30     0.968532
2012-12-31    16.451328
2013-12-31    33.449754
2014-12-31    12.549333
2015-12-31     0.355058
2016-12-30    12.817735
2017-12-29    21.213445
2018-12-31    -5.234796
2019-12-31    30.668646
2020-12-31    21.076146
2021-12-31    25.681851
Name: Close, dtype: float64

Unnamed: 0_level_0,Close,Dividends
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-30,99.794502,0.0
2020-02-28,92.63736,0.0
2020-03-31,82.86071,0.0
2020-04-30,97.022911,0.0
2020-05-29,105.72641,0.0
2020-06-30,112.46106,0.0
2020-07-31,122.330658,0.0
2020-08-31,136.13797,0.0
2020-09-30,130.172272,0.0
2020-10-30,125.507866,0.0


In [416]:
# Two small frames whose "Date" columns only partly overlap; used to check
# which rows survive a merge on "Date".
df1 = pd.DataFrame(
    {
        "Date": [
            datetime.datetime(2020, month, day)
            for month, day in ((1, 1), (1, 30), (2, 1))
        ],
        "B": [99, 98, 97],
    }
)

df2 = pd.DataFrame(
    {
        "Date": [
            datetime.datetime(2020, month, day)
            for month, day in ((1, 1), (3, 30), (2, 1))
        ],
        "C": [99, 98, 97],
    }
)

df1

Unnamed: 0,Date,B
0,2020-01-01,99
1,2020-01-30,98
2,2020-02-01,97


In [360]:
# Display the second fixture frame for comparison with df1.
df2

Unnamed: 0,Date,C
0,2020-01-01,99
1,2020-03-30,98
2,2020-02-01,97


In [361]:
# Default (inner) merge: only the "Date" values present in both frames survive.
pd.merge(df1, df2, on = "Date")

Unnamed: 0,Date,B,C
0,2020-01-01,99,99
1,2020-02-01,97,97


In [295]:
# Take the last row of each month-end bucket of column "A".
# NOTE(review): `df` is not defined in any visible cell; judging by the recorded
# output it presumably held a datetime column "A" and a value column "B" — confirm.
df.groupby(pd.Grouper(key = "A", freq = "M")).nth(-1)

Unnamed: 0_level_0,A,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-31,2020-01-30,98
2020-02-29,2020-02-01,97


In [291]:
# NOTE(review): the recorded output of this cell is an AttributeError — the value
# returned by get_returns() was already a DataFrameGroupBy, which has no
# .groupby. `vwuax` and `get_returns` are not defined in any visible cell;
# fix or delete this cell before sharing the notebook.
vwuax.get_returns().groupby(pd.Grouper(key = "Date", freq = "M")).nth(0)

AttributeError: 'DataFrameGroupBy' object has no attribute 'groupby'

In [207]:
# Earlier attempt, kept for reference (uses the old pd.TimeGrouper spelling):
# In [14]: df1.groupby(pd.TimeGrouper('M')).nth(0)
# Sum every column of vwuax.data per month-end bucket of "Date".
# NOTE(review): `vwuax` is not defined in any visible cell, and the positional 0
# passed to sum() lands on whatever its first parameter is in the installed
# pandas — presumably unintended; confirm.
vwuax.data.groupby(pd.Grouper(key = "Date", freq = "M")).sum(0)

Unnamed: 0_level_0,Close,Dividends
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-31,2121.465576,0.0
2020-02-29,2193.395142,0.0
2020-03-31,2095.458755,0.0
2020-04-30,2142.226875,0.0
2020-05-31,2071.22924,0.0
2020-06-30,2741.018616,0.0
2020-07-31,2843.057373,0.0
2020-08-31,3056.895737,0.0
2020-09-30,2772.905106,0.0
2020-10-31,3189.514328,0.0


In [None]:
def add_date_col(df):
    """Copy the frame's index into a ``Date`` column and return the same frame.

    The frame is modified in place; an existing ``Date`` column is overwritten.
    """
    index_values = df.index
    df["Date"] = index_values
    return df

def date_group (df, interval = "m", interval_start_date = "1/1/2020"):
    """
    Group the security data into date intervals (not implemented yet).

    df - the data frame that has the security data
    interval - the interval to group the data by.  Options are month (m) and will be year (y)
    interval_start_date - date to start the intervals on.

    TODO: this function has no body beyond this docstring, so it currently
    returns None.
    """

def date_filter(df, calcs_as_of):
    """Return the rows of ``df`` dated strictly after ``calcs_as_of``.

    The original definition had no body, which made the whole cell a
    SyntaxError. This implementation mirrors ``Security._filter_dates``.

    Args:
        df: frame with a ``Date`` column.
        calcs_as_of: lower bound (exclusive) compared against ``Date``.

    Returns:
        A new frame holding the matching rows, sorted by index; ``df`` itself
        is not modified.
    """
    return df[df["Date"] > calcs_as_of].sort_index()


In [None]:
# NOTE(review): `date_interval` is not defined in any visible cell (the nearest
# name is `date_group`), so this call raises NameError on a fresh kernel —
# presumably a leftover; confirm and remove or rename.
date_interval()