In [250]:
#install yahoofinancials
# !{sys.executable} -m pip install yahoofinancials

In [251]:
import pandas as pd
import yfinance as yf
from yahoofinancials import YahooFinancials
from datetime import datetime
from statistics import mean

In [303]:
class Security:
    def __init__(self, ticker, analysis_start_date, analysis_end_date = datetime(datetime.today().year, 1, 1), freq = "Y"):
        self.analysis_start_date = analysis_start_date
        self.analysis_end_date = analysis_end_date
        self.freq = freq
        self.ticker = ticker.upper()
        self.yahoo_finance_data = self._get_yahoo_finance_data()

        self.data = self._revise_date_cols()
        self.data = self._filter_dates()
        self.data = self._clear_extra_columns()

        self.historical_data_in_interval = self.historical_data_in_interval()
        self.intervals_per_year = self.intervals_per_year()

    def _get_yahoo_finance_data(self):
        return yf.Ticker(self.ticker).history(period = "max")

    def _revise_date_cols(self):
        df = self.yahoo_finance_data
        df["Date"] = df.index
        df.reset_index(drop = True)
        return df

    def _filter_dates(self):
        df = self.data
        # make sure data is older than analysis start date
        df = df[df["Date"] > self.analysis_start_date].sort_index()
        #make sure data is earlier than analysis end date
        df = df[df["Date"] < self.analysis_end_date].sort_index()
        return df

        
    def _clear_extra_columns(self):
        df = self.data
        df = df[["Date", "Close", "Dividends"]]
        return df

    def total_returns(self):
        """
        Total returns as a % based on the interval provided by freq.  if freq = "Y" then total returns are yearly
        """
        Capital_Gains_Yield = self.capital_gains_interval_yield()
        Dividend_Yield = self.dividend_income_interval_yield()
        Total_Yield = Capital_Gains_Yield + Dividend_Yield

        return pd.DataFrame({
            "Capital_Gains_Yield" : Capital_Gains_Yield, 
            "Dividend_Yield" : Dividend_Yield,
            "Total_Yield" : Total_Yield
        })

    def _full_interval_data_only(self, df):
        pass

    def capital_gains_interval_yield(self):
        capital_gains_data = self.historical_data_in_interval["Close"]
        return capital_gains_data.pct_change() * 100

    def dividend_income_interval_yield(self):
        data = self.historical_data_in_interval
        dividend_interval_yield = data["Dividends"] / data["Close"] * 100
        return dividend_interval_yield

    def intervals_per_year(self):
        data = self.historical_data_in_interval
        good_year = data.index.max().year - 1

        intervals_per_year = len(data[data.index.year == good_year])

        if intervals_per_year == 0:
            #good_year might be a fractional year
            good_year = data.index.max().year
            intervals_per_year = len(data[data.index.year == good_year])

        assert intervals_per_year <= 24, "Choose another interval greater than W (week) like M (month) or Q (quarter)"

        return intervals_per_year

    def historical_data_in_interval(self):
        """
        Calculates based on end of frequency.  note, "M" means month end
        possibilities for freq => https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
        """

        dividends = self._total_dividends_in_interval(self.data[["Dividends", "Date"]])
        closing_prices = self._closing_prices_in_interval(self.data[["Close", "Date"]])

        merged_dividends_and_closing_price = closing_prices.merge(dividends, left_on = "Date", right_on = "Date")        

        return merged_dividends_and_closing_price

    def _closing_prices_in_interval(self, closing_prices):
        """
        """
        #pd.Grouper really messes up the indexes so I have to do some work to put it all back in order
        closing_prices = closing_prices.groupby(pd.Grouper(key = "Date", freq = self.freq)).nth(-1)
        closing_prices = closing_prices.rename(columns = {"Date": "Trading_Date"})

        closing_prices = closing_prices.reset_index(drop = True)
        closing_prices = closing_prices.set_index("Trading_Date")
        closing_prices = closing_prices.rename_axis("Date")

        return closing_prices

    def _total_dividends_in_interval(self, dividends):
        """
        """
        #pd.Grouper really messes up the indexes so I have to do some work to put it all back in order
        dividends_pre_sum = dividends.groupby(pd.Grouper(key = "Date", freq = self.freq)).nth(-1)
        dividends_pre_sum = dividends_pre_sum.rename(columns = {"Date": "Trading_Date", "Dividends": "Dividends_Pre"})
    
        dividends_post_sum = dividends.groupby(pd.Grouper(key = "Date", freq = self.freq)).sum()

        dividends = dividends_pre_sum.merge(dividends_post_sum, left_on = "Date", right_on = "Date")
        dividends = dividends.drop(columns = ["Dividends_Pre"])
        dividends = dividends.reset_index(drop = True)
        dividends = dividends.set_index("Trading_Date")
        dividends = dividends.rename_axis("Date")
        return dividends

In [304]:
class Portfolio:
    def __init__(self, portfolio_weights, freq):
        self.portfolio_weights = portfolio_weights
        self.freq = freq
        
        self.YEAR_INTERVALS = [20, 10, 5, 3, 1]

        self.security_returns_data = self._get_security_total_returns()

    def returns(self):

        returns_dict = {
            'Years': [],
            'Capital_Gains_Yield': [],
            'Dividend_Yield': [],
            'Total_Yield': []
        }

        df = pd.DataFrame(returns_dict)

        for year in self.YEAR_INTERVALS:
            returns = self.returns_in_interval(year)

            return returns
            # df.loc[len(df.index)] = [year, r["cg_return"], r["di_return"], r["total_return"]]

        # return df

    def _get_security_total_returns(self):
        start_date = datetime(1950, 1, 1)
        end_date = datetime(datetime.today().year, 1, 1)
        security_total_returns = {}
        for ticker in self.portfolio_weights:
            security = Security(ticker = ticker, analysis_start_date= start_date, analysis_end_date = end_date, freq = self.freq)
            security_total_returns[ticker] = security.total_returns()

        return security_total_returns

    def returns_in_interval(self, interval_year):

        df = pd.DataFrame({
            "Ticker": [], 
            "Capital_Gains_Yield": [], 
            "Dividend_Yield": [], 
            "Total_Yield": [],
            "Portfolio_Weight": []
            })
        
        for ticker in self.portfolio_weights:
            ticker_returns = self.security_returns_data[ticker]
            ticker_returns = ticker_returns[ticker_returns.index.year >= (datetime.today().year - interval_year)]

            df.loc[len(df.index)] = [
                ticker,
                ticker_returns["Capital_Gains_Yield"].mean(), 
                ticker_returns["Dividend_Yield"].mean(), 
                ticker_returns["Total_Yield"].mean(),
                self.portfolio_weights[ticker] / 100
                ]

        return df

    

In [270]:
VG1_PORTFOLIO = {'VTI': 32, 'VXUS': 22, 'VMLUX': 14, 'VWIUX': 18, 'VWLUX': 14}
VG2_PORTFOLIO = {'VTSAX': 13.65, 'VWUAX': 2.05, 'VWNAX': 2.05, 'VEXRX': 0.88, 'VSEQX': 0.88, 'VWILX': 1.95,
 'VTRIX': 1.95, 'VTIAX': 9.1, 'VUSFX': 50.0, 'VMLUX': 5.25, 'VWIUX': 7.0, 'VWLUX': 5.25}

In [307]:
vanguard_one = Portfolio(portfolio_weights = VG1_PORTFOLIO, freq= "Y")
vanguard_one.returns_in_interval(1)

Unnamed: 0,Ticker,Capital_Gains_Yield,Dividend_Yield,Total_Yield,Portfolio_Weight
0,VTI,25.681861,1.213552,26.895413,0.32
1,VXUS,8.984464,3.0958,12.080264,0.22
2,VMLUX,0.392739,1.199642,1.592381,0.14
3,VWIUX,0.905494,1.993197,2.898692,0.18
4,VWLUX,1.595538,2.415499,4.011037,0.14


In [295]:
vanguard_two = Portfolio(portfolio_weights = VG2_PORTFOLIO, freq= "Y")
vanguard_two.returns_in_interval(10)


Unnamed: 0,Ticker,Capital_Gains_Yield,Dividend_Yield,Total_Yield,Portfolio_Weight
0,VTSAX,16.896838,1.925235,18.822074,0.1365
1,VWUAX,21.340726,0.69691,22.037636,0.0205
2,VWNAX,15.030881,3.344077,18.374958,0.0205
3,VEXRX,17.253459,0.669691,17.923151,0.0088
4,VSEQX,16.092532,2.09413,18.186662,0.0088
5,VWILX,16.03954,1.782668,17.822209,0.0195
6,VTRIX,8.424663,2.72586,11.150523,0.0195
7,VTIAX,8.601925,3.312471,11.914396,0.091
8,VUSFX,1.746192,1.532989,3.433311,0.5
9,VMLUX,1.730758,1.839133,3.569891,0.0525


In [None]:
cgy = [
        (vanguard_one.returns_in_interval(10)["Portfolio_Weight"] * vanguard_one.returns_in_interval(10)["Capital_Gains_Yield"]).sum(),
        (vanguard_one.returns_in_interval(5)["Portfolio_Weight"] * vanguard_one.returns_in_interval(5)["Capital_Gains_Yield"]).sum(),
        (vanguard_one.returns_in_interval(3)["Portfolio_Weight"] * vanguard_one.returns_in_interval(3)["Capital_Gains_Yield"]).sum(),
        (vanguard_one.returns_in_interval(1)["Portfolio_Weight"] * vanguard_one.returns_in_interval(1)["Capital_Gains_Yield"]).sum()
    ]
dy = [
        (vanguard_one.returns_in_interval(10)["Portfolio_Weight"] * vanguard_one.returns_in_interval(10)["Dividend_Yield"]).sum(),
        (vanguard_one.returns_in_interval(5)["Portfolio_Weight"] * vanguard_one.returns_in_interval(5)["Dividend_Yield"]).sum(),
        (vanguard_one.returns_in_interval(3)["Portfolio_Weight"] * vanguard_one.returns_in_interval(3)["Dividend_Yield"]).sum(),
        (vanguard_one.returns_in_interval(1)["Portfolio_Weight"] * vanguard_one.returns_in_interval(1)["Dividend_Yield"]).sum()
    ]
ty =[
        (vanguard_one.returns_in_interval(10)["Portfolio_Weight"] * vanguard_one.returns_in_interval(10)["Total_Yield"]).sum(),
        (vanguard_one.returns_in_interval(5)["Portfolio_Weight"] * vanguard_one.returns_in_interval(5)["Total_Yield"]).sum(),
        (vanguard_one.returns_in_interval(3)["Portfolio_Weight"] * vanguard_one.returns_in_interval(3)["Total_Yield"]).sum(),
        (vanguard_one.returns_in_interval(1)["Portfolio_Weight"] * vanguard_one.returns_in_interval(1)["Total_Yield"]).sum()
    ]

In [340]:
def total_vg1_returns(yr_interval):
    df = pd.DataFrame({
    "Capital_Gains_Yield": [(vanguard_one.returns_in_interval(yr_interval)["Portfolio_Weight"] * vanguard_one.returns_in_interval(yr_interval)["Capital_Gains_Yield"]).sum()], 
    "Dividend_Yield": [(vanguard_one.returns_in_interval(yr_interval)["Portfolio_Weight"] * vanguard_one.returns_in_interval(yr_interval)["Dividend_Yield"]).sum()], 
    "Total_Yield": [(vanguard_one.returns_in_interval(yr_interval)["Portfolio_Weight"] * vanguard_one.returns_in_interval(yr_interval)["Total_Yield"]).sum()],
        })

    return df


df = pd.DataFrame({
    "Average_Returns_In_Interval": [],
    "Capital_Gains_Yield": [], 
    "Dividend_Yield": [], 
    "Total_Yield": [],
    })


for year in [10, 5, 3, 1]:

    r = total_vg1_returns(year)

    df.loc[len(df.index)] = [
        year,
        r["Capital_Gains_Yield"].mean(), 
        r["Dividend_Yield"].mean(), 
        r["Total_Yield"].mean(),
        ]

df

Unnamed: 0,Average_Returns_In_Interval,Capital_Gains_Yield,Dividend_Yield,Total_Yield
0,10.0,8.732507,2.765096,11.497603
1,5.0,10.056209,2.400404,12.456614
2,3.0,13.210932,2.177848,15.38878
3,1.0,10.636125,1.934308,12.570433


In [326]:
ty =[
        (vanguard_one.returns_in_interval(10)["Portfolio_Weight"] * vanguard_one.returns_in_interval(10)["Total_Yield"]).sum(),
        (vanguard_one.returns_in_interval(5)["Portfolio_Weight"] * vanguard_one.returns_in_interval(5)["Total_Yield"]).sum(),
        (vanguard_one.returns_in_interval(3)["Portfolio_Weight"] * vanguard_one.returns_in_interval(3)["Total_Yield"]).sum(),
        (vanguard_one.returns_in_interval(1)["Portfolio_Weight"] * vanguard_one.returns_in_interval(1)["Total_Yield"]).sum()
    ]

ty

[11.49760264159117, 12.456613626517823, 15.388779785999622, 12.570433314568138]

In [312]:
(vanguard_one.returns_in_interval(1)["Portfolio_Weight"] * vanguard_one.returns_in_interval(1)["Total_Yield"]).sum()

12.570433314568138

In [294]:
def total_vg2_returns(yr_interval):
    return (vanguard_two.returns_in_interval(yr_interval)["Portfolio_Weight"] * vanguard_two.returns_in_interval(yr_interval)["Total_Yield"]).sum()

total_vg2_returns(3)

10.75715372556595

In [341]:
vanguard_two.security_returns_data["VTI"]

KeyError: 'VTI'

In [344]:
VTI = Security(ticker = "VTI", analysis_start_date = start_date, freq = "Y")
VTI.total_returns().sort_index(ascending = False)

Unnamed: 0_level_0,Capital_Gains_Yield,Dividend_Yield,Total_Yield
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-12-31,25.681861,1.213552,26.895413
2020-12-31,21.076148,1.441927,22.518075
2019-12-31,30.668626,1.830915,32.49954
2018-12-31,-5.234801,2.144541,-3.09026
2017-12-29,21.213451,1.828582,23.042033
2016-12-30,12.817724,2.095399,14.913123
2015-12-31,0.355075,2.206027,2.561101
2014-12-31,12.549344,2.001792,14.551136
2013-12-31,33.449708,2.016734,35.466442
2012-12-31,16.451332,2.51437,18.965702


In [268]:
VG2_PORTFOLIO

{'VTSAX': 13.65,
 'VWUAX': 2.05,
 'VWNAX': 2.05,
 'VEXRX': 0.88,
 'VSEQX': 0.88,
 'VWILX': 1.95,
 'VTRIX': 1.95,
 'VTIAX': 9.1,
 'VUSFX': 50.0,
 'VMLUX': 5.25,
 'VWIUX': 7.0,
 'VWLUX': 5.25}

In [269]:
VG1_PORTFOLIO = {'VTI': 32, 'VXUS': 22, 'VMLUX': 14, 'VWIUX': 18, 'VWLUX': 14}
VG2_PORTFOLIO = {'VTSAX': 13.65, 'VWUAX': 2.05, 'VWNAX': 2.05, 'VEXRX': 0.88, 'VSEQX': 0.88, 'VWILX': 1.95,
 'VTRIX': 1.95, 'VTIAX': 9.1, 'VUSFX': 50.0, 'VMLUX': 5.25, 'VWIUX': 7.0, 'VWLUX': 5.25}