In [2]:
#install yahoofinancials
# !{sys.executable} -m pip install yahoofinancials

In [3]:
import pandas as pd
import yfinance as yf
from yahoofinancials import YahooFinancials
import datetime

In [13]:
VG1_PORTFOLIO = {"VTI": 32, "VXUS": 22, "VMLUX": 14, "VWIUX": 18, "VWLUX": 14}
VG2_PORTFOLIO = {"VTSAX": 13.65, "VWUAX": 2.05, "VWNAX": 2.05, "VEXRX": .88, "VSEQX": .88, "VWILX": 1.95, "VTRIX": 1.95, "VTIAX": 9.10, "VUSFX": 50.00, "VMLUX": 5.25, "VWIUX":  7.00, "VWLUX": 5.25}
CALC_YEARLY_RETURNS_OF = [20, 10, 5, 3, 1]

In [56]:
class Security:
    def __init__(self, ticker, analysis_start_date, freq = "M"):
        self.analysis_start_date = analysis_start_date
        self.freq = freq
        self.ticker = ticker.upper()
        self.yahoo_finance_data = self._get_yahoo_finance_data()

        self.data = self._revise_date_cols()
        self.data = self._filter_dates()
        self.data = self._clear_extra_columns()

        self.historical_data_in_interval = self.historical_data_in_interval()
        self.intervals_per_year = self.intervals_per_year()

    def _get_yahoo_finance_data(self):
        return yf.Ticker(self.ticker).history(period = "max")

    def _revise_date_cols(self):
        df = self.yahoo_finance_data
        df["Date"] = df.index
        df.reset_index(drop = True)
        return df

    def _filter_dates(self):
        return self.data[self.data["Date"] > self.analysis_start_date].sort_index()
        
    def _clear_extra_columns(self):
        df = self.data
        df = df[["Date", "Close", "Dividends"]]
        return df

    def total_returns(self):
        """
        Total returns as a % based on the interval provided by freq.  if freq = "Y" then total returns are yearly
        """
        Capital_Gains_Yield = self.capital_gains_interval_yield()
        Dividend_Yield = self.dividend_income_interval_yield()
        Total_Yield = Capital_Gains_Yield + Dividend_Yield

        return pd.DataFrame({
            "Capital_Gains_Yield" : Capital_Gains_Yield, 
            "Dividend_Yield" : Dividend_Yield,
            "Total_Yield" : Total_Yield
        })

    def capital_gains_interval_yield(self):
        capital_gains_data = self.historical_data_in_interval["Close"]
        return capital_gains_data.pct_change() * 100

    def dividend_income_interval_yield(self):
        data = self.historical_data_in_interval
        dividend_interval_yield = data["Dividends"] / data["Close"] * 100
        return dividend_interval_yield

    def intervals_per_year(self):
        data = self.historical_data_in_interval
        good_year = data.index.max().year - 1

        intervals_per_year = len(data[data.index.year == good_year])

        if intervals_per_year == 0:
            #good_year might be a fractional year
            good_year = data.index.max().year
            intervals_per_year = len(data[data.index.year == good_year])

        assert intervals_per_year <= 24, "Choose another interval greater than W (week) like M (month) or Q (quarter)"

        return intervals_per_year

    def historical_data_in_interval(self):
        """
        Calculates based on end of frequency.  note, "M" means month end
        possibilities for freq => https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
        """
        dividends = self._total_dividends_in_interval(self.data[["Dividends", "Date"]])
        closing_prices = self._closing_prices_in_interval(self.data[["Close", "Date"]])

        merged_dividends_and_closing_price = closing_prices.merge(dividends, left_on = "Date", right_on = "Date")

        return merged_dividends_and_closing_price

    def _closing_prices_in_interval(self, closing_prices):
        """
        """
        #pd.Grouper really messes up the indexes so I have to do some work to put it all back in order
        closing_prices = closing_prices.groupby(pd.Grouper(key = "Date", freq = self.freq)).nth(-1)
        closing_prices = closing_prices.rename(columns = {"Date": "Trading_Date"})

        closing_prices = closing_prices.reset_index(drop = True)
        closing_prices = closing_prices.set_index("Trading_Date")
        closing_prices = closing_prices.rename_axis("Date")

        return closing_prices

    def _total_dividends_in_interval(self, dividends):
        """
        """
        #pd.Grouper really messes up the indexes so I have to do some work to put it all back in order
        dividends_pre_sum = dividends.groupby(pd.Grouper(key = "Date", freq = self.freq)).nth(-1)
        dividends_pre_sum = dividends_pre_sum.rename(columns = {"Date": "Trading_Date", "Dividends": "Dividends_Pre"})
    
        dividends_post_sum = dividends.groupby(pd.Grouper(key = "Date", freq = self.freq)).sum()

        dividends = dividends_pre_sum.merge(dividends_post_sum, left_on = "Date", right_on = "Date")
        dividends = dividends.drop(columns = ["Dividends_Pre"])
        dividends = dividends.reset_index(drop = True)
        dividends = dividends.set_index("Trading_Date")
        dividends = dividends.rename_axis("Date")
        return dividends

In [67]:
class Portfolio:
    def __init__(self, portfolio_weights, analysis_start_date, freq):
        self.portfolio_weights = portfolio_weights
        self.analysis_start_date = analysis_start_date
        self.freq = freq
        
        self.CALC_YEARLY_RETURNS_OF = [20, 10, 5, 3, 1]


    def returns(self):
        for ticker in self.portfolio_weights:
            ticker_returns = Security(ticker = ticker, analysis_start_date= ???, freq = freq)

        

        

    

In [69]:
start_date = datetime.datetime(2007, 6, 1)

In [68]:
vanguard_one = Portfolio(portfolio_weights = VG1_PORTFOLIO, analysis_start_date = start_date, freq= "M")
vanguard_one.returns()

VTI
VXUS
VMLUX
VWIUX
VWLUX


In [71]:
VTI = Security(ticker = "BIGZ", analysis_start_date = start_date, freq = "M")
VTI.total_returns()

Unnamed: 0_level_0,Capital_Gains_Yield,Dividend_Yield,Total_Yield
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-03-31,,0.0,
2021-04-30,7.19214,0.0,7.19214
2021-05-28,-7.904408,0.0,-7.904408
2021-06-30,1.846549,0.509163,2.355712
2021-07-30,-5.346407,0.537922,-4.808485
2021-08-31,3.665341,0.518903,4.184244
2021-09-30,-7.782232,0.562693,-7.219539
2021-10-29,2.231471,0.55041,2.781882
2021-11-30,-12.992187,0.632599,-12.359588
2021-12-31,-8.020138,0.687758,-7.33238


In [32]:
df1 = pd.DataFrame({
    "Date": [
        datetime.datetime(2020, 1, 1),
        datetime.datetime(2020, 1, 30),
        datetime.datetime(2020, 2, 1)
    ], 
    "B": [10, 20, 30], 
    "B2": [2, 3, 9]
})

df2 = pd.DataFrame({
    "Date": [
        datetime.datetime(2020, 1, 1),
        datetime.datetime(2020, 3, 30),
        datetime.datetime(2020, 2, 1)
    ], 
    "C": [3, 4, 5]
})

df1

Unnamed: 0,Date,B,B2
0,2020-01-01,10,2
1,2020-01-30,20,3
2,2020-02-01,30,9


In [33]:
df1["B"] / df1["B2"]

0    5.000000
1    6.666667
2    3.333333
dtype: float64