In [None]:
#install yahoofinancials
# !{sys.executable} -m pip install yahoofinancials

In [None]:

import pandas as pd
import yfinance as yf
from yahoofinancials import YahooFinancials
import datetime

In [253]:
# yf.Ticker("vwuAX").history(period = "max")["Date"]

In [491]:
VG1_PORTFOLIO = {"VTI": 32, "VXUS": 22, "VMLUX": 14, "VWIUX": 18, "VWLUX": 14}
VG2_PORTFOLIO = {"VTSAX": 13.65, "VWUAX": 2.05, "VWNAX": 2.05, "VEXRX": .88, "VSEQX": .88, "VWILX": 1.95, "VTRIX": 1.95, "VTIAX": 9.10, "VUSFX": 50.00, "VMLUX": 5.25, "VWIUX":  7.00, "VWLUX": 5.25}
CALC_YEARLY_RETURNS_OF = [20, 10, 5, 3, 1]
calculations_as_of = datetime.datetime(2020, 1, 1)


In [546]:
class Security:
    def __init__(self, ticker, calculations_as_of):
        self.date_filter = calculations_as_of
        self.ticker = ticker.upper()
        self.yahoo_finance_data = self._get_yahoo_finance_data()

        self.data = self._revise_date_cols()
        self.data = self._filter_dates()
        self.data = self._clear_extra_columns()

    def _get_yahoo_finance_data(self):
        return yf.Ticker(self.ticker).history(period = "max")

    def _revise_date_cols(self):
        df = self.yahoo_finance_data
        df["Date"] = df.index
        df.reset_index(drop = True)
        return df

    def _filter_dates(self):
        return self.data[self.data["Date"] > self.date_filter].sort_index()
        
    def _clear_extra_columns(self):
        df = self.data
        df = df[["Date", "Close", "Dividends"]]
        return df

    def get_returns(self, freq = "M"):
        """
        Calculates based on end of frequency.  note, "M" means month end
        possibilities for freq => https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
        """
        dividends = self._total_dividends_in_interval(self.data[["Dividends", "Date"]], freq)
        closing_prices = self._closing_prices_in_interval(self.data[["Close", "Date"]], freq)

        merged_dividends_and_closing_price = closing_prices.merge(dividends, left_on = "Date", right_on = "Date")

        return merged_dividends_and_closing_price
    
    # def _merging_dividends_and_closing_prices(self, closing_prices, dividends):
    #     df = closing_prices.merge(dividends, left_on = "Date", right_on = "Date")
    #     # df = df.reset_index(drop=True)
    #     # df = df.set_index("Trading_Date")
    #     return df


    def _closing_prices_in_interval(self, closing_prices, freq):
        """
        """
        closing_prices = closing_prices.groupby(pd.Grouper(key = "Date", freq = "M")).nth(-1)
        closing_prices = closing_prices.rename(columns = {"Date": "Trading_Date"})

        closing_prices = closing_prices.reset_index(drop = True)
        closing_prices = closing_prices.set_index("Trading_Date")
        closing_prices = closing_prices.rename_axis("Date")

        return closing_prices

    def _total_dividends_in_interval(self, dividends, freq):
        """
        """
        dividends_pre_sum = dividends.groupby(pd.Grouper(key = "Date", freq = "M")).nth(-1)
        dividends_pre_sum = dividends_pre_sum.rename(columns = {"Date": "Trading_Date", "Dividends": "Dividends_Pre"})
    
        dividends_post_sum = dividends.groupby(pd.Grouper(key = "Date", freq = "M")).sum()

        dividends = dividends_pre_sum.merge(dividends_post_sum, left_on = "Date", right_on = "Date")
        dividends = dividends.drop(columns = ["Dividends_Pre"])
        dividends = dividends.reset_index(drop = True)
        dividends = dividends.set_index("Trading_Date")
        dividends = dividends.rename_axis("Date")
        return dividends



In [547]:
vwuax = Security("vwuax", calculations_as_of)
vwuax.get_returns(freq = "M")

Unnamed: 0_level_0,Dividends
Date,Unnamed: 1_level_1
2020-01-30,0.0
2020-02-28,0.0
2020-03-31,0.0
2020-04-30,0.0
2020-05-29,0.0
2020-06-30,0.0
2020-07-31,0.0
2020-08-31,0.0
2020-09-30,0.0
2020-10-30,0.0


In [519]:
vwuax.get_returns(freq = "M")

Unnamed: 0_level_0,Dividends_Pre,Trading_Date,Dividends
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-31,0.0,2020-03-31,0.0
2020-04-30,0.0,2020-04-30,0.0
2020-06-30,0.0,2020-06-30,0.0
2020-07-31,0.0,2020-07-31,0.0
2020-08-31,0.0,2020-08-31,0.0
2020-09-30,0.0,2020-09-30,0.0
2020-11-30,0.0,2020-11-30,0.0
2020-12-31,0.0,2020-12-31,0.224
2021-03-31,0.0,2021-03-31,0.0
2021-04-30,0.0,2021-04-30,0.0


In [416]:
df1 = pd.DataFrame({
    "Date": [
        datetime.datetime(2020, 1, 1),
        datetime.datetime(2020, 1, 30),
        datetime.datetime(2020, 2, 1)
    ], 
    "B": [99, 98, 97]
})

df2 = pd.DataFrame({
    "Date": [
        datetime.datetime(2020, 1, 1),
        datetime.datetime(2020, 3, 30),
        datetime.datetime(2020, 2, 1)
    ], 
    "C": [99, 98, 97]
})

df1

Unnamed: 0,Date,B
0,2020-01-01,99
1,2020-01-30,98
2,2020-02-01,97


In [360]:
df2

Unnamed: 0,Date,C
0,2020-01-01,99
1,2020-03-30,98
2,2020-02-01,97


In [361]:
df1.merge(df2, left_on ="Date", right_on = "Date")

Unnamed: 0,Date,B,C
0,2020-01-01,99,99
1,2020-02-01,97,97


In [295]:
df.groupby(pd.Grouper(key = "A", freq = "M")).nth(-1)

Unnamed: 0_level_0,A,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-31,2020-01-30,98
2020-02-29,2020-02-01,97


In [291]:
vwuax.get_returns().groupby(pd.Grouper(key = "Date", freq = "M")).nth(0)

AttributeError: 'DataFrameGroupBy' object has no attribute 'groupby'

In [207]:
# In [14]: df1.groupby(pd.TimeGrouper('M')).nth(0)
vwuax.data.groupby(pd.Grouper(key = "Date", freq = "M")).sum(0)

Unnamed: 0_level_0,Close,Dividends
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-31,2121.465576,0.0
2020-02-29,2193.395142,0.0
2020-03-31,2095.458755,0.0
2020-04-30,2142.226875,0.0
2020-05-31,2071.22924,0.0
2020-06-30,2741.018616,0.0
2020-07-31,2843.057373,0.0
2020-08-31,3056.895737,0.0
2020-09-30,2772.905106,0.0
2020-10-31,3189.514328,0.0


In [None]:
def add_date_col(df):
    df["Date"] = df.index
    return df

def date_group (df, interval = "m", interval_start_date = "1/1/2020"):
    """
    df - the data frame that has the security data
    interval - the interval to group the data buy.  Options are month (m) and will be year (y)
    interval_start_date - date to start the intervals on.
    """

def date_filter (df, calcs_as_of):


In [None]:
date_interval()