# Prototype valuation plotting with historical + analyst estimated fundamentals

In [1]:
import json
import requests
import numpy as np
from datetime import datetime
from yahoo_fin import stock_info

### Helper functions to pull and prep stock price data

In [2]:
def get_payload_json_quote_history_from_yfinance_df(df_quote_history, header='close'):
    """
    This helper function flattens one column of a dataframe of historical
    quotes into a plain dictionary.

    Input:
        - "df_quote_history": dataframe of historical quotes; the labels of the
                              selected column must provide "to_pydatetime()"
        - "header": default='close'; name of the column to extract

    Output:
        - dictionary of "<datetime>: <value>", one entry per label of the
          selected column. Values are copied as-is (NaN values are not filtered).
    """

    quotes = df_quote_history[header]

    # key every value by the plain datetime form of its label
    return {label.to_pydatetime(): quotes[label] for label in quotes.keys()}


def get_quote_history(symbol, start, end, interval='1mo', header='close'):
    """
    This helper function downloads historical quotes for a symbol and returns
    one column of them as a dictionary of "<timestamp>: <price>".

    Input:
        - "symbol": ticker symbol passed to "stock_info.get_data"
        - "start": passed through as "start_date"
        - "end": passed through as "end_date"
        - "interval": default='1mo'; passed through as "interval"
        - "header": default='close'; column of the returned dataframe to keep
    """

    # pull the raw dataframe of historical quotes from the yahoo_fin API
    raw_quotes = stock_info.get_data(symbol,
                                     start_date=start,
                                     end_date=end,
                                     interval=interval)

    # flatten the selected column into a "timestamp": "price" dictionary
    payload = get_payload_json_quote_history_from_yfinance_df(raw_quotes, header=header)
    return payload

### Helper functions to pull and prep stock fundamentals data

In [3]:
def get_guru_data(symbol, data_type, api_token='b4bbdecd0f955e18a90fee818670dd94:42afdf0e68bee024983a72f8b6ad071d', timeout=30):
    """
    This helper function pulls data from the GuruFocus API, for the given symbol
    and data type.

    Input:
        - "symbol": ticker symbol, inserted into the request URL
        - "data_type": name of the data set, appended to the request URL
                       (e.g. 'financials', 'analyst_estimate')
        - "api_token": GuruFocus API token, inserted into the request URL
        - "timeout": default=30; seconds to wait for the server before
                     "requests" raises a timeout exception

    Output:
        - the decoded JSON payload when the response status is 200
        - the string "Error: the GuruFocus API service failed." for any other
          status (callers must check for this before indexing the result)
    """

    # NOTE(review): the default "api_token" is a credential committed to source.
    # It is kept only so that existing callers keep working; prefer passing the
    # token explicitly (e.g. loaded from an environment variable) and rotate it.
    base_url = 'https://api.gurufocus.com/public/user/' + api_token + '/stock/'
    constructed_url = base_url + symbol + '/' + data_type

    # without a timeout, requests waits indefinitely on an unresponsive server
    r = requests.get(constructed_url, timeout=timeout)
    if r.status_code != 200:
        return "Error: the GuruFocus API service failed."
    return r.json()

In [4]:
class FinancialMetric(object):
    """
    This class implements financial metrics as objects, along with 
    related operations.

    Attributes:
        - "name": name of the metric
        - "timestamps": tuple of datetime objects, one per record kept
        - "values": tuple of metric values, aligned with "timestamps"
        - "TTM_value": the value paired with the 'TTM' timestamp; only set when
                       the input timestamps contain 'TTM'
        - "per_share_values": list of per share values, aligned with "timestamps";
                              only set after "set_per_share_data" has been called
    """
    
    def __init__(self, name, timestamps, start_year, values, numerify_values=True):
        """
        Constructor.
        
        Input:
            - "name": name of the metric.
            - "timestamps": a list (or list-like) of strings in the format of "%Y-%m";
                            the special string 'TTM' is also accepted: its value is saved
                            as "TTM_value" and left out of the timestamps/values
            - "start_year": the earliest year in the input data to be used
            - "values": a list (or list-like) of values (could be strings) for the metric,
                        with each value corresponding to the timestamp value of the same position
                        in the input list of timestamps
            - "numerify_values": default=True; 
                                 if True attempt to convert all input values to float values

        Raises:
            - ValueError: if "timestamps" and "values" have different lengths, if a value 
                          cannot be converted to float, or if a timestamp (other than 'TTM') 
                          does not match "%Y-%m"
        """
        
        # materialize both inputs so that their lengths can be compared
        timestamps = list(timestamps)
        values = list(values)

        # zip() silently drops the unmatched tail of the longer input, which would
        # hide corrupt data - reject mismatched inputs up front instead
        if len(timestamps) != len(values):
            raise ValueError('Invalid input length: timestamps and values must have the same length.')

        # if requested, ensure all values are converted to numeric values (float)
        if numerify_values:
            values = [float(value) for value in values]

        # form a dictionary with pre-specified constraints on the "year" values
        _processed_data = {}
        for timestamp, value in zip(timestamps, values):
            if timestamp == 'TTM':
                # save the TTM value 
                self.TTM_value = value
                continue
            _timestamp_obj = datetime.strptime(timestamp, '%Y-%m')
            if _timestamp_obj.year >= start_year:
                _processed_data[_timestamp_obj] = value
        
        # save the processed timestamps and values
        self.name = name
        self.timestamps = tuple(_processed_data.keys())
        self.values = tuple(_processed_data.values())
        
    def get_raw_data(self):
        """
        This method returns the saved timestamps & values as a dictionary of "timestamp: value" pairs.
        """
        
        return dict(zip(self.timestamps, self.values))
    
    def set_per_share_data(self, num_of_shares):
        """
        This method sets the per share values for the current metric.

        Input:
            - "num_of_shares": a list (or list-like) of share counts, one per saved value
                               and in the same order as the saved timestamps

        Raises:
            - ValueError: if "num_of_shares" and the saved values have different lengths
        """
        
        if len(num_of_shares) != len(self.values):
            raise ValueError('Invalid input length for num_of_shares.')

        # element-wise division: value[i] / num_of_shares[i]
        self.per_share_values = list(np.array(self.values) / np.array(num_of_shares))
                
    def get_per_share_data(self):
        """
        This method returns the saved timestamps & per share values as a dictionary of
        "<timestamp>: <per share value>". "set_per_share_data" must be called first.
        """
        
        return dict(zip(self.timestamps, self.per_share_values))

### Helper function to calculate "normal prices" based on average price multiples

In [5]:
def calc_avg_price_ratio(price_data, per_share_metric_data, num_outliers=12):
    """
    This function calculates and returns the average price-to-metric ratio.

    For every timestamp in the price data, the annual per share metric values are
    linearly interpolated to that month, and the ratio "price / interpolated metric"
    is computed. Months with a non-positive interpolated metric value, or without the 
    two annual values needed for the interpolation, are skipped. The highest and the 
    lowest "num_outliers" ratios are dropped before averaging.
    
    Input:
        - "price_data" - dictionary of "<timestamp>: <price>"
        - "per_share_metric_data" - dictionary of "<timestamp>: <per share metric value>", 
                                    with only one record per year
        - "num_outliers" - default=12 (1 year of monthly data); number of ratios to drop 
                           from each end of the sorted ratios before averaging

    Output:
        - the average of the remaining ratios

    Raises:
        - ValueError: if "num_outliers" is negative, or if no ratio would be left 
                      after dropping the outliers
    """
    
    if num_outliers < 0:
        raise ValueError('num_outliers must not be negative.')

    # get the ending month of fiscal years, usually either Sep or Dec
    # this assumes the ending month is always the same across all fiscal years
    # TODO - can insert code here to check the assumption and raise an exception
    #        if the assumption does not hold for the input
    _last_month_fiscal_years = list(per_share_metric_data.keys())[-1].month
    
    # create a new dictionary of <fiscal year>: <per share metric value> for easy
    # per share metric value look up by fiscal year
    _dict_year_metric = {timestamp.year: value 
                         for timestamp, value in per_share_metric_data.items()}

    _list_ratios = []
    for timestamp, quote in price_data.items():
        
        # both interpolation cases need the annual value of the current year;
        # compare against None (not truthiness) so a legitimate 0.0 is not 
        # mistaken for a missing year
        _metric_value = _dict_year_metric.get(timestamp.year)
        if _metric_value is None:
            continue
        
        if timestamp.month <= _last_month_fiscal_years:
            # when the current month is earlier than or the same as the last month of 
            # the fiscal year, interpolate from the previous year to the current year
            _value_from = _dict_year_metric.get(timestamp.year - 1)
            _value_to = _metric_value
            _months_elapsed = timestamp.month + 12 - _last_month_fiscal_years
        else:
            # when the current month is greater than the last month of the fiscal year, 
            # interpolate from the current year to the next year
            _value_from = _metric_value
            _value_to = _dict_year_metric.get(timestamp.year + 1)
            _months_elapsed = timestamp.month - _last_month_fiscal_years
            
        if _value_from is None or _value_to is None:
            continue
        
        # interpolate the monthly TTM value
        _metric_value_interpolated = _value_from + \
                                     (_value_to - _value_from) * _months_elapsed / 12
            
        # Only include the month in the calculation of average valuation ratios when the metric value
        # of that month is positive - investors don't really look at P/X ratios when they are negative
        if _metric_value_interpolated > 0:
            _list_ratios.append(quote / _metric_value_interpolated)
            
    # dropping the outliers must leave at least one ratio to average
    if len(_list_ratios) <= 2 * num_outliers:
        raise ValueError('Not enough valid price-to-metric ratios ({}) to drop {} outliers '
                         'from each end.'.format(len(_list_ratios), num_outliers))

    # remove the highest and the lowest "num_outliers" ratios
    _list_ratios = sorted(_list_ratios)[num_outliers:len(_list_ratios) - num_outliers]
        
    return sum(_list_ratios) / len(_list_ratios)

In [6]:
def get_normal_price(per_share_metric_data, avg_price_to_x_ratio):
    """
    This function calculates and returns the "normal prices" with respect to 
    a specific income/cash flow metric X. For every timestamp:
    
        "normal price" = "per share value of X" x "average Price-to-X ratio",
                         floored at 0

    Inputs:
        - "per_share_metric_data": dictionary of "<timestamp>: <per share metric value>"
        - "avg_price_to_x_ratio": a numerical value
        
    Output:
        - dictionary of "<timestamp>: <normal price>", with the same timestamps 
          as the input
    """
    
    normal_prices = {}
    for timestamp, per_share_value in per_share_metric_data.items():
        # a negative product is replaced by 0
        normal_prices[timestamp] = max(0, per_share_value * avg_price_to_x_ratio)

    return normal_prices

### Set up for plotting

In [7]:
from bokeh.models import DatetimeTickFormatter, NumeralTickFormatter
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

# configure bokeh to render figures inline in the notebook when show() is called
output_notebook()

In [8]:
def plot(quote_history_data, normal_price_data, normal_price_ratio):
    """
    This helper function draws the historical prices and the "normal prices" as 
    two lines on one figure with a datetime x-axis, and shows the figure.

    Input:
        - "quote_history_data": dictionary of "<timestamp>: <price>"
        - "normal_price_data": dictionary of "<timestamp>: <normal price>"
        - "normal_price_ratio": a numerical value; displayed in the legend label 
                                of the "normal price" line
    """

    # set up an empty figure
    fig = figure(title="Price Correlated with Fundamentals",
                 x_axis_type="datetime",
                 x_axis_label="Time",
                 y_axis_label="Price")

    # historical prices, drawn in black
    price_times = list(quote_history_data.keys())
    price_values = list(quote_history_data.values())
    fig.line(price_times, price_values, legend_label='Price', color='black', line_width=2)

    # "normal prices", labelled with the ratio they were derived from
    normal_times = list(normal_price_data.keys())
    normal_values = list(normal_price_data.values())
    normal_label = 'Normal Price (Ratio {:5.2f})'.format(normal_price_ratio)
    fig.line(normal_times, normal_values, legend_label=normal_label, line_width=2)

    show(fig)

### Valuation plotting with only historical fundamentals

In [9]:
# parameters - choose a company (ticker symbol)
symbol = 'AAPL'

# get financial history from the GuruFocus API
# NOTE: get_guru_data returns an error string instead of a dictionary when the
#       response status is not 200, in which case the indexing further down fails
financials_history = get_guru_data(symbol, 'financials')

In [10]:
# parameters - experiment

# set years interval for analysis, counted back from the current year
analysis_interval_years = 12

# financials parameters: the section of the annual financials to read from,
# and the name of the metric within that section
section_name = 'income_statement'
metric_name = 'EBITDA'

In [11]:
# get the earliest year accordingly; the financials start one year before the quotes
# because calc_avg_price_ratio interpolates the early months of a year from the 
# previous year's annual value
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12 - consider 
#               datetime.now(timezone.utc) when this moves out of the prototype
start_year = datetime.utcnow().year - analysis_interval_years
start_date_quote_history = datetime(start_year, 1, 1)
start_year_financials_history = start_year - 1

# get quote history (default interval and column of get_quote_history: 
# monthly 'close' prices), from Jan 1 of the start year until today
quote_history = get_quote_history(symbol, start=start_date_quote_history.strftime('%Y-%m-%d'),
                                  end=datetime.utcnow().strftime('%Y-%m-%d'))

# get shares outstanding (diluted average), one value per fiscal year
num_of_shares = FinancialMetric(name='Shares Outstanding (Diluted Average)',
                                timestamps=financials_history['financials']['annuals']['Fiscal Year'],
                                start_year=start_year_financials_history,
                                values=financials_history['financials']['annuals']['income_statement']['Shares Outstanding (Diluted Average)'])

# get per share metric data; built from the same fiscal years and start year as 
# the shares outstanding, so that both have one value per kept fiscal year
metric_X = FinancialMetric(name = metric_name, 
                           timestamps = financials_history['financials']['annuals']['Fiscal Year'],
                           start_year = start_year_financials_history,
                           values = financials_history['financials']['annuals'][section_name][metric_name])

# divide each annual value by the shares outstanding of the same fiscal year
metric_X.set_per_share_data(num_of_shares=num_of_shares.values)

# get normal prices for metric X
avg_price_ratio_X = calc_avg_price_ratio(quote_history, metric_X.get_per_share_data())
normal_price_per_X = get_normal_price(metric_X.get_per_share_data(), 
                                      avg_price_ratio_X)

# plot
plot(quote_history, normal_price_per_X, avg_price_ratio_X)

### <font color=red>TODO - figure out how to concatenate historical values and analyst estimates for revenue, earnings, EBIT and EBITDA (per share)</font>

#### Considerations:

- the code needs to be reusable for the web app
- the code needs to be extensible when adding estimated **Operating Income** and **OCF** to the scope
- the code should not make it harder to calculate metrics like "3-year revenue growth" when analyst-estimated values are included

### Download analyst estimates

In [12]:
# get the analyst estimates for the same symbol from the GuruFocus API
analyst_estimates = get_guru_data(symbol, 'analyst_estimate')

# pretty-print the payload to inspect its structure
print(json.dumps(analyst_estimates, indent=4))

{
    "annual": {
        "date": [
            "202209",
            "202309",
            "202409"
        ],
        "revenue_estimate": [
            379836.3,
            398146.4,
            406222.3
        ],
        "ebit_estimate": [
            108259.4,
            110040.5,
            115792.6
        ],
        "ebitda_estimate": [
            120115.7,
            120915.7,
            126508.4
        ],
        "dividend_estimate": [
            0.9246,
            1.0328,
            1.38
        ],
        "per_share_eps_estimate": [
            5.6123,
            5.9053,
            6.314
        ],
        "eps_nri_estimate": [
            5.6607,
            6.0117,
            6.314
        ],
        "long_term_growth_rate_mean": 8.335,
        "long_term_revenue_growth_rate_mean": "3.55"
    },
    "quarter": {
        "date": [
            "202112",
            "202203",
            "202206",
            "202209",
            "202212",
            "202303",

### Get per share estimates

In [13]:
# first get the "Shares Outstanding (Diluted Average)", which according to Guru
# is usually used to calculate per share values of metrics in income statements
# and cashflow statements


# get the TTM value for "Shares Outstanding (Diluted Average)"; it is used below
# as the constant share count for converting the estimates to per share values
num_of_shares.TTM_value

16635.095

In [14]:
# define a helper function to get per share estimates from the dictionary of estimates
def get_per_share_estimates(timestamps, values, shares_outstanding=None):
    """
    This helper function pairs each timestamp with the value at the same position 
    and returns a dictionary of "<timestamp>: <value (per share)>".
    Timestamps in the output dictionary will be Python's datetime objects.
    
    Inputs:
        'timestamps': timestamps in string formats of '%Y%m'.
        'values': analyst estimated values. If 'shares_outstanding' is None, these 
                  are taken to be per share values already and are copied unchanged.
        'shares_outstanding': a constant number of shares; when given, every value 
                              is divided by it to compute the per share value
    """
    
    per_share = {}
    for position, stamp in enumerate(timestamps):
        estimate = values[position]
        if shares_outstanding is not None:
            estimate = estimate / shares_outstanding
        per_share[datetime.strptime(stamp, '%Y%m')] = estimate
    
    return per_share


# try the helper on the annual EBITDA estimates, using the TTM share count as 
# the constant number of shares
get_per_share_estimates(analyst_estimates['annual']['date'], 
                        analyst_estimates['annual']['ebitda_estimate'],
                        shares_outstanding=num_of_shares.TTM_value)

{datetime.datetime(2022, 9, 1, 0, 0): 7.220620020504842,
 datetime.datetime(2023, 9, 1, 0, 0): 7.268711119473618,
 datetime.datetime(2024, 9, 1, 0, 0): 7.604909980976964}

### <font color=blue>Code to get per share estimates tested; continue working on the other TODOs</font>

### Concatenate the historical and estimated values of per share metrics

In [15]:
# start from the historical per share values (get_per_share_data returns a new dictionary)
metric_X_combined_per_share_data = metric_X.get_per_share_data()

# convert the annual EBITDA estimates to per share values with the TTM share count
metric_X_estimated = get_per_share_estimates(
    analyst_estimates['annual']['date'],
    analyst_estimates['annual']['ebitda_estimate'],
    shares_outstanding=num_of_shares.TTM_value,
)

# append the estimates after the historical values; an estimate replaces a 
# historical value that has the same timestamp
metric_X_combined_per_share_data.update(metric_X_estimated)

metric_X_combined_per_share_data

{datetime.datetime(2008, 9, 1, 0, 0): 0.3492889043231064,
 datetime.datetime(2009, 9, 1, 0, 0): 0.4911770056394397,
 datetime.datetime(2010, 9, 1, 0, 0): 0.7497314994135625,
 datetime.datetime(2011, 9, 1, 0, 0): 1.3734049262451165,
 datetime.datetime(2012, 9, 1, 0, 0): 2.2304552954650583,
 datetime.datetime(2013, 9, 1, 0, 0): 2.1868752524290693,
 datetime.datetime(2014, 9, 1, 0, 0): 2.5239426047130147,
 datetime.datetime(2015, 9, 1, 0, 0): 3.6468148402858653,
 datetime.datetime(2016, 9, 1, 0, 0): 3.333147888262436,
 datetime.datetime(2017, 9, 1, 0, 0): 3.644968135983603,
 datetime.datetime(2018, 9, 1, 0, 0): 4.352205339533865,
 datetime.datetime(2019, 9, 1, 0, 0): 4.402104556597669,
 datetime.datetime(2020, 9, 1, 0, 0): 4.622262142623316,
 datetime.datetime(2021, 9, 1, 0, 0): 7.301309896596597,
 datetime.datetime(2022, 9, 1, 0, 0): 7.220620020504842,
 datetime.datetime(2023, 9, 1, 0, 0): 7.268711119473618,
 datetime.datetime(2024, 9, 1, 0, 0): 7.604909980976964}

### Update the valuation plot with analyst estimates included

In [16]:
# update normal prices after appending analyst estimated data;
# avg_price_ratio_X is not recomputed here, so the ratio is still the one 
# calculated from the historical per share values only
normal_price_per_X = get_normal_price(metric_X_combined_per_share_data, 
                                      avg_price_ratio_X)

# re-draw the plot; the normal price line now extends into the estimated years
plot(quote_history, normal_price_per_X, avg_price_ratio_X)