**Imports**

In [1]:
import os
import pickle
from collections import Counter, OrderedDict
from datetime import datetime
from statistics import mean

import matplotlib.pyplot as plt
import missingno as msno
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

import FundamentalAnalysis as fa
import import_ipynb  # must stay before UTILS: it enables importing .ipynb files as modules
import UTILS as utils

importing Jupyter notebook from UTILS.ipynb


In [2]:
# Lift pandas' display limits so frames are shown with every row and column.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [3]:
# API key handed to the FundamentalAnalysis calls further down. It is read from
# the environment so the secret never has to be written into the notebook;
# export FUNDAMENTAL_ANALYSIS_API_KEY before starting the kernel.
# An unset variable yields an empty string, so the remote calls will fail.
api_key = os.environ.get("FUNDAMENTAL_ANALYSIS_API_KEY", "")

------------

**Load data**

Data files for a displacement of −X years (replace `X` in the paths below with the number of years).

In [None]:
# file1: CSV with the ratios table; file2: pickle with the companies' financial data.
# The "X" in both paths is the year-displacement placeholder.
file1, file2 = (
    'DATA_ttm/S_P500_data_ttm_Xyeardisp/S_P500_dataframe_with_ratios_ttm_Xyeardisp.csv',
    'DATA_ttm/S_P500_data_ttm_Xyeardisp/S_P500_companies_financials_data_ttm_Xyeardisp.pickle',
)

In [None]:
# Load both files through the UTILS helper. The result is indexed positionally
# in the next cell: element 0 becomes `df`, element 1 becomes `dict_companies`.
data_loaded = utils.data_loading(file1, file2)

In [None]:
# Split the loader result into the ratios frame and the per-company dictionary.
df, dict_companies = data_loaded[0], data_loaded[1]

In [92]:
# Preview the first two rows of the ratios table (one row per ticker).
df.head(2)

Unnamed: 0_level_0,_PAT_margin,_EBITDA_margin,_return_on_equity,_return_on_assets,_return_on_capital_employed,_gross_margin,_operating_margin,_operating_cash_flow_ratio,_return_on_invested_capital,_return_on_sales,_fixed_asset_turnover,_working_capital_turnover,_total_asset_turnover,_inventory_turnover,_days_of_inventory_on_hand,_accounts_receivables_turnover,_payables_turnover,_debt_to_assets,_interest_coverage,_asset_to_equity,_debt_to_equity,_equity_multiplier,_total_debt_to_capitalization,_total_debt_to_capital,_net_debt_to_EBITDA,_degree_financial_leverage,_earnings_per_share,_book_value_per_share,_cash_earnings_per_share,_price_to_earnings,_price_to_book_value,_price_to_sales,_price_to_free_cash_flow,_price_earnings_to_growth,_dividend_yield,_dividend_payout,_enterprise_value_to_ebitda,_enterprise_value_to_sales,_current_ratio,_quick_ratio,_cash_ratio,_days_of_sales_outstanding,_days_of_sales_in_inventory,_days_of_payables_outstanding,_operating_cycle,_cash_conversion_cycle
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
MMM,0.167402,0.271661,0.102917,0.03103,0.047709,0.468315,0.208277,2.300579,0.047709,0.208277,0.866823,5.229256,0.185262,0.978114,92.077553,1.813772,1.642398,0.696076,15.093071,3.294063,1.176688,3.309888,0.566805,0.566805,5.917859,2.226105,2.555,24.92273,3.219128,18.235344,7.483685,12.189879,73.837823,-2.09291,0.007957,0.580389,50.806934,3.449114,1.74394,1.11554,0.515781,49.624411,201.254556,54.859324,141.701964,86.84264
AOS,0.13716,0.199539,0.065875,0.037287,0.066318,0.370851,0.171587,21.825725,0.066318,0.171587,1.483506,5.07351,0.27125,1.547255,58.337771,1.472644,0.873009,0.431103,143.894444,1.761376,0.082905,1.761376,0.079406,0.079406,-1.930628,1.03313,0.757969,11.493758,1.000958,23.92637,6.235913,13.052287,98.040415,1.01074,0.003732,0.355069,63.643361,3.159238,1.736831,1.302403,0.518312,61.207901,248.232043,103.11431,119.545672,16.431362


----------------

**Add sectors and industries information to the dataset**

In [93]:
# Fetch sector and industry information through the UTILS helper; the `sector`
# and `industry` attributes of the two results are inserted into `df` below.
sectors_df, industries_df = utils.get_company_sectors_and_industries(df, dict_companies)

In [94]:
# Add each column only when it is missing. Checking them independently keeps
# the cell safe to re-run and avoids df.insert raising when just one of the two
# columns is already present.
if "sector" not in df.columns:
    df.insert(0, "sector", sectors_df.sector)
if "industry" not in df.columns:
    df.insert(1, "industry", industries_df.industry)

-------------------

**Compute intrinsic value**

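$$\text{Intrinsic value} = \text{EPS}_{\text{TTM}} \times (1 + r) \times \text{P/E}$$

where $\text{EPS}_{\text{TTM}}$ is the trailing-twelve-month earnings per share, $r$ is the expected earnings growth rate and $\text{P/E}$ is the current price-to-earnings ratio.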

In [11]:
def compute_expected_earnings_growth_rate(ticker, disp):
    """Return the outlier-trimmed mean of a ticker's annual PEG ratios.

    Downloads the annual financial ratios with ``fa.financial_ratios`` (using
    the notebook-level ``api_key``), takes the ``priceEarningsToGrowthRatio``
    row from column position ``disp - 1`` onwards, drops missing values,
    discards entries whose absolute z-score exceeds 1 and averages the rest.

    NOTE(review): the value returned is a mean PEG ratio, while the caller uses
    it as the growth rate ``r`` in ``EPS * (1 + r) * P/E`` -- confirm that this
    proxy is intended.

    Parameters
    ----------
    ticker : str
        Ticker symbol forwarded to ``fa.financial_ratios``.
    disp : int
        Year displacement; columns before position ``disp - 1`` are skipped.

    Returns
    -------
    float
        Mean of the retained PEG values, or ``nan`` when no value is available.
    """
    # Clamp the start at 0: with disp=0 the expression ``disp - 1`` is -1 and
    # ``iloc[:, -1:]`` would silently keep only the last column.
    first_col = max(disp - 1, 0)

    # dropna is needed because some stocks (e.g. HWM) report None for some years.
    ratios = fa.financial_ratios(ticker, api_key, period="annual")
    peg = ratios.iloc[:, first_col:].loc['priceEarningsToGrowthRatio'].dropna()
    peg_values = peg.values.astype(float)

    # Nothing to average: return NaN explicitly instead of relying on the mean
    # of an empty array.
    if peg_values.size == 0:
        return np.nan

    # Remove outliers: in turbulent years growth can swing widely and distort
    # the mean. A z-score threshold of 1 is deliberately conservative.
    # Values whose z-score is NaN (single or constant input) are kept.
    z = np.abs(stats.zscore(peg_values))
    threshold = 1
    return peg_values[~(z > threshold)].mean()

In [12]:
def compute_price_to_earnings_intrinsic_value(df, dict_companies, ticker, year_disp=0):
    """Estimate a ticker's intrinsic value as ``EPS_ttm * (1 + r) * P/E``.

    Parameters
    ----------
    df : pandas.DataFrame
        Ratios table indexed by ticker; must contain ``_price_to_earnings``.
    dict_companies : dict
        Maps ticker -> dict holding the income statement under the key
        ``'_Company__income_statement'`` (a frame with an ``'eps'`` row).
    ticker : str
        Company to evaluate.
    year_disp : int, default 0
        Year displacement; the EPS window covers the four income-statement
        columns starting at position ``4 * year_disp``.

    Returns
    -------
    tuple or str
        ``(intrinsic_value, date)``, where ``date`` is the income-statement
        column label at position ``4 * year_disp``; or the string
        ``"Error. No data for this ticker"`` when that column does not exist.
    """
    income_statement = dict_companies[ticker]['_Company__income_statement']
    window_start = 4 * year_disp

    # Check data availability first so no remote request is made for a ticker
    # that cannot be evaluated anyway. Only the out-of-range lookup is caught.
    try:
        date = income_statement.columns[window_start]
    except IndexError:
        return "Error. No data for this ticker"

    earnings_growth_rate = compute_expected_earnings_growth_rate(ticker, year_disp)

    # Trailing (12 months) EPS: sum of four consecutive quarters.
    quarterly_eps = income_statement.loc['eps'].iloc[window_start:window_start + 4]
    if len(quarterly_eps) < 4:
        # Fewer than four quarters would give a partial sum that looks valid.
        eps_trailing = np.nan
    else:
        # skipna=False: a single missing quarter must invalidate the sum.
        eps_trailing = quarterly_eps.sum(skipna=False)

    current_pe = df.loc[ticker, '_price_to_earnings']

    intrinsic_value = eps_trailing * (1 + earnings_growth_rate) * current_pe

    return intrinsic_value, date

In [13]:
def stock_price_intrinsic_value_comparator(dict_companies, ticker, intrinsic_value, date):
    """Return the ratio between an intrinsic value and the market close price.

    Daily prices are downloaded with ``fa.stock_data`` and the close of the
    trading day nearest to ``date + '-01'`` is used as the market price.

    Parameters
    ----------
    dict_companies : dict
        Unused; kept so existing callers keep working.
    ticker : str
        Ticker symbol forwarded to ``fa.stock_data``.
    intrinsic_value : float
        Intrinsic value to compare with the market price.
    date : str
        Date prefix to which ``'-01'`` is appended before parsing
        (presumably ``'YYYY-MM'`` -- TODO confirm against the income statement).

    Returns
    -------
    float or int
        ``intrinsic_value / close``; ``0`` when the close price is 0.
    """
    stock_data = fa.stock_data(ticker, interval="1d")
    stock_data.index = pd.to_datetime(stock_data.index)

    format_date = pd.to_datetime(date + '-01')

    # Index.get_loc(..., method='nearest') was removed in pandas 2.0;
    # get_indexer is the supported way to find the nearest position.
    nearest_pos = stock_data.index.get_indexer([format_date], method='nearest')[0]
    stock_price = stock_data.iloc[nearest_pos].close

    # Avoid a division by zero when the quoted price is 0.
    if stock_price == 0:
        return 0
    return intrinsic_value / stock_price

In [None]:
# Year displacement of the loaded dataset. It must match the "X" used in the
# *_Xyeardisp file names, so it has to be set by hand before running this cell.
YEAR_DISP = None  # TODO: set to the integer number of years of displacement

if YEAR_DISP is None:
    raise ValueError("Set YEAR_DISP to the year displacement of the loaded data")

# sector -> ticker -> [intrinsic value, intrinsic value / market price]
peint_by_sector = dict()
sectors = utils.get_sectors_and_industries(df, dict_companies)[0]
for sector in sectors:
    sector_df = utils.get_sector_companies(df, dict_companies, sector)
    peint_by_sector[sector] = dict()
    for ticker in sector_df.index:
        intrinsic = compute_price_to_earnings_intrinsic_value(df, dict_companies, ticker, year_disp=YEAR_DISP)
        # The helper reports "no data" by returning a string instead of a tuple.
        if isinstance(intrinsic, str):
            peint_by_sector[sector][ticker] = [np.nan, np.nan]
        else:
            intrinsic_value, date = intrinsic
            price_ratio = stock_price_intrinsic_value_comparator(dict_companies, ticker, intrinsic_value, date)
            peint_by_sector[sector][ticker] = [intrinsic_value, price_ratio]

In [None]:
# List the sectors for which results were stored.
peint_by_sector.keys()

In [101]:
# Persist the results. The folder is created first so the output of the long
# computation above is not lost to a FileNotFoundError.
output_path = 'models_outputs/EPS_output/X_year_disp/peint_by_sector_Xyear.pickle'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'wb') as file:
    pickle.dump(peint_by_sector, file)

**Retrieve the saved dictionary**

In [None]:
# Read the stored results back into a plain dictionary.
# NOTE: unpickling can execute arbitrary code; only load files written by this project.
peint_by_sector_dict = {}
with open('models_outputs/EPS_output/X_year_disp/peint_by_sector_Xyear.pickle','rb') as file:
    peint_by_sector_dict.update(pickle.load(file))

In [None]:
# Turn each sector's {ticker: [value, ratio]} mapping into a DataFrame with one
# row per ticker.
result_columns = ['intrinsic_value', 'intrval_vs_currentprice']
peint_by_sector = {
    sector: pd.DataFrame.from_dict(per_ticker, orient='index', columns=result_columns)
    for sector, per_ticker in peint_by_sector_dict.items()
}

In [None]:
# Inspect the per-ticker results of a single sector.
peint_by_sector['Industrials']