In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import yfinance as yf

from yahoo_fin import stock_info as si

from pandas_datareader import data

In [2]:
def cumm_prod(price_df, start=None, end=None):
    """
    Cumulative return of a price series, measured from its first observation.

    The period-over-period percentage changes are chained together
    (cumulative product of ``1 + pct_change``) so that every series is
    normalized to the same baseline and different stocks can be compared.

    Parameters
    ----------
    price_df : pandas Series or DataFrame of prices.
    start, end : optional index labels. When at least one is given, the
        prices are first restricted to ``price_df.loc[start:end]``; a missing
        ``end`` defaults to today's date.

    Returns
    -------
    Cumulative return as a fraction (0.21 means +21 %), with NaN rows
    dropped (the first row has no previous price to compare against).
    The input object is not modified.
    """
    if start is None and end is None:
        window = price_df
    else:
        if end is None:
            end = date.today()
        window = price_df.loc[start:end]

    return_series = (window.pct_change() + 1).cumprod() - 1
    return return_series.dropna()

def annualized_return(adjClose_return_df, n_years=None):
    """
    Turn a cumulative-return series into an annualized return, in percent.

    Formula: ((1 + total_return) ** (1 / n_years) - 1) * 100, where
    total_return is the last row of ``adjClose_return_df``.

    ``n_years`` has to match the period covered by the data: pass 1 for one
    year of cumulative returns, 3 for three years, and so on.

    Returns the transformed ``tail(1)`` of the input (one row). When
    ``n_years`` is None or 0 a reminder is printed and None is returned.
    """
    years_missing = n_years is None or n_years == 0
    if years_missing:
        print("Input How many years does your data covers!")
        return None

    total_return = adjClose_return_df.tail(1)
    growth_factor = 1 + total_return
    return (growth_factor ** (1 / n_years) - 1) * 100

In [3]:
def market_cap_cat(market_cap):
    """
    Bucket a market capitalization into 'Large', 'Medium' or 'Small'.

    Thresholds (in the same currency units as ``market_cap``):
      * above 10 billion                 -> 'Large'
      * above 2 billion, up to 10 billion -> 'Medium'
      * anything else                    -> 'Small'

    Returns None when ``market_cap`` cannot be compared with a number
    (for example None or a string).
    """
    large_floor = 10_000_000_000
    medium_floor = 2_000_000_000
    try:
        if market_cap > large_floor:
            return 'Large'
        # The previous upper bound was 1 billion (one zero short), which made
        # the 'Medium' range empty; anything not 'Large' but above 2 billion
        # belongs here.
        if market_cap > medium_floor:
            return 'Medium'
        return 'Small'
    except (TypeError, ValueError):
        # Non-numeric or ambiguous input: report "unknown" instead of failing.
        return None

def earnings_expectations(FPE, TPE):
    """
    Derive the expected earnings direction from forward vs. trailing P/E.

    Parameters
    ----------
    FPE : forward price/earnings ratio.
    TPE : trailing price/earnings ratio.

    Returns
    -------
    'Decrease' when ``FPE`` is greater than ``TPE``, otherwise 'Increase'
    (equal ratios count as 'Increase'). None when either value is missing
    or the two cannot be compared.
    """
    if FPE is None or TPE is None:
        return None
    try:
        return 'Decrease' if FPE > TPE else 'Increase'
    except (TypeError, ValueError):
        # Values that cannot be ordered (e.g. text vs. number).
        return None

def CAPM_analysis(beta, annual_return, market_return, risk_free_rate = 0.0192):
    """
    Compare a stock's annual return with its CAPM expected return.

    expected = risk_free_rate + beta * (market_return - risk_free_rate)

    ``annual_return``, ``market_return`` and ``risk_free_rate`` must share
    the same unit; the default risk-free rate of 0.0192 suggests fractions
    (1.92 %) rather than percentages.

    Returns
    -------
    ``(status, expected)`` where status is 'OverValued' when
    ``annual_return`` is above the expected return and 'UnderValued'
    otherwise. None when an input is missing or not numeric.

    NOTE(review): textbook CAPM usually reads a return above the expected
    return as *under*-valued; the labels are kept as originally written —
    confirm the intended meaning.
    """
    if beta is None or annual_return is None or market_return is None:
        return None
    try:
        capm_ER = risk_free_rate + beta * (market_return - risk_free_rate)
        capm_status = 'OverValued' if annual_return > capm_ER else 'UnderValued'
        return capm_status, capm_ER
    except (TypeError, ValueError):
        # Non-numeric inputs cannot be evaluated.
        return None
        
def intrinsic_valuation(ticker, eps_ttm, growth_estimate_5y, PE_trailing, target_return, safety_margin=0.5):
    """
    Estimate a discounted fair price and compare it with the live price.

    Steps:
      1. Grow ``eps_ttm`` at ``growth_estimate_5y`` for 9 periods.
      2. Multiply by ``PE_trailing`` to get a future price.
      3. Discount that price back 9 periods at ``target_return``.
      4. Scale by ``safety_margin``.
      5. Compare with ``si.get_live_price(ticker)``.

    Returns
    -------
    ``(status, fair_value)`` where status is 'Above Fair Price' when the
    live price exceeds the fair value and 'Below Fair Price' otherwise.
    None when any required input is None or when the computation or the
    price lookup fails.
    """
    required = (ticker, eps_ttm, growth_estimate_5y, PE_trailing, target_return)
    if any(value is None for value in required):
        return None

    try:
        forecast_eps_at_year_10 = eps_ttm * (1 + growth_estimate_5y) ** 9
        forecast_value_at_year_10 = forecast_eps_at_year_10 * PE_trailing

        pv_future_value = forecast_value_at_year_10 / (1 + target_return) ** 9
        pv_forecast_value_safe = pv_future_value * safety_margin

        if float(si.get_live_price(ticker)) > pv_forecast_value_safe:
            irv_status = 'Above Fair Price'
        else:
            irv_status = 'Below Fair Price'

        return irv_status, pv_forecast_value_safe
    except Exception:
        # Best effort: a failed lookup or bad number yields "no valuation".
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        return None

def get_ESG(ticker):
    """
    Look up the total ESG score for ``ticker`` through yfinance.

    Reads the 'totalEsg' entry of the 'Value' column of the ticker's
    ``sustainability`` table.

    Returns the score, or None when the data is unavailable or the lookup
    fails.
    """
    ticket = yf.Ticker(ticker)
    try:
        esg_data = pd.DataFrame(ticket.sustainability)['Value']['totalEsg']
        return esg_data
    except Exception:
        # Many tickers have no sustainability data; treat that as "no score".
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        return None

def esg_status(esg_Rscore):
    """
    Label an ESG score.

      * below 20          -> 'Sustainable'
      * 20 up to below 30 -> 'Average'
      * 30 and above      -> 'Not Sustainable'

    Returns None when ``esg_Rscore`` is None. The value is converted with
    ``float`` first, so numeric strings are accepted.
    """
    if esg_Rscore is None:
        return None

    score = float(esg_Rscore)
    if score < 20:
        return 'Sustainable'
    # The bands are contiguous: the earlier `21 < score < 30` test left
    # scores from 20 to 21 falling through to 'Not Sustainable'.
    if score < 30:
        return 'Average'
    return 'Not Sustainable'

In [4]:
# Benchmark: 10 years of SPY adjusted closes -> cumulative return -> annualized return.
spy_end = date.today()
# DateOffset handles Feb 29 (falls back to Feb 28); gluing year-10/month/day
# together as text produced an invalid date on leap days.
spy_start = pd.Timestamp(spy_end) - pd.DateOffset(years=10)
SPY_adj_close = si.get_data('SPY', start_date=spy_start.strftime('%Y-%m-%d'), end_date=str(spy_end))['adjclose']
cummulative_prod_SPY = cumm_prod(SPY_adj_close)
# annualized_return() yields a one-row Series in percent. Take the value by
# position with .iloc and convert to a fraction, so the benchmark uses the same
# unit as the per-ticker `annu_return_10y` and CAPM_analysis' risk_free_rate.
annu_return_10y_SPY = annualized_return(cummulative_prod_SPY, 10).iloc[0] / 100

In [6]:
# Empty results table, one row per ticker once filled.
# NOTE: a later cell re-creates `panel_data` with a shorter column list, which
# replaces this one when the notebook is run top to bottom.
panel_data = pd.DataFrame(
    columns=[
        # identification
        'ticker', 'name', 'sector',
        # derived labels
        'Earnings_expectation', 'irv_status', 'capm_status', 'esg_status',
        # market data and fundamentals
        'current_price', 'annu_return_10y', 'market_cap', 'marketcap_cat',
        'beta', 'eps_ttm', 'growth_estimate_5y', 'pe_forward', 'pe_trailing',
        'irv_FairValue',
        # ESG detail
        'socialScore', 'governanceScore', 'environmentScore', 'ESG_risk',
        'percentile',
        # CAPM
        'capm_expected_return',
    ]
)

In [7]:
def get_data_v2(ticker):
    """
    Collect quote data, fundamentals, 10-year annualized return and ESG
    score for one ticker.

    Data comes from yahoo_fin (`si`) plus `get_ESG`; every call hits the
    network, and any failure propagates to the caller.

    Returns
    -------
    dict with keys: 'ticker', 'name', 'pe_forward', 'pe_trailing',
    'market_cap', 'eps_ttm', 'current_price', 'beta', 'sector',
    'growth_estimate_5y' (fraction), 'annu_return_10y' (fraction) and
    'ESG_risk'. Quote fields missing from the Yahoo response are None.
    """
    quote = si.get_quote_data(ticker)

    beta = float(si.get_quote_table(ticker)['Beta (5Y Monthly)'])
    # The estimate is text ending in '%': strip the sign, convert to a fraction.
    # Row 4 is presumably the "next 5 years" estimate — TODO confirm against
    # the table returned by get_analysts_info.
    growth_est = float(si.get_analysts_info(ticker)['Growth Estimates'][ticker][4][:-1]) / 100
    sector = si.get_company_info(ticker)['Value']['sector']

    # Annualized return calculation (10 years).
    # DateOffset handles Feb 29 (falls back to Feb 28); gluing
    # year-10/month/day together as text produced an invalid date on leap days.
    end_day = date.today()
    start_day = pd.Timestamp(end_day) - pd.DateOffset(years=10)
    adj_close = si.get_data(ticker, start_date=start_day.strftime('%Y-%m-%d'), end_date=str(end_day))['adjclose']
    cummulative_prod = cumm_prod(adj_close)
    # One-row Series in percent -> scalar fraction; .iloc picks by position
    # (plain [0] on a date-indexed Series relies on deprecated behaviour).
    annu_return_10y = annualized_return(cummulative_prod, 10).iloc[0] / 100

    esg = get_ESG(ticker)

    return {
        'ticker': ticker,
        'name': quote.get('shortName'),
        'pe_forward': quote.get('forwardPE'),
        'pe_trailing': quote.get('trailingPE'),
        'market_cap': quote.get('marketCap'),
        'eps_ttm': quote.get('epsTrailingTwelveMonths'),
        'current_price': quote.get('regularMarketPrice'),
        'beta': beta,
        'sector': sector,
        'growth_estimate_5y': growth_est,
        'annu_return_10y': annu_return_10y,
        'ESG_risk': esg,
    }

In [8]:
            # 'marketcap_cat': market_cap_cat(values[3]),
            # 'esg_status': esg_status(esg['totalEsg']),
            # 'Earnings_expectation': earnings_expectations(values[1], values[2]),
            # 'irv_FairValue': intrinsic_valuation(ticker, values[4],  growth_est, values[2], 0.15)[1],
            # 'irv_status': intrinsic_valuation(ticker, values[4],  growth_est, values[2], 0.15)[0],
            # 'capm_status': CAPM_analysis(beta, annu_return_10y, annu_return_10y_SPY)[0],
            # 'capm_expected_return': CAPM_analysis(beta, annu_return_10y, annu_return_10y_SPY)[1]

In [9]:
# Empty results table that the download loop fills, one row per ticker.
panel_data = pd.DataFrame(
    columns=[
        # identification
        'ticker', 'name', 'sector',
        # derived labels
        'Earnings_expectation', 'irv_status', 'capm_status', 'esg_status',
        # market data and fundamentals
        'current_price', 'annu_return_10y', 'market_cap', 'marketcap_cat',
        'beta', 'eps_ttm', 'growth_estimate_5y', 'pe_forward', 'pe_trailing',
        'irv_FairValue',
        # ESG and CAPM
        'ESG_risk', 'capm_expected_return',
    ]
)

In [10]:
# Ticker universes fetched through yahoo_fin, one list per index/exchange.
# The lists are concatenated in the next cell, so the same symbol can appear
# in more than one of them.
sp500_list = si.tickers_sp500()
nasdaq_list = si.tickers_nasdaq()
ftse_list = si.tickers_ftse250()
dow_list = si.tickers_dow()

In [11]:
# All universes back to back, duplicates included (hence the name).
list_dup = [*sp500_list, *nasdaq_list, *ftse_list, *dow_list]
len(list_dup)

6362

In [12]:

# Drop duplicate symbols while keeping first-seen order. dict keys are unique
# and ordered, so this is a single linear pass instead of a list membership
# test per element inside a side-effect comprehension.
ticker_list = list(dict.fromkeys(list_dup))
len(ticker_list)


6161

In [13]:
# Position of 'NTNX' in the de-duplicated list; the next cell slices the list
# from this position to build `check_point`.
ticker_list.index('NTNX')

3996

In [14]:
# Resume from a named ticker rather than a hard-coded position: the ticker
# universe changes between runs, so a literal index copied from an earlier
# output silently points at a different symbol.
RESUME_TICKER = 'NTNX'
check_point = ticker_list[ticker_list.index(RESUME_TICKER):]

In [15]:
# Download every ticker in `check_point`, writing a cumulative CSV checkpoint
# every CHUNK_SIZE tickers. Rows are collected in a plain list and turned into
# a DataFrame only at checkpoints: `DataFrame.append` no longer exists in
# pandas 2.x, and combined with the former bare `except: pass` that meant every
# row was silently discarded.
CHUNK_SIZE = 500


def _with_rows(frame, rows):
    """Return `frame` extended by `rows` (a list of dicts), keeping its column order."""
    if not rows:
        return frame
    batch = pd.DataFrame(rows)
    if frame.empty:
        # Keep the declared columns first; unexpected keys go to the end.
        extra = [col for col in batch.columns if col not in frame.columns]
        return batch.reindex(columns=list(frame.columns) + extra)
    return pd.concat([frame, batch], ignore_index=True)


count = 0
chunk = 0
n_chunks = -(-len(check_point) // CHUNK_SIZE)  # ceiling division
panel_seed = panel_data   # rows collected before this cell ran
records = []              # one dict per successfully fetched ticker
failed_tickers = []       # (ticker, error) pairs, kept for later inspection

try:
    for ticker in check_point:
        try:
            info = get_data_v2(ticker)
            records.append(info)
        except Exception as exc:
            # Best effort: many symbols lack some data. Remember the failure
            # instead of hiding it; KeyboardInterrupt still stops the loop.
            failed_tickers.append((ticker, repr(exc)))
        count += 1

        if count == 250:
            print(count)

        if count == CHUNK_SIZE:
            chunk += 1
            count = 0
            panel_data = _with_rows(panel_seed, records)
            print('Last ticker processed: ' + str(ticker))
            print(str(chunk) + '/' + str(n_chunks))
            panel_data.to_csv(f'stock_date_chunk_postCheck_{chunk}.csv', index=False)
finally:
    # Also runs on interruption, so `panel_data` always holds every fetched row.
    panel_data = _with_rows(panel_seed, records)




       

250
Last ticker processed: PTICU
1/5
250
Last ticker processed: SLNG
2/5
250
Last ticker processed: UFPT
3/5
250
Last ticker processed: EDIN
4/5


In [16]:
# Final save of the full table. The loop above only writes a CSV after each
# complete block of 500 tickers, so rows from a trailing partial block reach
# disk only through this file.
panel_data.to_csv('stock_data_Last.csv', index=False)