In [126]:
from yahooquery import Ticker
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
import warnings

In [129]:
warnings.filterwarnings("ignore", message="'S' is deprecated and will be removed in a future version. Please use 's' instead of 'S'.", category=FutureWarning)
warnings.filterwarnings(
    "ignore", 
    message="A value is trying to be set on a copy of a DataFrame or Series through chained assignment",
    category=FutureWarning
)

We will be using these different portfolios as training data for our models. Depending on the ```ticker``` and ```client``` type, we will select a portfolio accordingly to train our model. For example, if AAPL is passed as the ticker, we will select the Technology sector and use that list to train our model. However, if ```client``` is specified, then we want to see how the given ticker performs in comparison to other stocks in that category. As a result, when client is passed we instead train on the respective portfolio: ```esg```, ```income``` or ```growth```. 

We will implement a Random Forest classifier to make a decision on our investment. The reason behind this choice is that we have three different outputs that we can choose from: ```buy```, ```sell```, or ```hold```. As a result, this becomes a classification problem, and we can employ the random forest to solve this problem. Based on my feature engineering and the respective interests of the investors in this task, I have carefully chosen features that can soundly predict the potential price movement. The random forest will use these features and a target variable to analyse the complex relationships that have led to price movements in the past. 

Our features are:
- Total ESG
- Social Score
- Governance Score
- ESG percentile
- Beta
- Market Cap
- Average 5 year Dividend Yield
- Forward Price to Earnings Ratio
- Price to Book Ratio

Our target variable is ```buy```, ```sell```, or ```hold```. We devise this target based on the performance of a particular stock in the last 10 years compared to the S&P 500. These are the conditions: 
- If a stock outperformed the S&P 500 by more than 15% in the last 10 years, then we make the target ```buy```. 
- If a stock underperformed the S&P 500 by more than 10% in the last 10 years, then it is a ```sell```. 
- Otherwise, it is a ```hold```. 

We then devise our explanation based on the particular features that stand out in the dataset of features. We also scrap some stocks immediately when a client is given. These are the cases where we scrap a stock entirely before running our model. 
- For income: if the stock's dividend is less than the mean dividend, then it is a ```sell```.
- For ESG: if the stock's ESG is below the 50th percentile for ESG score then it is a ```sell```. 

This helps us tailor the portfolio for a respective client.  

The sectors in the S&P 500 (and, therefore, that cover the whole market) are:

- Energy
- Materials
- Industrials
- Consumer Cyclical
- Consumer Defensive
- Health Care
- Financials
- Technology
- Communication Services
- Utilities
- Real Estate

We will use these sectors as training data for the models. Once we pass in a ticker to our invest function, we will find which sector it falls under and use stocks in that sector to train the model. 

The list of stocks below show the stocks that we will use to train our models for each of the aforementioned sectors, as well as the stocks we will use to train the specific client-focused portfolios. 

In [161]:
communication_services = ['AMX', 'MTCH', 'ORAN', 'CHTR', 'KT', 'TIMB', 'NTES', 'TTWO', 'SIRI', 'DASH', 
                          'EDR', 'RELX', 'OMC', 'SKM', 'PARA', 'BIDU', 'BCE', 'ROKU', 'FWONA', 'NFLX', 
                          'CHT', 'DIS', 'IQ', 'Z', 'TU', 'PSO', 'CMCSA', 'T', 'TMUS', 'VZ', 'VOD', 'IPG', 
                          'RBLX', 'WBD', 'EA']

consumer_cyclical = ['QSR', 'ROST', 'LI', 'SBUX', 'ULTA', 'JD', 'MBLY', 'HD', 'BKNG', 'PDD', 'LULU', 'ROL', 
                     'ORLY', 'EBAY', 'STLA', 'MELI', 'FLUT', 'YUM', 'HLT', 'CPRT', 'ABNB', 'NKE', 'CMG', 'RACE',
                     'MCD', 'DHI', 'BABA', 'PHM', 'F', 'HMC', 'LOW', 'AZO', 'NVR', 'AMZN', 'TJX', 'DECK', 'TSLA', 
                     'LVS', 'TM']

consumer_defensive = ['COST', 'SYY', 'ABEV', 'KOF', 'BTI', 'PM', 'BUD', 'MDLZ', 'CELH', 'MO', 'WMT', 'KVUE', 
                      'TSN', 'HSY', 'KMB', 'CAG', 'K', 'EDU', 'DLTR', 'BG', 'COTY', 'STZ', 'KDP', 'ACI', 'TGT', 
                      'KR', 'GIS', 'DEO', 'FMX', 'UL', 'CL', 'CCEP', 'USFD', 'KO', 'KHC', 'PEP', 'PG']

energy = ['TTE', 'EOG', 'E', 'RRC', 'CVX', 'ENB', 'PSX', 'NE', 'CSAN', 'TS', 'SHEL', 'COP', 'TRGP', 'SLB', 
          'YPF', 'SWN', 'DINO', 'KMI', 'OKE', 'MRO', 'VLO', 'PR', 'ET', 'OVV', 'XOM', 'CNQ', 'BP', 'AR', 
          'EPD', 'CTRA', 'AM', 'SU', 'EC', 'PBA', 'CIVI', 'MUR', 'WES', 'PXD', 'EQNR', 'DVN']

financials = ['NU', 'MFC', 'JPM', 'LYG', 'RY', 'MET', 'MCO', 'BK', 'AIG', 'AFL', 'CM', 'PNC', 'ALL', 'AXP',
              'KKR', 'MA', 'COF', 'BBVA', 'ARES', 'BNS', 'PGR', 'CB', 'AMP', 'BSBR', 'HSBC', 'V', 'APO', 'BAC', 
              'BLK', 'GS', 'MS', 'C', 'SPGI', 'IBN', 'SCHW', 'BX', 'UBS', 'WFC', 'TRV']

healthcare = ['BMY', 'CNC', 'A', 'GEHC', 'VRTX', 'CVS', 'ZTS', 'GSK', 'MDT', 'BSX', 'PFE', 'TAK', 'SYK', 'IDXX', 
              'NVO', 'ISRG', 'GILD', 'HCA', 'ICLR', 'MRK', 'REGN', 'BIIB', 'ABT', 'IQV', 'HLN', 'CI', 'SNY', 
              'VEEV', 'ELV', 'LLY', 'ALC', 'AMGN', 'BDX', 'TMO', 'EW']

industrials = ['ODFL', 'GE', 'FERG', 'XYL', 'CNI', 'URI', 'JCI', 'UPS', 'VRT', 'HEI', 'HWM', 'AXON', 'PH', 
               'BLDR', 'WM', 'EMR', 'ROK', 'AVY', 'EFX', 'CSX', 'PWR', 'TT', 'LHX', 'FAST', 'OTIS', 'VLTO', 
               'ITW', 'GPN', 'J', 'AME', 'DOV', 'IR', 'DAL', 'JBHT', 'TDG', 'GWW', 'WAB', 'MMM', 'CMI', 'ROP', 
               'PCAR', 'GD', 'NSC', 'RSG', 'FDX', 'CP', 'WCN', 'ADP', 'RYAAY', 'CAT', 'BAH', 'IEX', 'HUBB', 
               'TRI', 'CARR', 'VRSK', 'LUV', 'PAYX', 'CTAS', 'NOC']

materials = ['ALB', 'SUZ', 'X', 'TX', 'ICL', 'TECK', 'RS', 'PPG', 'WPM', 'MOS', 'FNV', 'SUM', 'RPM', 'BHP', 
             'SHW', 'STLD', 'EXP', 'AVTR', 'UFPI', 'EMN', 'CLF', 'IFF', 'LIN', 'SQM', 'FMC', 'GGB', 'VALE', 
             'CE', 'AU', 'RGLD', 'GFI', 'CF', 'CTVA', 'WLK', 'JHX', 'CX', 'AXTA', 'APD']

real_estate = ['BXP', 'OHI', 'UDR', 'EGP', 'FSV', 'SUI', 'CUBE', 'FRT', 'SBAC', 'KIM', 'NLY', 'EQIX', 'EQR', 
               'AMH', 'CPT', 'VTR', 'REG', 'CBRE', 'NNN', 'PEAK', 'BEKE', 'REXR', 'INVH', 'O', 'KSPI', 'PLD',
               'ARE', 'JLL', 'AMT', 'WPC', 'HST', 'MAA', 'IRM', 'ELS', 'FR', 'LAMR', 'COLD', 'ESS', 'VICI', 
               'RHP', 'GLPI']

technology = ['CDNS', 'MRVL', 'SONY', 'ADI', 'KLAC', 'SHOP', 'CRWD', 'APH', 'QCOM', 'NOW', 'IBM', 'INTU', 
              'ARM', 'PLTR', 'INFY', 'AMAT', 'FTNT', 'PANW', 'MU', 'TEAM', 'ORCL', 'NVDA', 'INTC', 'CSCO', 
              'SAP', 'MSFT', 'AAPL', 'WDAY', 'SMCI', 'COIN', 'UBER', 'MSI', 'SQ', 'NXPI', 'MCHP', 'ADSK', 
              'ANET', 'DELL', 'SNOW', 'LRCX', 'SNPS', 'TXN', 'FI']

utilities = ['AGR', 'NEE', 'VST', 'FTS', 'PPL', 'EVRG', 'CMS', 'ES', 'SBS', 'EXC', 'WTRG', 'ELPC', 'BIP', 
             'SO', 'PEG', 'CNP', 'KEP', 'FE', 'D', 'XEL', 'LNT', 'WEC', 'EBR', 'AEE', 'ATO', 'ED', 'AWK', 
             'BEP', 'ETR', 'NI', 'DTE', 'EIX', 'NRG']

In [160]:
esg_stocks = [
    "MSFT", "AMAT", "WWD", "VRSK", "MA", "CAT", "MPC", "NVDA", "DOV", "MSI",
    "BG", "SHW", "DELL", "ADBE", "MDLZ", "CHD", "META", "APD", "IT", "AJG",
    "LRCX", "TJX", "OC", "JBL", "GOOGL", "DECK", "ANET", "ON", "TTEK", "MAR",
    "LLY", "J", "V", "CMC", "FSS", "FTNT", "ZTS", "CDNS", "CLH", "WDAY",
    "MCHP", "CSGP", "CMG", "PLD"]

dividend_stocks = ["FAST", "CHRW", "SJM", "CHD", "IBM", "CAT", "ALB", "ESS", "NEE", "CB",
    "EXPD", "LIN", "BRO", "O", "WST", "AOS", "ROP", "ECL", "GD", "CVX",
    "CAH", "TROW", "MKC", "ATO", "CTAS", "AMCR", "XOM", "APD", "AFL",
    "BEN", "SHW", "CLX", "MDT", "MCD", "PNR", "ADP", "LOW", "ED", "WMT", "AAPL"]

growth_stocks = ["SMCI", "GCT", "DRCT", "APP", "ANF", "PLTR", "COIN", "ELF", "ROVR", "CELH", "CRM",
          "NVDA", "COST", "TMUS", "SE", "NFE", "BMRN", "PLNT", "SG", "AMD", "NXPI", "JOE", "MATX", "MPWR", 
          "ITIC", "KNSL", "ONTO", "TFII", "MLI", "TSLA", "WIRE", "WST", "DHR", "IT", "ON", "REGN", 
          "AAPL", "AOSL", "MMC", "PTSI"]

In [208]:
sector_mapping = {
    "Energy": energy, 
    "Basic Materials": materials, 
    "Industrials": industrials, 
    "Consumer Cyclical": consumer_cyclical, 
    "Consumer Defensive": consumer_defensive, 
    "Healthcare": healthcare, 
    "Financial Services": financials, 
    "Technology": technology, 
    "Communication Services": communication_services, 
    "Utilities": utilities, 
    "Real Estate": real_estate, 
    "esg": esg_stocks, 
    "income": dividend_stocks, 
    "growth": growth_stocks
}

In [169]:
def get_sector(ticker):
    """
    Returns the sector of a stock.
    """
    stock = Ticker(ticker)
    return stock.asset_profile[ticker]["sector"]

In [195]:
def compare_returns(ticker):
    """
    Compares return of a specific stock relative to the returns of the S&P 500 over the last 10 years
    and returns the data in dictionary format.
    """
    spx_ticker = '^GSPC'
    
    end_date = pd.Timestamp.now()  
    start_date = end_date - pd.DateOffset(years=10)  

    stock_ticker = Ticker(ticker)
    spx_ticker_data = Ticker(spx_ticker)
    
    try:
        stock_history = stock_ticker.history(start=start_date, end=end_date)
        spx_history = spx_ticker_data.history(start=start_date, end=end_date)
    except:
        start_date = end_date - pd.DateOffset(years=3)  


    stock_return = (stock_history['close'].iloc[-1] / stock_history['close'].iloc[0]) - 1
    spx_return = (spx_history['close'].iloc[-1] / spx_history['close'].iloc[0]) - 1

    return_comparison = {
        'stock_return': stock_return,
        'spx_return': spx_return,
        'difference': stock_return - spx_return
    }

    return return_comparison


def format_data(ticker, client=False):
    """
    Fetches the data for a stock and creates the features that we will be using for our analysis in dictionary
    format. Depending on the client, we use some different features in those cases. 
    """
    stock = fetch_ticker(ticker)
    esg = stock.esg_scores[ticker]
    
    try:
        total_esg = esg['totalEsg'] if esg['totalEsg'] is not None else 0
    except:
        total_esg = 0
        
    try:
        social_score = esg['socialScore'] if esg['socialScore'] is not None else 0
    except:
        social_score = 0
        
    try:
        governance = esg['governanceScore'] if esg['governanceScore'] is not None else 0
    except: 
        governance = 0
        
    try:
        esg_percentile = esg['percentile'] if esg['percentile'] is not None else 0
    except:
        esg_percentile = 0
        
    try:
        summary = stock.summary_detail[ticker]
    except:
        beta = 0
        market_cap = 0
        avg_dividend_yield = 0
        trailing_pe = 0
        
    try:
        beta = summary['beta'] if summary["beta"] is not None else 0
    except:
        beta = 0
        
    try:
        market_cap = summary["marketCap"] if summary["marketCap"] is not None else 0
    except:
        market_cap = 0
        
    try:
        avg_dividend_yield = summary["fiveYearAvgDividendYield"] if summary["fiveYearAvgDividendYield"] is not None else 0
    except:
        avg_dividend_yield = 0
        
    try:
        forward_pe = summary["forwardPE"] if summary["forwardPE"] is not None else 0
    except:
        forward_pe = 0
        
    key_stats = stock.key_stats[ticker]
    
    try:
        price_to_book = key_stats["priceToBook"] if key_stats["priceToBook"] is not None else 0
    except:
        price_to_book = 0
    
    earning_deviation = []
    flag = False
    try:
        for earning in stock.earnings[ticker]["earningsChart"]["quarterly"]:
            if earning["actual"] is pd.nan or earning["estimate"] is pd.nan:
                earning_performance = 0
                flag = True
                break
            actual = earning["actual"]
            estimate = earning["estimate"]
            earning_deviation.append((actual - estimate) / estimate)
        if not Flag:
            earning_performance = np.mean(earning_deviation) * 100
    except:
        earning_performance = 0
    
    if not client:
        data = {
            'total_esg': total_esg,
            'social_score': social_score,
            'governance_score': governance,
            'esg_percentile': esg_percentile,
            'beta': beta,
            'market_cap': market_cap,
            'avg_dividend_yield': avg_dividend_yield,
            'forward_pe': forward_pe,
            'price_to_book': price_to_book
        }
    if client == "growth":
        data = {
            'total_esg': total_esg,
            'social_score': social_score,
            'governance_score': governance,
            'esg_percentile': esg_percentile,
            'beta': beta,
            'market_cap': market_cap,
            'forward_pe': forward_pe, 
            'price_to_book': price_to_book
        }
    
    if client == "income":
        data = {
            'total_esg': total_esg,
            'beta': beta,
            'market_cap': market_cap,
            'avg_dividend_yield': avg_dividend_yield,
            'forward_pe': forward_pe, 
            'price_to_book': price_to_book
            
        }
    else:
        data = {
            'total_esg': total_esg,
            'social_score': social_score,
            'governance_score': governance,
            'esg_percentile': esg_percentile,
            'beta': beta,
            'market_cap': market_cap,
            'avg_dividend_yield': avg_dividend_yield,
            'forward_pe': forward_pe, 
            'price_to_book': price_to_book
        }
    ret_comp = compare_returns(ticker)
    difference = ret_comp["difference"]
    if difference > 0.15:
        target = 'buy'
    elif difference < -0.1:  
        target = 'sell'
    else:  
        target = 'hold'
    

    data['target'] = target
    
    return data
    
    
def create_dataset(tickers, client=False):
    """
    Given a list of stock tickers, we create a DataFrame of features for training. 
    """
    dataset = []
    
    for ticker in tickers:
        try:
            formatted_data = format_data(ticker, client)
            dataset.append(formatted_data)
        except Exception as e:
            stock = Ticker(ticker)
            print(f"Error processing {ticker}: {e}")
    
    df = pd.DataFrame(dataset)
    
    return df

Here, we go through our whole set of training stocks and calculate the average dividend yield. We will use this if the client is income focused, and will determine which stocks to consider with this average yield. 

Average Dividend Yield in training data = 2.895

In [137]:
"""
Calculate 5 year average dividend yield from all stocks in training set
"""
dividend_yields = []
for sector, sector_list in sector_mapping.items():
    for ticker in sector_list:
        formatted_data = format_data(ticker)
        avg_yield = formatted_data["avg_dividend_yield"]
        if avg_yield != 0:
            dividend_yields.append(avg_yield)
            
print(np.mean(dividend_yields))

2.8949415204678366


Feature Engineering Process:
I played around with several features during the training process. One thing that I thought was interesting was how the earning performance (the percentage above or below the expected earnings) had no importance whatsoever in the model, as seen below (example on ESG portfolios):

```total_esg```            0.247307

```social_score```         0.159644

```esg_percentile```       0.153762

```governance_score```     0.121317

```avg_dividend_yield```   0.088401

```forward_pe```           0.080799

```beta```                 0.064226

```price_to_book```        0.055584

```market_cap```           0.028960

```earning_performance```  0.000000


This led me to remove it altogether from the models.

Below, we can see each of the models after being trained and tested. I include the accuracy of the model (how many predictions it got correct in the testing set) and the importance of the features so that we can get a slighlty better understanding of what is influencing the predictions. This will also help us to formulate qualitative explanations as to why a model outputs a specific investment decision. 

In [163]:
def important_features(model, columns):
    importances = model.feature_importances_
    feature_importances = pd.Series(importances, index=columns).sort_values(ascending=False)
    return feature_importances

In [196]:
models = {}

for sector, sector_list in sector_mapping.items():
    if sector == "esg" or sector == "income" or sector == "growth": 
        dataset = create_dataset(sector_list, sector)
    else:
        dataset = create_dataset(sector_list)

    X = dataset.drop('target', axis=1)
    y = dataset['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(random_state=42)
    
    try:
        model.fit(X_train, y_train)
    except:
        print(X_train, y_train)    
    
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_pred, y_test)
    models[sector] = model
    print(f"{sector} sector: \nAccuracy: {accuracy}")
    print(f"{important_features(model, X.columns)}\n\n")
    

Energy sector: 
Accuracy: 1.0
total_esg             0.211876
esg_percentile        0.189161
governance_score      0.180121
forward_pe            0.099516
avg_dividend_yield    0.094871
social_score          0.091669
price_to_book         0.055485
market_cap            0.043056
beta                  0.034245
dtype: float64


Basic Materials sector: 
Accuracy: 0.5
price_to_book         0.240607
forward_pe            0.164594
market_cap            0.146131
beta                  0.114135
social_score          0.081241
avg_dividend_yield    0.080268
total_esg             0.070819
governance_score      0.064981
esg_percentile        0.037224
dtype: float64


Industrials sector: 
Accuracy: 0.75
forward_pe            0.266509
price_to_book         0.187230
avg_dividend_yield    0.110749
governance_score      0.098592
market_cap            0.079598
beta                  0.078983
esg_percentile        0.067653
total_esg             0.055815
social_score          0.054872
dtype: float64


Consume

Below, we define a ```generate_recommendation(decision, sector)``` function, which takes a decision of either ```buy```, ```sell``` or ```hold``` and ```sector```, which is any of the given sectors that were listed above, as well as the client options of esg, income or growth, and returns a qualitative explanation as to why that specific decision was taken. We will use the importance of different features in each of the models to determine the biggest factors in why a model gave a specific decision. 

In [217]:
def generate_explanation(decision, sector):
    """
    Based on a stock's sector (or portfolio type in the case of having a specified client)
    we will return a qualitative explanation for the respective investment decision. 
    """
    if sector == "Energy":
        # ESG big factor
        if decision == "buy":
            return "Stock shows high levels of ESG, including a good social and governance score, which have made it an attractive investment in the Energy sector."
        if decision == "sell":
            return "Stock hasn't shown high enough levels of ESG, which makes it a poor stock in the Energy sector."
        
        if decision == "hold":
            return "Stock is stable and has decent ESG but hasn't shown other factors which will make it outperform or underperform the market."
        
    elif sector == "Basic Materials":
        if decision == "buy":
            return "Stock shows a low price to book ratio, as well as a relatively low forward price to earnings ratio, meaning the market is likely under-pricing it."
        if decision == "sell":
            return "Stock appears to be overvalued in the market given its high price to book and price to earning ratios."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its price to earnings and book ratios and isn't a good opportunity to buy or sell."

    elif sector == "Industrials":
        if decision == "buy":
            return "Stock shows a low price to book ratio, as well as a relatively low forward price to earnings ratio, meaning the market is likely under-pricing it."
        if decision == "sell":
            return "Stock appears to be overvalued in the market given its high price to book and price to earning ratios."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its price to earnings and book ratios and isn't a good opportunity to buy or sell."

    elif sector == "Consumer Cyclical":
        if decision == "buy":
            return "Stock shows a low price to book ratio, as well as a relatively low forward price to earnings ratio, meaning the market is likely under-pricing it."
        if decision == "sell":
            return "Stock appears to be overvalued in the market given its high price to book and future price to earning ratios."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its future price to earnings and book ratios and isn't a good opportunity to buy or sell."
            
    elif sector == "Consumer Defensive":
        if decision == "buy":
            return "Stock shows that its future price to earnings ratio is very favorable, and its volatility is a good factor in future upward price movements and therefore presents itself at a cheap price to buy."
        if decision == "sell":
            return "High price to earnings ratio which makes it favorable to sell and expect a future decrease in the price."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its future price to earnings and book ratios and isn't a good opportunity to buy or sell."
        
    elif sector == "Healthcare":
        if decision == "buy":
            return "High dividend, stable volatility and low future price to earnings ratio make it a good candidate to buy."
        if decision == "sell":
            return "Stock appears to be overvalued given its high future price to earnings ratio and unstable due to its volatility, making it a good candidate to sell."
        
        if decision == "hold":
            return "Stock appears to be fairly valued and stable and doesn't present an opportunity in either direction."
    
    elif sector == "Financial Services":
        if decision == "buy":
            return "Stock shows a low price to book ratio, as well as a relatively low forward price to earnings ratio, meaning the market is likely under-pricing it."
        if decision == "sell":
            return "Stock appears to be overvalued in the market given its high price to book and price to earning ratios."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its price to earnings and book ratios and isn't a good opportunity to buy or sell."
        
    elif sector == "Technology":
        if decision == "buy":
            return "Stock shows a good dividend yield, a low price to book ratio, as well as a good volatility and momentum, making it a good candidate to buy."
        if decision == "sell":
            return "Stock shows a very low dividend and appears to be overvalued and unstable, making it likely to decrease in the future and a good candidate to sell."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its price to earnings and book ratios and isn't a good opportunity to buy or sell."
    
    elif sector == "Communication Services":
        if decision == "buy":
            return "Stock shows that its future price to earnings ratio is very favorable, and its volatility and market cap are good factor in future upward price movements and therefore presents itself at a cheap price to buy."
        if decision == "sell":
            return "High price to earnings ratio which makes it favorable to sell and expect a future decrease in the price."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its future price to earnings and book ratios and isn't a good opportunity to buy or sell."
                    
    elif sector == "Utilities":
        if decision == "buy":
            return "Stock shows a low price to book ratio, a good beta relative to the market and a solid dividend income, making it a good candidate to buy."
        if decision == "sell":
            return "Stock appears to be overvalued in the market given its high price to book and lacking of a dividend income, making it a good candidate to sell."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its future price to book ratio and isn't a good opportunity to buy or sell."
        
    elif sector == "Real Estate":
        if decision == "buy":
            return "Stock shows a low price to book ratio, a low forward price to earnings ratio, and a high dividend yield meaning the market is likely under-pricing it and making it a good investment opportunity."
        if decision == "sell":
            return "Stock appears to be overvalued in the market given its high price to book and future price to earning ratios and lacking of a good dividend, making it a good candidate to sell."
        
        if decision == "hold":
            return "Stock appears to be valued at a fair price given its future price to earnings and book ratios and isn't a good opportunity to buy or sell."

    elif sector == "esg":
        if decision == "buy":
            return "Stock shows high levels of ESG, including a good social and governance score, as well as a low future price to earnings ratio, making it an attractive ESG investment to buy."
        if decision == "sell":
            return "Stock shows high levels of ESG but appears to be overvalued in the market and is a good opportunity to sell."
        if decision == "hold":
            return "Stock is stable and has a good ESG but hasn't shown other factors which will make it outperform or underperform the market."
            
    elif sector == "income":
        if decision == "buy":
            return "Stock has a high dividend and has low future price to earnings and price to book ratios which makes it undervalued in the current market and a good investment opportunity to buy."
        if decision == "sell":
            return "Stock has a high dividend but appears to be overpriced in the current market and is a good opportunity to sell."
        if decision == "hold":
            return "Stock has a good dividend but is fairly valued and is not a good candidate to buy or sell."
            
    elif sector == "growth":
        if decision == "buy":
            return "Stock shows high beta meaning its a good candidate for a growth investment to buy."
        if decision == "sell":
            return "Stock has low beta and doesn't seem like it'd outperform other growth investments and would be a good selling opportunity for a growth portfolio."
        
        if decision == "hold":
            return "Stock doesn't show great momentum in any direction and is likely to hold in price."

The ```invest``` function takes two arguments: ```ticker``` and ```client``` (which is ```False``` by default). The function then returns two strings in tuple format, where the first string represents the investment decision (```buy```, ```sell```, or ```hold```) and the second string is the qualitatitive explanation as to why that decision was made. 

For any ticker that is passed to this function, we use its sector (if no client is referenced), or its client preference (if mentioned) to access the respective Random Forest Classifier that is pre-computed and stored in the dictionary ```models```. We then use the ```.predict()``` method after pulling the respective features from ```yahooquery``` and return the investment decision and the qualitative explanation. 

In [198]:
def make_prediction(model, data):
    X = pd.DataFrame([data]).drop('target', axis=1)
    prediction = model.predict(X)
    return prediction[0]

In [199]:
def invest(ticker, client=False):
    if client is False:
        sector = get_sector(ticker)
        model = models[sector]
        data = format_data(ticker)
        decision = make_prediction(model, data)
    else:
        data = format_data(ticker, client)
        if client == "esg":
            if formatted_data["esg_percentile"] < 30:
                explanation = "ESG is too low for an ESG portfolio"
                decision = "sell"
                return (decision, explanation)
            
        elif client == "income":
            if formatted_data["avg_dividend_yield"] < 2.8949415204678366:
                explanation = "Dividend is too low for income portfolio."
                decision = "sell"
                return (decision, explanation)                
                    
            
        model = models[client]
        decision = make_prediction(model, data)
    sector = client if client is not False else sector
    explanation = generate_explanation(decision, sector)
    return (decision, explanation)

Now, I'll show some examples of how the function works with different stock tickers. 

In [200]:
invest("NVDA", "esg")

('sell', 'ESG is too low for an ESG portfolio')

In [201]:
invest("CSX")

('buy',
 'Stock shows a low price to book ratio, as well as a relatively low forward price to earnings ratio, meaning the market is likely under-pricing it.')

In [202]:
invest("NVDA", "growth")

('buy',
 'Stock shows high beta meaning its a good candidate for a growth investment to buy.')

In [203]:
invest("AAPL", "income")

('buy',
 'Stock has a high dividend and has low future price to earnings and price to book ratios which makes it undervalued in the current market and a good investment opportunity to buy.')

In [204]:
invest("AMZN")

('sell',
 'Stock appears to be overvalued in the market given its high price to book and future price to earning ratios.')

In [206]:
invest("DUK")

('sell',
 'Stock appears to be overvalued in the market given its high price to book and lacking of a dividend income, making it a good candidate to sell.')

In [218]:
import random
# For each of the sectors
for sector, sector_list in sector_mapping.items():
    rand_ticker = random.choice(sector_list)
    print(f"Ticker: {rand_ticker}\n{invest(rand_ticker)}\n")

Ticker: SU
('sell', "Stock hasn't shown high enough levels of ESG, which makes it a poor stock in the Energy sector.")

Ticker: FMC
('sell', 'Stock appears to be overvalued in the market given its high price to book and price to earning ratios.')

Ticker: HWM
('buy', 'Stock shows a low price to book ratio, as well as a relatively low forward price to earnings ratio, meaning the market is likely under-pricing it.')

Ticker: ABNB
('sell', 'Stock appears to be overvalued in the market given its high price to book and future price to earning ratios.')

Ticker: DEO
('sell', 'High price to earnings ratio which makes it favorable to sell and expect a future decrease in the price.')

Ticker: NVO
('buy', 'High dividend, stable volatility and low future price to earnings ratio make it a good candidate to buy.')

Ticker: MS
('sell', 'Stock appears to be overvalued in the market given its high price to book and price to earning ratios.')

Ticker: SNOW
('sell', 'Stock shows a very low dividend and 