In [24]:
# Install packages if needed
#!pip install yfinance ta pandas scikit-learn joblib

# --- 1. Import Libraries ---
import yfinance as yf
import pandas as pd
import numpy as np
import ta
import joblib

# --- 2. Load Model, Scaler and Fundamentals Helper ---
# SECURITY: joblib.load and dill.load both unpickle their input, which can run
# arbitrary code. Only point these at artifact files you produced or trust.
import dill

rf_model = joblib.load('asset_selection_model.pkl')
scaler = joblib.load('scaler.pkl')

# get_fundamentals is a serialized callable; it is invoked once per ticker below.
with open('get_fundamentals.pkl', 'rb') as fundamentals_file:
    get_fundamentals = dill.load(fundamentals_file)
# --- 3. Define Tickers ---
TICKERS = ['AAPL', 'GOOGL', 'AMZN', 'MSFT', 'NVDA']

# --- 4. Download Price History (60 calendar days ending at END_DATE) ---
import datetime

# END_DATE is pinned so the run is repeatable. To score "as of today" instead,
# derive the window from the clock:
#   END_DATE = datetime.datetime.today().date()
#   START_DATE = END_DATE - datetime.timedelta(days=60)
END_DATE = '2020-10-30'
START_DATE = (
    datetime.datetime.strptime(END_DATE, "%Y-%m-%d")
    - datetime.timedelta(days=60)
)

# group_by='ticker' puts the ticker on the first column level, which is the
# layout create_features iterates over.
data = yf.download(
    TICKERS,
    start=START_DATE,
    end=END_DATE,
    group_by='ticker',
    auto_adjust=True,
)

# --- 5. Create Latest Features ---
def create_features(data):
    """Build a long-format table of technical indicators, one row per (date, ticker).

    Parameters
    ----------
    data : pandas.DataFrame
        Price frame whose columns are a MultiIndex with the ticker on the
        first level; each ticker's sub-frame must contain a 'Close' column.

    Returns
    -------
    pandas.DataFrame
        The per-ticker frames stacked vertically, with the row index moved
        into a regular column and these columns appended to the price
        columns: return_5d, return_20d, volatility_20d, rsi_14, macd,
        macd_signal, bollinger_h, bollinger_l, ticker. The return and
        volatility columns are NaN until their look-back window is filled.
    """
    per_ticker_frames = []

    for symbol in data.columns.levels[0]:
        frame = data[symbol].copy()
        close = frame['Close']
        # The `ta` indicators are fed the squeezed close, as a 1-D series.
        close_1d = close.squeeze()

        # Momentum and risk measured directly from the close.
        frame['return_5d'] = close.pct_change(5)
        frame['return_20d'] = close.pct_change(20)
        frame['volatility_20d'] = close.pct_change().rolling(20).std()

        # Oscillator / trend / band indicators from the `ta` package.
        rsi_indicator = ta.momentum.RSIIndicator(close_1d, window=14)
        frame['rsi_14'] = rsi_indicator.rsi()

        macd_indicator = ta.trend.MACD(close_1d)
        frame['macd'] = macd_indicator.macd()
        frame['macd_signal'] = macd_indicator.macd_signal()

        bands = ta.volatility.BollingerBands(close_1d)
        frame['bollinger_h'] = bands.bollinger_hband()
        frame['bollinger_l'] = bands.bollinger_lband()

        frame['ticker'] = symbol
        per_ticker_frames.append(frame)

    # Stack all tickers and turn the shared row index into an ordinary column.
    return pd.concat(per_ticker_frames).reset_index()

# Technical-indicator table covering every ticker in the download.
latest_features = create_features(data)

# --- 6. Add Fundamentals ---
# Only fundamentals are attached in this step; no sentiment feature is added.
# get_fundamentals is called once per ticker and unpacked as (pe, pb)
# (presumably price-to-earnings and price-to-book; confirm against its source).
# NOTE(review): confirm whether these values are point-in-time for END_DATE
# or simply whatever the helper returns when the cell is run.
latest_fundamentals = []
for ticker in TICKERS:
    pe, pb = get_fundamentals(ticker)
    latest_fundamentals.append(dict(ticker=ticker, pe_ratio=pe, pb_ratio=pb))

# One row per ticker, keyed by ticker, then broadcast onto every dated row.
latest_fundamentals_df = pd.DataFrame(latest_fundamentals)
latest_fundamentals_df = latest_fundamentals_df.set_index('ticker')
latest_features = pd.merge(
    latest_features,
    latest_fundamentals_df,
    on='ticker',
    how='left',
)

# --- 8. Prepare Dataset for Prediction ---
# Columns fed to the scaler and model, in this order.
# NOTE(review): the order presumably has to match the one used when the
# scaler/model were fitted; confirm against the training notebook.
feature_cols = [
    'return_5d', 'return_20d', 'volatility_20d', 'rsi_14',
    'macd', 'macd_signal', 'bollinger_h', 'bollinger_l',
    'pe_ratio', 'pb_ratio'
]

# Keep only rows where every feature is available. The explicit copy makes the
# filtered frame independent, so adding the prediction column below writes to
# this frame and nothing else.
latest_features = latest_features.dropna(subset=feature_cols).copy()

# Fail early with an actionable message instead of an opaque error from the
# scaler when nothing survives (history too short for the indicators, or
# fundamentals missing for every ticker).
if latest_features.empty:
    raise ValueError(
        "No rows with a complete feature set to score; check the download "
        "window and the fundamentals returned for each ticker."
    )

X_latest = latest_features[feature_cols]
X_latest_scaled = scaler.transform(X_latest)

# --- 9. Predict Future Returns ---
# One prediction per surviving row, aligned positionally with X_latest.
predictions = rf_model.predict(X_latest_scaled)

latest_features['predicted_future_return'] = predictions

# --- 10. Get Latest Date and Top 3 Stocks ---
# Predicted-return cut-offs for the recommendation label.
BUY_THRESHOLD = 0.08
SELL_THRESHOLD = -0.08

latest_date = latest_features['Date'].max()
latest_data = latest_features[latest_features['Date'] == latest_date]

# The three highest predicted returns on the latest date, best first.
# (`top_3` was previously used without ever being defined.) The copy keeps the
# new column from being written onto a slice of latest_features.
top_3 = latest_data.nlargest(3, 'predicted_future_return').copy()

# Above BUY_THRESHOLD -> Buy, below SELL_THRESHOLD -> Sell, otherwise stay out.
top_3['recommendation'] = np.select(
    [
        top_3['predicted_future_return'] > BUY_THRESHOLD,
        top_3['predicted_future_return'] < SELL_THRESHOLD,
    ],
    ['Buy', 'Sell'],
    default='Do not Enter',
)

print(f"Top 3 stocks to trade for month after {latest_date}:")
display(top_3[['ticker', 'predicted_future_return', 'recommendation']])



[*********************100%***********************]  5 of 5 completed


Top 3 stocks to trade for month after 2020-10-29 00:00:00:


Unnamed: 0,ticker,predicted_future_return,recommendation
214,NVDA,0.101463,Buy
42,AAPL,0.059237,Do not Enter
128,GOOGL,0.015377,Do not Enter


In [8]:
import shap

# NOTE: this cell needs rf_model, X_latest, X_latest_scaled and latest_data
# from the prediction cell. Run that cell first in the same kernel; on a fresh
# kernel this cell raises NameError.

# Create explainer object
explainer = shap.TreeExplainer(rf_model)

# Explain latest predictions; rows follow the row order of X_latest_scaled.
shap_values = explainer.shap_values(X_latest_scaled)

# Pick the example to explain: the top predicted stock on the latest date.
# idxmax returns a row *label*. Rows were dropped before scoring, so labels no
# longer equal positions; translate the label to its position in X_latest
# before indexing the positional SHAP array.
index_to_explain = latest_data['predicted_future_return'].idxmax()
row_position = X_latest.index.get_loc(index_to_explain)

shap.force_plot(
    explainer.expected_value,
    shap_values[row_position],
    # Unscaled feature values are shown so the plot is readable.
    features=X_latest.iloc[row_position],
    matplotlib=True
)


NameError: name 'X_latest_scaled' is not defined