In [8]:
import yfinance as yf
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [9]:
# ---------------------------------------------------------
# 1. CONFIG
# ---------------------------------------------------------

TICKERS = [
    "AAPL", "TSLA", "MSFT", "GOOGL", "META",
    "NVDA", "AMZN", "NFLX", "AMD", "INTC",
    "ETH", "BTC", "Gold", "Silver", "RGTI",
    "COST", "MU", "GOOG", "BTC", "DJI", "INX",
    "T", "ONDS"

]

YEARS = "5y"   # how much history to download
INTERVAL = "1d"

In [10]:
# ---------------------------------------------------------
# 2. FEATURE ENGINEERING
# ---------------------------------------------------------

def build_features(df):
    df["Return"] = df["Close"].pct_change()
    df["MA5"] = df["Close"].rolling(5).mean()
    df["MA20"] = df["Close"].rolling(20).mean()
    df["Volatility"] = df["Return"].rolling(10).std()
    df["NextClose"] = df["Close"].shift(-1)
    return df.dropna()

In [11]:
# ---------------------------------------------------------
# 3. BUILD MULTI-STOCK DATASET
# ---------------------------------------------------------

all_data = []

for ticker in TICKERS:
    print(f"Downloading {ticker}...")
    df = yf.download(ticker, period=YEARS, interval=INTERVAL)

    if df.empty:
        print(f"⚠️ No data for {ticker}, skipping.")
        continue

    df = build_features(df)
    df["Ticker"] = ticker
    all_data.append(df)

dataset = pd.concat(all_data)
dataset = dataset.dropna()

[*********************100%***********************]  1 of 1 completed

Downloading AAPL...



[*********************100%***********************]  1 of 1 completed


Downloading TSLA...
Downloading MSFT...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading GOOGL...
Downloading META...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading NVDA...
Downloading AMZN...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading NFLX...
Downloading AMD...


[*********************100%***********************]  1 of 1 completed


Downloading INTC...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Downloading ETH...
Downloading BTC...



[*********************100%***********************]  1 of 1 completed


Downloading Gold...
Downloading Silver...


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: SILVER"}}}
$SILVER: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['SILVER']: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")
[*********************100%***********************]  1 of 1 completed


⚠️ No data for Silver, skipping.
Downloading RGTI...
Downloading COST...


[*********************100%***********************]  1 of 1 completed


Downloading MU...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading GOOG...
Downloading BTC...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
$INX: possibly delisted; no price data found  (period=5y)
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['INX']: possibly delisted; no price data found  (period=5y)


Downloading DJI...
Downloading INX...


[*********************100%***********************]  1 of 1 completed


⚠️ No data for INX, skipping.
Downloading T...
Downloading ONDS...


[*********************100%***********************]  1 of 1 completed


In [12]:
# ---------------------------------------------------------
# 4. ENCODE TICKER AS CATEGORY
# ---------------------------------------------------------

dataset["Ticker"] = dataset["Ticker"].astype("category")
dataset["TickerCode"] = dataset["Ticker"].cat.codes

  dataset["TickerCode"] = dataset["Ticker"].cat.codes


In [13]:
# ---------------------------------------------------------
# 5. SELECT FEATURES
# ---------------------------------------------------------

feature_cols = [
    "Open", "High", "Low", "Close", "Volume", "MA5", "MA20", "Volatility" ]
    #"MA5", "MA20", "Volatility", "TickerCode"

X = dataset[feature_cols]
y = dataset["NextClose"]

In [14]:
df["Target"] = df["Close"].shift(-1)
df = df.dropna()

X = df[["Open", "High", "Low", "Close", "Volume", "MA5", "MA20", "Volatility"]]
y = df["Target"]

In [15]:
# ---------------------------------------------------------
# 6. TRAIN MODEL
# ---------------------------------------------------------

print("Training model...")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True
)

model = RandomForestRegressor(
    n_estimators=300,
    random_state=42,
    n_jobs=-1
)

model.fit(X_train, y_train)

score = model.score(X_test, y_test)
print(f"Model R² Score: {score:.4f}")

Training model...
Model R² Score: 0.9874


In [16]:
print("X shape:", X.shape)
print("y shape:", y.shape)
print("Head X:\n", X.head())
print("Head y:\n", y.head())

X shape: (1235, 8)
y shape: (1235,)
Head X:
 Price        Open   High    Low  Close  Volume     MA5     MA20 Volatility
Ticker       ONDS   ONDS   ONDS   ONDS    ONDS                            
Date                                                                      
2021-03-15  11.00  11.85  10.80  11.75  294500  10.808  12.1680   0.096599
2021-03-16  11.91  12.15  11.08  11.29  306100  11.084  11.9625   0.096130
2021-03-17  11.06  11.90  11.06  11.80  331200  11.426  11.8860   0.097386
2021-03-18  11.56  12.25  11.21  11.26  335800  11.418  11.8150   0.091390
2021-03-19  11.51  12.24  11.09  11.32  880600  11.484  11.7675   0.086466
Head y:
 Date
2021-03-15    11.29
2021-03-16    11.80
2021-03-17    11.26
2021-03-18    11.32
2021-03-19    11.70
Name: Target, dtype: float64


In [17]:
# ---------------------------------------------------------
# 7. SAVE MODEL + FEATURE COLUMNS
# ---------------------------------------------------------

joblib.dump(model, "stock_model.pkl")
joblib.dump(feature_cols, "feature_columns.pkl")
joblib.dump(dataset["Ticker"].cat.categories.tolist(), "ticker_categories.pkl")

print("Saved:")
print(" - stock_model.pkl")
print(" - feature_columns.pkl")
print(" - ticker_categories.pkl")


Saved:
 - stock_model.pkl
 - feature_columns.pkl
 - ticker_categories.pkl


In [18]:
import yfinance as yf
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

TICKERS = [
    "AAPL", "TSLA", "MSFT", "GOOGL", "META",
    "NVDA", "AMZN", "NFLX", "AMD", "INTC",
    "ETH", "BTC", "Gold", "Silver", "RGTI",
    "COST", "MU", "GOOG", "BTC", "DJI", "INX",
    "T", "ONDS"

]

YEARS = "5y"
INTERVAL = "1d"

def build_features(df):
    df["Return"] = df["Close"].pct_change()
    df["MA5"] = df["Close"].rolling(5).mean()
    df["MA20"] = df["Close"].rolling(20).mean()
    df["Volatility"] = df["Return"].rolling(10).std()
    df["NextClose"] = df["Close"].shift(-1)
    return df.dropna()

all_data = []

for ticker in TICKERS:
    print(f"Downloading {ticker}...")
    df = yf.download(ticker, period=YEARS, interval=INTERVAL)

    if df.empty:
        print(f"⚠️ No data for {ticker}, skipping.")
        continue

    # FIX: flatten MultiIndex columns
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [col[0] for col in df.columns]

    df = build_features(df)
    df["Ticker"] = ticker
    all_data.append(df)

dataset = pd.concat(all_data)
dataset = dataset.dropna()

dataset["Ticker"] = dataset["Ticker"].astype("category")
dataset["TickerCode"] = dataset["Ticker"].cat.codes

feature_cols = [
    "Open", "High", "Low", "Close", "Volume",
    "MA5", "MA20", "Volatility", "TickerCode"
]

X = dataset[feature_cols]
y = dataset["NextClose"]

print("Training model...")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True
)

model = RandomForestRegressor(
    n_estimators=300,
    random_state=42,
    n_jobs=-1
)

model.fit(X_train, y_train)

score = model.score(X_test, y_test)
print(f"Model R² Score: {score:.4f}")

joblib.dump(model, "stock_model.pkl")
joblib.dump(feature_cols, "feature_columns.pkl")
joblib.dump(dataset["Ticker"].cat.categories.tolist(), "ticker_categories.pkl")

print("Saved:")
print(" - stock_model.pkl")
print(" - feature_columns.pkl")
print(" - ticker_categories.pkl")

Downloading AAPL...


[*********************100%***********************]  1 of 1 completed

Downloading TSLA...



[*********************100%***********************]  1 of 1 completed


Downloading MSFT...


[*********************100%***********************]  1 of 1 completed


Downloading GOOGL...


[*********************100%***********************]  1 of 1 completed


Downloading META...


[*********************100%***********************]  1 of 1 completed


Downloading NVDA...


[*********************100%***********************]  1 of 1 completed


Downloading AMZN...


[*********************100%***********************]  1 of 1 completed


Downloading NFLX...


[*********************100%***********************]  1 of 1 completed


Downloading AMD...


[*********************100%***********************]  1 of 1 completed


Downloading INTC...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Downloading ETH...
Downloading BTC...





Downloading Gold...


[*********************100%***********************]  1 of 1 completed
$SILVER: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")
[*********************100%***********************]  1 of 1 completed

1 Failed download:


Downloading Silver...


['SILVER']: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")
[*********************100%***********************]  1 of 1 completed


⚠️ No data for Silver, skipping.
Downloading RGTI...
Downloading COST...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading MU...
Downloading GOOG...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading BTC...
Downloading DJI...
Downloading INX...


$INX: possibly delisted; no price data found  (period=5y)
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['INX']: possibly delisted; no price data found  (period=5y)


⚠️ No data for INX, skipping.
Downloading T...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading ONDS...
Training model...
Model R² Score: 0.9999
Saved:
 - stock_model.pkl
 - feature_columns.pkl
 - ticker_categories.pkl


# 60 days ahead prediction

In [19]:
import numpy as np

SEQ_LEN = 60   # past 60 days
HORIZON = 60   # predict 60 days ahead

def build_sequences(df, feature_cols, target_col="Close"):
    data = df[feature_cols].values
    target = df[target_col].values

    X_seq, y_seq = [], []
    for i in range(len(df) - SEQ_LEN - HORIZON + 1):
        X_seq.append(data[i:i+SEQ_LEN])
        y_seq.append(target[i+SEQ_LEN:i+SEQ_LEN+HORIZON])  # 60-step future
    return np.array(X_seq), np.array(y_seq)

In [20]:
feature_cols = ["Open","High","Low","Close","Volume","MA5","MA20","Volatility"]
X_seq, y_seq = build_sequences(dataset, feature_cols, target_col="Close")

# Full LSTM model (PyTorch, 60‑day path)

In [21]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

class LSTMForecaster(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, num_layers=2, horizon=60):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, horizon)

    def forward(self, x):
        out, _ = self.lstm(x)          # (B, T, H)
        last = out[:, -1, :]           # (B, H)
        return self.fc(last)           # (B, horizon)

X_t = torch.tensor(X_seq, dtype=torch.float32)
y_t = torch.tensor(y_seq, dtype=torch.float32)

ds = TensorDataset(X_t, y_t)
dl = DataLoader(ds, batch_size=64, shuffle=True)

model = LSTMForecaster(input_dim=len(feature_cols))
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

for epoch in range(20):
    for xb, yb in dl:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()

# Transformer model (PyTorch, 60‑day path)

In [None]:
class TransformerForecaster(nn.Module):
    def __init__(self, input_dim, d_model=64, nhead=4, num_layers=2, horizon=60):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, horizon)

    def forward(self, x):
        x = self.input_proj(x)         # (B, T, d_model)
        enc = self.encoder(x)          # (B, T, d_model)
        last = enc[:, -1, :]           # (B, d_model)
        return self.fc(last)           # (B, horizon)

model = TransformerForecaster(input_dim=len(feature_cols))
# same training loop as LSTM (just swap model)

# Recursive Random Forest (using your existing RF)

In [None]:
def rf_recursive_forecast(model, df, feature_cols, steps=60):
    df = df.copy()
    preds = []
    for _ in range(steps):
        X_latest = df[feature_cols].iloc[-1:].values
        next_close = model.predict(X_latest)[0]
        preds.append(next_close)

        # append synthetic next row
        new_row = df.iloc[-1].copy()
        new_row["Close"] = next_close
        # recompute features that depend on Close
        df = pd.concat([df, new_row.to_frame().T])
        df["Return"] = df["Close"].pct_change()
        df["MA5"] = df["Close"].rolling(5).mean()
        df["MA20"] = df["Close"].rolling(20).mean()
        df["Volatility"] = df["Return"].rolling(10).std()
        df = df.dropna().reset_index(drop=True)

    return preds

# Hybrid model (RF + LSTM)
One simple, effective hybrid:
- RF predicts the 60‑day‑ahead level (single value).
- LSTM predicts the shape (normalized path).
- You scale the LSTM path to land at the RF target.


In [None]:
# RF: train on Target60 = Close.shift(-60)
# LSTM: train on normalized 60-day future: (future / future[0]) - 1

def combine_hybrid(rf_price_60, lstm_path_60, last_close):
    # lstm_path_60: relative path, e.g. returns or normalized
    # simple version: scale so last point matches rf_price_60
    raw_path = lstm_path_60  # e.g. absolute prices from LSTM
    scale = rf_price_60 / raw_path[-1]
    return raw_path * scale

# Full Hybrid Function

In [None]:
def hybrid_align(rf_target_60, lstm_path_60):
    """
    rf_target_60: scalar (RF prediction for day +60)
    lstm_path_60: array of shape (60,) with LSTM predicted prices
    """
    final_lstm = lstm_path_60[-1]

    # Avoid division by zero
    if final_lstm == 0:
        return lstm_path_60

    scale = rf_target_60 / final_lstm
    hybrid_path = lstm_path_60 * scale

    # alpha = np.linspace(0.0, 1.0, 60)  # gradually shift weight to RF
    # hybrid_path = (1 - alpha) * lstm_path_60 + alpha * (lstm_path_60 * scale)
    return hybrid_path

The hybrid alignment function (final point forced to RF target)

In [None]:
import numpy as np

def hybrid_align(rf_target_60, lstm_path_60):
    """
    rf_target_60: scalar predicted by RF for day +60
    lstm_path_60: array of shape (60,) predicted by LSTM/Transformer
    """
    final_lstm = lstm_path_60[-1]

    if final_lstm == 0:
        return lstm_path_60

    scale = rf_target_60 / final_lstm
    hybrid_path = lstm_path_60 * scale
    return hybrid_path

Your Flask /predict route with hybrid forecasting
This version assumes:
- model_rf is your RandomForestRegressor trained on Target60
- model_lstm is your LSTM model predicting a 60‑day path
- build_features(df) is unchanged
- feature_cols is the same 8‑feature list you trained on

````
@app.route("/predict", methods=["POST"])
def predict():
    data = request.get_json()
    ticker = data.get("ticker", "").upper()

    try:
        df = yf.download(ticker, period="120d", interval="1d")

        if df.empty:
            return jsonify({"reply": f"No data found for {ticker}"}), 400

        # Flatten MultiIndex
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = [col[0] for col in df.columns]

        df = build_features(df)

        if df.empty or len(df) < 60:
            return jsonify({"reply": f"Not enough data for {ticker}"}), 400

        df.index = pd.to_datetime(df.index)

        # --- 1. RF 60-day prediction ---
        X_latest = df[feature_cols].iloc[-1:].values
        rf_target_60 = float(model_rf.predict(X_latest)[0])

        # --- 2. LSTM 60-day path prediction ---
        seq = df[feature_cols].values[-60:]
        seq = torch.tensor(seq, dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            lstm_path_60 = model_lstm(seq).numpy()[0]

        # --- 3. Hybrid alignment ---
        hybrid_path = hybrid_align(rf_target_60, lstm_path_60)

        # Build dates for the next 60 days
        last_date = df.index[-1]
        future_dates = [(last_date + pd.Timedelta(days=i+1)).strftime("%Y-%m-%d") for i in range(60)]

        return jsonify({
            "reply": f"60-day forecast for {ticker}",
            "ticker": ticker,
            "future_dates": future_dates,
            "hybrid_forecast": hybrid_path.tolist(),
            "rf_target_60": rf_target_60,
            "lstm_raw_path": lstm_path_60.tolist()
        })

    except Exception as e:
        import traceback
        print("ERROR IN /predict ROUTE:")
        traceback.print_exc()
        return jsonify({"reply": f"Error: {str(e)}"}), 500
````
