In [None]:
## Sanity check: the notebook must be running from the project root.
import pathlib
import sys

# Absolute path of the kernel's working directory; it should end in market-predictions.
print(pathlib.Path(".").resolve())

# True means a "model" entry exists here, which the later `from model...` imports rely on.
print(pathlib.Path("model").exists())

print("print!")

In [None]:
#IMPORT ALL NECCESSARY NOTEBOOKS
%pip install --quiet \
    backoff gspread google-auth google-auth-httplib2 \
    joblib lightgbm matplotlib numpy optuna pandas scikit-learn \
    ta vectorbt xgboost yfinance plotly kaleido

print("all done!")

In [None]:
# Install pyarrow on its own (it is not part of the main %pip install cell).
# NOTE(review): presumably required as the parquet engine for the *.parquet
# cache files used in later cells — confirm, and consider folding it into the
# main install cell.
%pip install pyarrow
print("good!")

In [None]:
# ----------------------------------------------
# Environment setup is finished; the analysis itself begins in the cells below.
print("lets go->")

In [None]:
# Project-local pipeline helpers. These imports only resolve when the kernel's
# working directory contains the `model` package (see the location check in the
# first cell).
from model.features import (
    download_or_load_prices,
    compute_features,
    data_prep_and_feature_engineering,
)
from model.grid_search import run_grid_search
from model.backtest import run_backtest

# Marker that the imports above succeeded; this cell itself only imports.
print("downloaded!!")

In [None]:
import datetime as dt
from pathlib import Path
import pandas as pd

# Date window handed to the price loader; a later cell reuses `start` / `end`.
start, end = dt.date(2015, 1, 1), dt.date(2024, 7, 1)

# Benchmark symbols, fetched through the project's download-or-cache helper.
market = download_or_load_prices(["SPY", "^VIX"], Path("market_cache.parquet"), start, end)

# With MultiIndex columns keep only the "Close" level, leaving one plain column
# per symbol; otherwise the frame is used exactly as returned.
if isinstance(market.columns, pd.MultiIndex):
    market = market["Close"]

# Drop the name attached to the column index.
market.columns = market.columns.rename(None)

print("market columns:", market.columns.tolist())

print(":)")

In [None]:
import datetime as dt
from pathlib import Path
import pandas as pd

# Builds `market` from two single-symbol downloads, each with its own cache
# file, and keeps only the closing prices.

# ----- download SPY close prices -----
spy = download_or_load_prices(
    ["SPY"],
    Path("spy_cache.parquet"),
    dt.date(2015, 1, 1),
    dt.date(2024, 7, 1)
)["Close"]          # take only the Close column

# ----- download ^VIX close prices -----
vix = download_or_load_prices(
    ["^VIX"],
    Path("vix_cache.parquet"),
    dt.date(2015, 1, 1),
    dt.date(2024, 7, 1)
)["Close"]

# The loader may return MultiIndex columns (the cell that builds `market` from
# a combined download checks for exactly that). In that case ["Close"] is a
# one-column DataFrame rather than a Series, and concatenating such frames
# under dict keys would yield two-level columns like ('SPY', 'SPY').
# Collapse each one-column frame to a Series so the result has flat columns.
if isinstance(spy, pd.DataFrame):
    spy = spy.squeeze("columns")
if isinstance(vix, pd.DataFrame):
    vix = vix.squeeze("columns")

# ----- assemble the market dataframe -----
# The dict keys become the column labels.
market = pd.concat({"SPY": spy, "^VIX": vix}, axis=1)

print("market columns:", market.columns.tolist())   # should show ['SPY', '^VIX']


In [None]:
from pathlib import Path
import datetime as dt
import pandas as pd

start, end = dt.date(2015, 1, 1), dt.date(2024, 7, 1)

# Step 1 - `market` (SPY and ^VIX columns) must already exist from an earlier cell.

# Step 2 - raw prices for the ticker under study.
df_prices = download_or_load_prices(["AAPL"], Path("price_cache.parquet"), start, end)

# Step 3 - feature engineering; the result is treated here as a single
# DataFrame that includes a 'Target' column.
df_feat = compute_features(df_prices, market)

# Step 4 - every column name other than 'Target' counts as a feature.
feature_list = [name for name in df_feat.columns if name != "Target"]

# Step 5 - tickers handed to the data-prep helper.
tickers = ["AAPL"]

# Step 6 - train / test sets.
# NOTE(review): four values are unpacked here, while another cell unpacks only
# two from the same helper with the same arguments — confirm the helper's
# actual return shape.
X_train, X_test, y_train, y_test = data_prep_and_feature_engineering(
    tickers, feature_list, Path("feature_cache.parquet"), start, end
)

print("pipeline reached train/test split with no errors")


print("all done!")

In [None]:
# Relies on `Path`, `start`, `end` and `market` defined by earlier cells.

# Step 1: raw prices for the ticker(s).
df_prices = download_or_load_prices(["AAPL"], Path("price_cache.parquet"), start, end)

# Step 2: features plus 'Target'; the result is treated as ONE DataFrame.
df_feat = compute_features(df_prices, market)

# Step 3: inputs for the next stage.
tickers = ["AAPL"]
feature_list = [name for name in df_feat.columns if name != "Target"]

# Step 4: training data.
# NOTE(review): two values are unpacked here, while another cell unpacks four
# (X_train, X_test, y_train, y_test) from the same helper — confirm which
# matches the helper's actual return shape.
X_train, y_train = data_prep_and_feature_engineering(
    tickers, feature_list, Path("feature_cache.parquet"), start, end
)

print("X_train shape:", X_train.shape, "| y_train shape:", y_train.shape)


In [None]:


# Relies on `dt`, `Path` and `market` defined by earlier cells.

# Raw AAPL prices via the download-or-cache helper.
df_prices = download_or_load_prices(
    ["AAPL"], Path("price_cache.parquet"), dt.date(2015, 1, 1), dt.date(2024, 7, 1)
)

# Take the first two items of the result as X and y and discard the rest.
# NOTE(review): other cells treat compute_features' result as ONE DataFrame; if
# that is correct, this unpacking iterates over column labels instead of
# yielding feature/target data — confirm against model.features.
X, y, *_ = compute_features(df_prices, market)

# Same cache file and date window as above.
# NOTE(review): other cells pass (tickers, feature_list, ...) as the first two
# arguments of this helper — confirm which call form it expects.
X_train, X_test, y_train, y_test = data_prep_and_feature_engineering(
    X,
    y,
    Path("feature_cache.parquet"),
    dt.date(2015, 1, 1),
    dt.date(2024, 7, 1),
)

print("all done!")

In [None]:
# Search for the best model settings on the training split. The result is
# unpacked into the chosen parameters and the corresponding model.
best_params, best_model = run_grid_search(X_train, y_train)
print("best parameters:", best_params)

# Evaluate the selected model on the hold-out set.
results = run_backtest(best_model, X_test, y_test)

# A bare expression is only rendered when it is the last statement of a cell;
# here a print follows, so the preview has to be displayed explicitly.
display(results.head())

print("finished!")