In [1]:
!jupyter kernelspec list

Available kernels:
  ml-unified    /workspaces/smart_dev/ml-unified/.venv/share/jupyter/kernels/ml-unified
  python3       /workspaces/smart_dev/ml-unified/.venv/share/jupyter/kernels/python3


In [2]:
!python -c "import tensorflow as tf; print(f'TensorFlow GPU devices: {len(tf.config.list_physical_devices(\"GPU\"))}')"

2025-07-11 10:50:59.054344: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752231059.211158   24725 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752231059.256907   24725 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752231059.575926   24725 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752231059.575996   24725 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752231059.576004   24725 computation_placer.cc:177] computation placer alr

In [3]:
!python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}')"

PyTorch CUDA available: True


In [6]:
import torch

print('--- PyTorch GPU Test ---')
is_available = torch.cuda.is_available()
print(f'CUDA Available: {is_available}')

if is_available:
    device_count = torch.cuda.device_count()
    print(f'Device Count: {device_count}')
    device_name = torch.cuda.get_device_name(0)
    print(f'Device Name: {device_name}')
    
    try:
        tensor = torch.tensor([1.0, 2.0]).to('cuda')
        print(f'Tensor on GPU: {tensor}')
        print('SUCCESS: PyTorch is utilizing the GPU.')
    except Exception as e:
        print(f'ERROR: PyTorch failed a CUDA operation: {e}')

--- PyTorch GPU Test ---
CUDA Available: False


In [7]:
! uv pip list | grep -E '^(ipykernel|jupyter|yfinance|pandas|numpy|arch|numba|xgboost|shap|opacus|plotly|scipy|pyarrow|duckdb|seaborn|matplotlib|dash|dash-bootstrap-components|tensorflow|tensorflow-probability|keras|GitPython|torch|scikit|faiss)'

[2mUsing Python 3.11.13 environment at: /opt/conda/envs/ml-unified[0m
dash                       3.0.4
dash-bootstrap-components  2.0.3
duckdb                     1.3.1+g2063dda
faiss                      1.9.0
ipykernel                  6.29.5
jupyter                    1.1.1
jupyter-client             8.6.3
jupyter-console            6.6.3
jupyter-core               5.8.1
jupyter-events             0.12.0
jupyter-lsp                2.2.5
jupyter-server             2.16.0
jupyter-server-terminals   0.5.3
jupyterlab                 4.4.4
jupyterlab-pygments        0.3.0
jupyterlab-server          2.27.3
jupyterlab-widgets         3.0.15
keras                      3.10.0
matplotlib                 3.10.3
matplotlib-inline          0.1.7
numba                      0.61.2
numpy                      1.26.4
opacus                     1.5.4
pandas                     2.3.0
plotly                     6.2.0
pyarrow                    19.0.1
scikit-learn               1.7.0
scipy             

In [1]:
from pathlib import Path
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import yfinance as yf
from xgboost import XGBClassifier
from scipy.signal import find_peaks
import shap
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import plotly.graph_objs as go
# from plotly.subplots import make_subplots
import duckdb
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)  # Ignore known FutureWarnings (e.g., pandas Int64Index)

# Global DuckDB connection (persists between function calls)
con = duckdb.connect(database=':memory:')
start_date = (datetime.today() - timedelta(days=365*10)).strftime('%Y-%m-%d')
end_date = datetime.today().strftime('%Y-%m-%d')
symbol = 'RYCEY'  # Default ticker for testing

def get_date_range(df, date_col='Date'):
    """Safely get first and last dates from DataFrame"""
    # Get dates from column or index
    dates = df[date_col].values if date_col in df.columns else df.index.values
    
    # Convert to datetime and handle empty cases
    dates = pd.to_datetime(dates)
    if len(dates) == 0:
        return None, None
    
    # Use iloc-style access to avoid index issues
    first_date = dates[0].date()
    last_date = dates[-1].date() if len(dates) > 1 else first_date
    
    return first_date, last_date

def calculate_dominant_periods(df, window_size=60):
    """
    Calculate dominant periods using FFT with bias reduction techniques:
    - Proper windowing (Hamming window)
    - Detrending
    - Advanced peak detection
    - Noise floor consideration
    """
    # Initialize results
    dominant_periods = np.full(len(df), np.nan)
    
    # Create window function
    window = np.hamming(window_size)
    
    # Minimum meaningful period (avoid detecting very high frequencies)
    min_period = 2  # Minimum period of 2 samples
    
    for i in range(window_size, len(df)):
        # Get window data and detrend
        window_data = df['Close'].iloc[i - window_size:i].values
        window_data = window_data - np.mean(window_data)  # Remove DC component
        
        # Apply window function
        windowed_data = window_data * window
        
        # Perform FFT
        fft = np.fft.fft(windowed_data)
        freqs = np.fft.fftfreq(len(fft))
        
        # Only consider positive frequencies
        positive_mask = freqs > 0
        positive_freqs = freqs[positive_mask]
        fft_mag = np.abs(fft[positive_mask])
        
        # Convert to power spectrum (magnitude squared)
        power_spectrum = fft_mag**2
        
        # Find all peaks in the power spectrum
        peaks, properties = find_peaks(power_spectrum, height=0)
        
        if len(peaks) > 0:
            # Calculate noise floor (median of non-peak values)
            noise_floor = np.median(power_spectrum[np.setdiff1d(
                np.arange(len(power_spectrum)), peaks
            )])
            
            # Filter peaks that are significantly above noise floor (3dB threshold)
            significant_peaks = peaks[properties['peak_heights'] > 2*noise_floor]
            
            if len(significant_peaks) > 0:
                # Get top 3 significant peaks by magnitude
                top_peaks = significant_peaks[
                    np.argsort(power_spectrum[significant_peaks])[-3:]
                ]
                
                # Select the peak with highest frequency among top peaks
                # (avoids always selecting the lowest frequency peak)
                selected_peak = top_peaks[
                    np.argmax(positive_freqs[top_peaks])
                ]
                
                dom_freq = positive_freqs[selected_peak]
                dom_period = abs(1/dom_freq)
                
                # Validate period is within reasonable bounds
                if dom_period < min_period or dom_period > window_size:
                    dom_period = np.nan
            else:
                dom_period = np.nan
        else:
            dom_period = np.nan
        
        dominant_periods[i] = dom_period
    
    # Post-processing
    df['Dominant_Period'] = dominant_periods
    
    # Linear interpolation for missing values (better than forward fill)
    df['Dominant_Period'] = df['Dominant_Period'].interpolate(
        method='linear',
        limit_area='inside'  # Only fill between valid values
    )
    
    # Optional: Smooth the final series
    df['Dominant_Period'] = df['Dominant_Period'].rolling(
        window=3,
        center=True,
        min_periods=1
    ).mean()
    
    return df


In [96]:
#SIMPLE DATA PULL
def get_stock_data(ticker: str) -> pd.DataFrame:
    with duckdb.connect() as con:
        if Path(f'data/{ticker}.parquet').exists():
            pass
            print(f"Loading data for {ticker} from local Parquet file.")
            return con.execute(f"SELECT * FROM read_parquet('data/{ticker}.parquet')").df()
        else:
            print(f"Downloading data for {ticker} from Yahoo Finance.")
            df = yf.download(ticker, start=start_date, end=end_date, progress=False)
            df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()
            df.to_parquet(f'data/{ticker}.parquet')
            return df
df = get_stock_data(symbol)  # Example usage to ensure data is downloaded

#type(df)

Loading data for RYCEY from local Parquet file.


In [None]:
def clean_column_names(df, ticker):
    """Flatten multi-index columns and remove ticker from column names"""
    # Convert columns to list if they're in the problematic format
    if any(isinstance(col, tuple) for col in df.columns):
        new_columns = []
        for col in df.columns:
            if isinstance(col, tuple):
                # Keep just the metric name (e.g., 'Close') and drop the ticker
                new_columns.append(col[0])
            else:
                new_columns.append(col)
        df.columns = new_columns
    return df

def get_stock_data(ticker: str, force_download: bool = False) -> duckdb.DuckDBPyRelation:
    """Fetch stock data from cache or Yahoo Finance, storing in DuckDB."""
    Path('data').mkdir(exist_ok=True)
    parquet_path = f'data/{ticker}.parquet'
    
    if not force_download and Path(parquet_path).exists():
        print(f"Loading cached data for {ticker}")
        con.execute(f"CREATE OR REPLACE TABLE {ticker}_data AS SELECT * FROM read_parquet('{parquet_path}')").df()
    else:
        print(f"Downloading fresh data for {ticker}")
        df = yf.download(
            ticker, 
            start=(datetime.today() - timedelta(days=365*10)).strftime('%Y-%m-%d'),
            end=datetime.today().strftime('%Y-%m-%d'),
            progress=False
        )
        
        # Clean the column names
        df = clean_column_names(df, ticker)
        
        # Select our standard columns and reset index
        df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()
        df = df[['Open', 'High', 'Low', 'Close', 'Volume']].reset_index()
        
        # Save to Parquet
        df.to_parquet(parquet_path, index=False)
        
        # Register with DuckDB
        con.execute(f"CREATE OR REPLACE TABLE {ticker}_data AS SELECT * FROM df")
    
    return con.table(f'{ticker}_data').df()

# Usage
if __name__ == "__main__":
    # Get data
    df = get_stock_data("RYCEY")
    
    # Show schema to verify clean column names
    print("Table Schema:")
    print(con.execute("DESCRIBE rycey_data").df())
    
    # Query recent data
    recent_data = con.execute("""
        SELECT 
            MIN(Date) as start_date
            ,MAX(Date) as end_date
            ,COUNT(*) as row_count
            ,AVG(Close) as avg_close
        FROM df
    """).df()
    
    print("\nRecent Data:")
    print(recent_data)
    print("\nColumn Names:", recent_data.columns.tolist())

Loading cached data for RYCEY
Table Schema:
  column_name   column_type null   key default extra
0        Date  TIMESTAMP_NS  YES  None    None  None
1        Open        DOUBLE  YES  None    None  None
2        High        DOUBLE  YES  None    None  None
3         Low        DOUBLE  YES  None    None  None
4       Close        DOUBLE  YES  None    None  None
5      Volume        BIGINT  YES  None    None  None

Recent Data:
  start_date   end_date  row_count  avg_close
0 2015-06-05 2025-05-30       2512   6.510762

Column Names: ['start_date', 'end_date', 'row_count', 'avg_close']


In [4]:
# Technical Indicators & Advanced Metrics

# 2.1 Relative Strength Index (RSI) - 14 day
delta = df['Close'].diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
avg_gain = gain.ewm(alpha=1/14, adjust=False).mean()
avg_loss = loss.ewm(alpha=1/14, adjust=False).mean()
rs = avg_gain / avg_loss
df['RSI14'] = 100 - (100 / (1 + rs))

# 2.2 Moving Average Convergence/Divergence (MACD: 12,26 with Signal 9)
ema12 = df['Close'].ewm(span=12, adjust=False).mean()
ema26 = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD_line'] = ema12 - ema26
df['MACD_signal'] = df['MACD_line'].ewm(span=9, adjust=False).mean()
df['MACD_hist'] = df['MACD_line'] - df['MACD_signal']

# 2.3 Average True Range (ATR) - 14 day
high_low = df['High'] - df['Low']
high_prev_close = (df['High'] - df['Close'].shift(1)).abs()
low_prev_close  = (df['Low'] - df['Close'].shift(1)).abs()
true_range = pd.DataFrame({'hl': high_low, 'hc': high_prev_close, 'lc': low_prev_close}).max(axis=1)
df['ATR14'] = true_range.rolling(window=14).mean()

# 2.4 Bollinger Bands (20-day SMA ± 2 STD)
rolling_mean20 = df['Close'].rolling(window=20).mean()
rolling_std20  = df['Close'].rolling(window=20).std()
df['BB_mid']   = rolling_mean20
df['BB_upper'] = rolling_mean20 + 2 * rolling_std20
df['BB_lower'] = rolling_mean20 - 2 * rolling_std20

# 2.5 On-Balance Volume (OBV)
direction = np.sign(df['Close'].diff().fillna(0))
df['OBV'] = (direction * df['Volume']).cumsum()

# 2.6 Stochastic Oscillator %K and %D (14-day)
window = 14
lowest_low  = df['Low'].rolling(window).min()
highest_high = df['High'].rolling(window).max()
df['%K'] = (df['Close'] - lowest_low) / (highest_high - lowest_low + 1e-9) * 100  # +1e-9 to avoid zero division
df['%D'] = df['%K'].rolling(3).mean()

# 2.7 Money Flow Index (MFI) - 14 day
typical_price = (df['High'] + df['Low'] + df['Close']) / 3.0
mf = typical_price * df['Volume']
tp_diff = typical_price.diff()
pos_mf = mf.where(tp_diff > 0, 0.0).rolling(window).sum()
neg_mf = mf.where(tp_diff < 0, 0.0).rolling(window).sum()
df['MFI14'] = 100 - 100 / (1 + pos_mf / (neg_mf + 1e-9))

# 2.8 Commodity Channel Index (CCI) - 20 day
TP20 = typical_price.rolling(20).mean()
MD20 = (typical_price - TP20).abs().rolling(20).mean()  # Mean deviation
df['CCI20'] = (typical_price - TP20) / (0.015 * MD20)

# 2.9 Williams %R (14-day)
df['Williams_%R'] = (highest_high - df['Close']) / (highest_high - lowest_low + 1e-9) * -100

# 2.10 Rate of Change (ROC) - 10 day
df['ROC10'] = df['Close'].pct_change(periods=10) * 100

# 2.11 GARCH(1,1) Volatility Estimate (daily)
returns = df['Close'].pct_change().fillna(0)
# Initialize GARCH parameters (omega, alpha, beta) and variance
var0 = returns.var()
alpha, beta = 0.1, 0.85
omega = var0 * max(0, (1 - alpha - beta))
garch_vars = [var0]
for r in returns.iloc[1:]:
    new_var = omega + alpha * (r**2) + beta * garch_vars[-1]
    garch_vars.append(new_var)
df['GARCH_vol'] = np.sqrt(garch_vars)

# 2.12 Fourier Transform Dominant Period
# Compute the Fourier Transform dominant period for each row using a rolling window
df = calculate_dominant_periods(df, window_size=60)

# 2.13 One-day Return (%) as feature
df['Return1'] = returns * 100

# Drop initial rows with NaN values from rolling calculations
df.dropna(inplace=True)
print(f"After feature engineering: {len(df)} data points, {df.shape[1]} columns (incl. features).")
print(f"{df.Dominant_Period.describe()}\n{df.Close.describe()}")
df.sample(10)  # display last 5 rows of the last 10 feature columns

After feature engineering: 2453 data points, 24 columns (incl. features).
count    2453.000000
mean        4.803999
std         1.820771
min         2.142857
25%         3.537582
50%         4.615385
75%         5.555556
max        16.111111
Name: Dominant_Period, dtype: float64
count    2453.000000
mean        6.387663
std         3.950234
min         0.744062
25%         1.865117
50%         7.301733
75%         9.914142
max        13.825971
Name: Close, dtype: float64


Unnamed: 0,Date,Open,High,Low,Close,Volume,RSI14,MACD_line,MACD_signal,MACD_hist,...,OBV,%K,%D,MFI14,CCI20,Williams_%R,ROC10,GARCH_vol,Dominant_Period,Return1
1834,2022-09-16,0.892875,0.892875,0.863112,0.873033,3351900,35.071058,-0.040442,-0.040748,0.000307,...,-252834700.0,31.24998,35.906841,44.777387,-48.016808,-68.75002,1.149419,0.028509,2.222222,-2.222223
586,2017-10-02,11.15901,11.196551,11.121469,11.187165,15400,52.011253,0.005713,0.014286,-0.008573,...,-2786300.0,60.86966,55.555698,54.848986,17.402721,-39.13034,-0.334453,0.020977,8.888889,0.421234
1905,2022-12-28,1.091292,1.091292,1.061529,1.061529,2766900,53.072743,0.011843,0.018514,-0.006671,...,-231514900.0,36.363692,36.363692,43.815725,-0.382888,-63.636307,-2.727273,0.022624,4.984127,0.0
1943,2023-02-23,1.557571,1.577412,1.517887,1.54765,13195400,76.210061,0.039499,0.030767,0.008733,...,-196738400.0,90.624973,56.134222,62.838888,391.774194,-9.375027,19.999996,0.066844,5.166667,19.999996
789,2018-07-24,12.561286,12.608831,12.52325,12.570795,21300,59.937571,0.23396,0.293385,-0.059425,...,-1890300.0,41.304717,36.047365,58.464115,7.490571,-58.695283,-1.636889,0.019789,4.74359,0.685465
663,2018-01-23,11.476885,11.58062,11.467454,11.58062,23200,61.401362,0.187983,0.103324,0.084659,...,-4298000.0,80.291914,81.751782,44.054249,110.098074,-19.708086,4.067795,0.026592,3.157895,0.244892
854,2018-10-24,10.507358,10.621465,10.212581,10.241109,139100,25.391933,-0.461111,-0.366066,-0.095045,...,-2879800.0,1.515187,4.745237,13.482042,-115.813111,-98.484813,-7.869966,0.025777,2.749907,-2.97297
473,2017-04-21,9.614589,9.679741,9.586667,9.679741,27700,67.278363,0.235941,0.197268,0.038672,...,-6289600.0,92.592719,88.271721,92.166698,101.325597,-7.407281,6.122475,0.021006,5.333333,0.192667
2431,2025-02-04,7.281892,7.321575,7.26205,7.271971,1570500,52.78459,0.040722,0.029939,0.010783,...,31299000.0,67.94873,69.658133,71.542298,46.554276,-32.05127,0.273598,0.022011,10.0,0.68681
139,2015-12-22,7.660381,7.761055,7.632925,7.761055,167900,40.156253,-0.232294,-0.261173,0.028879,...,-6169700.0,29.464223,35.416557,20.01945,-100.147786,-70.535777,-4.611942,0.026134,5.538462,-0.586169


In [5]:
# Define target: 1 if next day's Close is higher than today's, else 0
df['UpNext'] = (df['Close'].shift(-1) > df['Close']).astype(int)
df.dropna(inplace=True)  # drop last row (no target for it)
print("Target 'UpNext':", df['UpNext'].value_counts().to_dict())  # distribution of up/down


Target 'UpNext': {0: 1291, 1: 1162}


In [7]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'RSI14', 'MACD_line',
       'MACD_signal', 'MACD_hist', 'ATR14', 'BB_mid', 'BB_upper', 'BB_lower',
       'OBV', '%K', '%D', 'MFI14', 'CCI20', 'Williams_%R', 'ROC10',
       'GARCH_vol', 'Dominant_Period', 'Return1', 'UpNext'],
      dtype='object')

In [8]:
# Train-test split (80% train, 20% test by date order)
train_size = int(0.8 * len(df))
train_df = df.iloc[:train_size].copy()
test_df  = df.iloc[train_size:].copy()
train_start, train_end = get_date_range(train_df, 'Date')
test_start, test_end = get_date_range(test_df, 'Date')

print(f"Training: {train_start} to {train_end} ({len(train_df)} samples)")
print(f"Testing: {test_start} to {test_end} ({len(test_df)} samples)")

# Features for modeling (20+ features we engineered)
feature_cols = [
    'RSI14','MACD_line','MACD_signal','MACD_hist','ATR14',
    'BB_mid','BB_upper','BB_lower','OBV','%K','%D',
    'MFI14','CCI20','Williams_%R','ROC10','GARCH_vol',
    'Dominant_Period','Return1','Close','Volume'
]
X_train = train_df[feature_cols]
y_train = train_df['UpNext'].astype(int)
X_test  = test_df[feature_cols]
y_test  = test_df['UpNext'].astype(int)


Training: 2015-08-28 to 2023-06-14 (1962 samples)
Testing: 2023-06-15 to 2025-05-30 (491 samples)


In [9]:
# 4.1 Train XGBoost model
xgb_model = XGBClassifier(n_estimators=100, random_state=42, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)
accuracy = (y_pred == y_test).mean()
print(f"XGBoost Test Accuracy: {accuracy:.2%}")

# 4.2 SHAP feature importance for XGBoost
explainer = shap.TreeExplainer(xgb_model)
shap_values = explainer.shap_values(X_test)
# If XGBoost classifier returns a list (one per class), take the second element (positive class)
if isinstance(shap_values, list):
    shap_values = shap_values[1]  # shap values for class "1"
# Calculate mean absolute SHAP value for each feature
importance = np.mean(np.abs(shap_values), axis=0)
feature_importance = pd.Series(importance, index=X_test.columns).sort_values(ascending=False)
print("Top 5 features by SHAP importance:")
print(feature_importance.head(5))


XGBoost Test Accuracy: 50.10%
Top 5 features by SHAP importance:
RSI14      0.386637
OBV        0.372639
Return1    0.300512
MFI14      0.245748
%D         0.245400
dtype: float32


In [10]:
# 5.1 Prepare sequence data for Transformer
window_size = 60  # sequence length (days)
X_values = df[feature_cols].values
y_values = df['UpNext'].values.astype(int)

X_seq, y_seq = [], []
for i in range(window_size, len(X_values)):
    # Sequence of features for days [i-window_size ... i-1]
    X_seq.append(X_values[i-window_size:i])
    # Label for sequence = UpNext of day i-1 (predicting day i relative to i-1)
    y_seq.append(y_values[i-1])
X_seq = np.array(X_seq, dtype=np.float32)
y_seq = np.array(y_seq, dtype=np.int64)

# Split sequence data into train and test sets corresponding to original split
# A sequence ending at index j (label index j) belongs to train if j < train_size, else test
train_seq_idx = np.where((np.arange(window_size, len(X_values)) - 1) < train_size)[0]
test_seq_idx  = np.where((np.arange(window_size, len(X_values)) - 1) >= train_size)[0]
X_seq_train = X_seq[train_seq_idx]
y_seq_train = y_seq[train_seq_idx]
X_seq_test  = X_seq[test_seq_idx]
y_seq_test  = y_seq[test_seq_idx]
print(f"Sequences: {X_seq_train.shape[0]} train sequences, {X_seq_test.shape[0]} test sequences.")

# Convert to torch tensors
X_seq_train_t = torch.tensor(X_seq_train)
y_seq_train_t = torch.tensor(y_seq_train)
X_seq_test_t  = torch.tensor(X_seq_test)
y_seq_test_t  = torch.tensor(y_seq_test)


Sequences: 1903 train sequences, 490 test sequences.


In [11]:
# 5.2 Define Transformer model (encoder) for binary classification
class StockTransformer(nn.Module):
    def __init__(self, input_features, d_model=64, nhead=4, num_layers=2, num_classes=2):
        super(StockTransformer, self).__init__()
        self.input_features = input_features
        self.d_model = d_model
        # Feature embedding layer: project input features to d_model dimensions
        self.feature_embed = nn.Linear(input_features, d_model)
        # Transformer Encoder layers
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        # Final output layer
        self.fc_out = nn.Linear(d_model, num_classes)
    def forward(self, x):
        # x shape: (batch, seq_len, input_features)
        x = x.permute(1, 0, 2)               # -> (seq_len, batch, input_features)
        x = self.feature_embed(x)            # -> (seq_len, batch, d_model)
        x = self.transformer_encoder(x)      # -> (seq_len, batch, d_model)
        out = x[-1, :, :]                    # take the last time step's output: (batch, d_model)
        out = self.fc_out(out)               # -> (batch, num_classes)
        return out

# Initialize model, loss, optimizer
model = StockTransformer(input_features=X_seq_train.shape[2], d_model=64, nhead=4, num_layers=2, num_classes=2)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [12]:
# 5.3 Train the Transformer model
epochs = 5
batch_size = 32
train_dataset = TensorDataset(X_seq_train_t, y_seq_train_t)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(1, epochs+1):
    model.train()
    total_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * X_batch.size(0)
    avg_loss = total_loss / len(train_dataset)
    print(f"Epoch {epoch}/{epochs} - Training Loss: {avg_loss:.4f}")

# 5.4 Evaluate on test sequences
model.eval()
with torch.no_grad():
    test_outputs = model(X_seq_test_t)
    test_preds = test_outputs.argmax(dim=1).numpy()
test_accuracy = (test_preds == y_seq_test).mean()
print(f"Transformer Test Accuracy: {test_accuracy:.2%}")


Epoch 1/5 - Training Loss: 0.7312
Epoch 2/5 - Training Loss: 0.7002
Epoch 3/5 - Training Loss: 0.6939
Epoch 4/5 - Training Loss: 0.6961
Epoch 5/5 - Training Loss: 0.6948
Transformer Test Accuracy: 47.55%


In [None]:
# 6.1 Ensemble probabilities (average of XGBoost and Transformer)
xgb_proba = xgb_model.predict_proba(X_test)[:, 1]            # probability of class 1 from XGBoost
trans_proba = F.softmax(test_outputs, dim=1).numpy()[:, 1]   # probability of class 1 from Transformer
# Ensure we align lengths (Transformer test may have one fewer if sequence window covers until second-last day)
min_len = min(len(xgb_proba), len(trans_proba))
ensemble_proba = (xgb_proba[:min_len] + trans_proba[:min_len]) / 2

# 6.2 Compute Kelly fraction for the latest day in test
latest_p = ensemble_proba[-1]   # ensemble probability of up for the most recent day in test set
kelly_fraction = 2 * latest_p - 1
print(f"Latest ensemble 'Up' probability: {latest_p:.2%}")
print(f"Kelly fraction: {kelly_fraction:.2f}")

# 6.3 Fuzzy logic verdict based on Kelly fraction
if kelly_fraction > 0.5:
    verdict = "Strong Buy"
elif kelly_fraction > 0.1:
    verdict = "Buy"
elif kelly_fraction < -0.5:
    verdict = "Strong Sell"
elif kelly_fraction < -0.1:
    verdict = "Sell"
else:
    verdict = "Hold/Neutral"

print("Fuzzy Verdict for the latest day:", verdict)


Latest ensemble 'Up' probability: 25.36%
Kelly fraction: -0.49
Fuzzy Verdict for the latest day: Sell


In [115]:
df.index = pd.to_datetime(df['Date'])

To work with FRED economic indicators using `pandas-datareader`, follow this guide:

### 1. **Finding FRED Indicators**
There's no direct API to list *all* FRED indicators, but here's how to discover them:
```python
import pandas_datareader as pdr
from pandas_datareader.fred import FredReader

# Search FRED website directly
# https://fred.stlouisfed.org/search?st=economic+indicators
```

### 2. **Top 10 Indicators for Stock Analysis**
Here are key economic indicators with their FRED codes:

| Indicator | FRED Code | Frequency | Description |
|-----------|-----------|-----------|-------------|
| **CPI** | `CPIAUCSL` | Monthly | Consumer Price Index |
| **Treasury Spread** | `T10Y2Y` | Daily | 10-Year vs 2-Year Treasury Spread |
| **Unemployment** | `UNRATE` | Monthly | Unemployment Rate |
| **GDP** | `GDP` | Quarterly | Gross Domestic Product |
| **Fed Funds Rate** | `FEDFUNDS` | Daily | Federal Funds Rate |
| **Mortgage Rates** | `MORTGAGE30US` | Weekly | 30-Year Mortgage Rate |
| **Industrial Production** | `INDPRO` | Monthly | Industrial Production Index |
| **Retail Sales** | `RSXFS` | Monthly | Retail Sales |
| **Housing Starts** | `HOUST` | Monthly | Housing Starts |
| **NASDAQ** | `NASDAQCOM` | Daily | NASDAQ Index |

### 5. **Handling Frequencies**
Different indicators have different frequencies (daily, monthly, quarterly). Resample for alignment:
```python
# Convert monthly data to quarterly
data_daily = data.resample('D').last()
```

### 6. **Additional Useful Indicators**
1. `VIXCLS` - CBOE Volatility Index
2. `UMCSENT` - Consumer Sentiment
3. `PPIACO` - Producer Price Index
4. `A191RL1Q225SBEA` - GDP Growth Rate


In [None]:
import os
import asyncio
import uvloop
import aiohttp
import pandas as pd
import numpy as np
from dotenv import load_dotenv

# ─── Setup ───────────────────────────────────────────────────────────────────
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
load_dotenv(dotenv_path='/workspaces/smart_dev/ml-unified/vars.env')

API_KEY    = os.getenv('FRED_API_KEY')
OBS_START  = '2000-01-01'
OBS_URL    = 'https://api.stlouisfed.org/fred/series/observations'

if not API_KEY:
    raise RuntimeError("FRED_API_KEY not set in vars.env")

# ─── Indicators & FRED codes ─────────────────────────────────────────────────
INDICATORS = {
    'CPI':             'CPIAUCSL',
    'TreasurySpread':  'T10Y2Y',
    'Unemployment':    'UNRATE',
    'GDP':             'GDP',
    'FedFunds':        'FEDFUNDS',
    'Mortgage30Yr':    'MORTGAGE30US',
    'IndustrialProd':  'INDPRO',
    'RetailSales':     'RSXFS',
    'HousingStarts':   'HOUST',
    'NASDAQ':          'NASDAQCOM',
    'ConsumerSenti':   'UMCSENT',
    'ProdPriceIdx':    'PPIACO',
}

# ─── Fetch one series of observations ────────────────────────────────────────
async def fetch_series(session, name, code):
    params = {
        'series_id':         code,
        'api_key':           API_KEY,
        'file_type':         'json',
        'observation_start': OBS_START
    }
    async with session.get(OBS_URL, params=params) as resp:
        resp.raise_for_status()
        payload = await resp.json()

    dates  = [obs['date'] for obs in payload['observations']]
    values = [
        np.nan if obs['value'] == '.' else float(obs['value'])
        for obs in payload['observations']
    ]
    return pd.Series(values,
                     index=pd.to_datetime(dates),
                     name=name)

# ─── Main: fetch all, check, merge, reindex, flag, save ─────────────────────
async def main():
    connector = aiohttp.TCPConnector(limit=0)
    timeout   = aiohttp.ClientTimeout(total=15)

    async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
        tasks = [fetch_series(session, nm, cd) for nm, cd in INDICATORS.items()]
        series_list = await asyncio.gather(*tasks)

    # 1) Concat & sort
    df = pd.concat(series_list, axis=1).sort_index()
    print("\nRaw data sample (last 5 rows before reindex):")
    print(df.tail())

    # 2) Determine full daily index
    full_idx = pd.date_range(df.index.min(), df.index.max(), freq='D')

    # 3) Reindex + forward-fill
    df_daily = df.reindex(full_idx).ffill()

    print("\nAfter reindex+ffill (last 10 rows):")
    print(df_daily.tail(10)[['CPI','Unemployment','GDP','NASDAQ','TreasurySpread']])

    # 4) Debug: verify forward-fill at a known date
    sample = pd.Timestamp('2025-07-02')
    print(f"\nValue on {sample.date()}:")
    for col in ['CPI','Unemployment','GDP']:
        print(f"  {col}: raw last obs @ {df[col].last_valid_index()} -> "
              f"daily[{sample.date()}]={df_daily.at[sample, col]}")

    # 5) Build flags
    obs_dates = {s.name: set(s.index.date) for s in series_list}
    idx_dates = pd.Series(df_daily.index.date, index=df_daily.index)
    flags = {
        f"{name}_is_update": idx_dates.isin(obs_dates[name])
        for name in INDICATORS
    }
    df_flags = pd.DataFrame(flags, index=df_daily.index)

    # 6) Merge & save
    df_final = pd.concat([df_daily, df_flags], axis=1)
    df_final.to_parquet('macro_indicators_daily.parquet')

    print(f"\nSaved {len(df_daily)} days × {df_final.shape[1]} columns "
          "to macro_indicators_daily.parquet")

if __name__ == '__main__':
    # asyncio.run(main())
    await main()  # For Jupyter compatibility, use await directly

RuntimeError: asyncio.run() cannot be called from a running event loop

In [22]:
# df_test = pd.read_parquet('macro_indicators_daily.parquet')
# print(df_test.tail(10))  # Display a sample of the fetched macro indicators data
# 1. Sort & resample
df = df_test.sort_index()
df_daily = df.resample('D').ffill()

# 2. Check around the end of June → early July
print(df_daily['2025-06-20':'2025-07-15'][['CPI','Unemployment','GDP']].head(10))

# 3. Confirm last-valid indices
for name in ['CPI','Unemployment','GDP']:
    print(f"{name} last raw obs: {df[name].last_valid_index()}")
    print(f"{name} value on 2025-07-02: {df_daily.at[pd.Timestamp('2025-07-02'),'CPI']}")



            CPI  Unemployment  GDP
2025-06-20  NaN           NaN  NaN
2025-06-21  NaN           NaN  NaN
2025-06-22  NaN           NaN  NaN
2025-06-23  NaN           NaN  NaN
2025-06-24  NaN           NaN  NaN
2025-06-25  NaN           NaN  NaN
2025-06-26  NaN           NaN  NaN
2025-06-27  NaN           NaN  NaN
2025-06-28  NaN           NaN  NaN
2025-06-29  NaN           NaN  NaN
CPI last raw obs: 2025-05-01 00:00:00
CPI value on 2025-07-02: nan
Unemployment last raw obs: 2025-06-01 00:00:00
Unemployment value on 2025-07-02: nan
GDP last raw obs: 2025-01-01 00:00:00
GDP value on 2025-07-02: nan


In [4]:
import os
import asyncio
import uvloop
import aiohttp

# ─── Setup uvloop as the event loop for maximal performance ─────────────────
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

# ─── Configuration ─────────────────────────────────────────────────────────
API_KEY   = os.getenv('FRED_API_KEY')
BASE_URL  = 'https://api.stlouisfed.org/fred'
TIMEOUT   = aiohttp.ClientTimeout(total=5)   # total timeout for the single call

if not API_KEY:
    raise RuntimeError("FRED_API_KEY not set in environment")

# ─── Main coroutine ────────────────────────────────────────────────────────
async def main(search_term: str):
    # Use an unlimited connector so aiohttp can fully parallelize if you ever fan-out
    connector = aiohttp.TCPConnector(limit=0)

    # Single HTTP call to fetch titles & IDs in one go
    params = {
        'search_text': search_term,
        'api_key':     API_KEY,
        'file_type':   'json'
    }

    async with aiohttp.ClientSession(connector=connector, timeout=TIMEOUT) as session:
        async with session.get(f'{BASE_URL}/series/search', params=params) as resp:
            resp.raise_for_status()
            data = await resp.json()

    series = data.get('seriess', [])
    for s in series:
        print(f"{s['title']}: {s['id']}")

    print(f"\nTotal series found for '{search_term}': {len(series)}")

In [14]:
# ─── Entrypoint ─────────────────────────────────────────────────────────────
if __name__ == '__main__':
    term = 'finance'
    # asyncio.run(main(term))
    await main(term)  # Use await directly in Jupyter or interactive environments

Secured Overnight Financing Rate: SOFR
30-Day Average SOFR: SOFR30DAYAVG
Finance Rate on Consumer Installment Loans at Commercial Banks, New Autos 48 Month Loan: TERMCBAUTO48NS
Finance Rate on Consumer Installment Loans at Commercial Banks, New Autos 60 Month Loan: RIFLPBCIANM60NM
Average Amount Financed for New Car Loans at Finance Companies: DTCTLVENANM
Average Amount Financed for New Car Loans at Finance Companies (DISCONTINUED): DTCTLVENANQ
90-Day Average SOFR: SOFR90DAYAVG
Finance Rate on Personal Loans at Commercial Banks, 24 Month Loan: TERMCBPER24NS
Average Finance Rate of Used Car Loans at Finance Companies, Amount of Finance Weighted (DISCONTINUED): RIELPCFAUNQ
SOFR Index: SOFRINDEX
Secured Overnight Financing Volume: SOFRVOL
Finance Rate on Consumer Installment Loans at Commercial Banks, New Autos 72 Month Loan: RIFLPBCIANM72NM
Value Added by Industry: Finance, Insurance, Real Estate, Rental, and Leasing: Finance and Insurance as a Percentage of GDP: VAPGDPFI
Average Finance

In [8]:
import os
from dotenv import load_dotenv

# point to your vars.env file
load_dotenv(dotenv_path='/workspaces/smart_dev/ml-unified/vars.env')

api_key = os.getenv('FRED_API_KEY')
if not api_key:
    raise RuntimeError("FRED_API_KEY not set in vars.env")
print(f"FRED API Key: {api_key[:4]}... (truncated for security)")

FRED API Key: e8d2... (truncated for security)


In [13]:
import requests

api_key = 'e8d2479cd2afdf52f8216bb44daffe9f'  # Get at: research.stlouisfed.org/docs/api/api_key.html
search_term = 'finance'
url = f'https://api.stlouisfed.org/fred/series/search?search_text={search_term}&api_key={api_key}&file_type=json'

counter = 0
response = requests.get(url).json()
for series in response['seriess']:
    print(f"{series['title']}: {series['id']}")
    counter += 1

print(f"Total series found for '{search_term}': {counter}")

Secured Overnight Financing Rate: SOFR
30-Day Average SOFR: SOFR30DAYAVG
Finance Rate on Consumer Installment Loans at Commercial Banks, New Autos 48 Month Loan: TERMCBAUTO48NS
Finance Rate on Consumer Installment Loans at Commercial Banks, New Autos 60 Month Loan: RIFLPBCIANM60NM
Average Amount Financed for New Car Loans at Finance Companies: DTCTLVENANM
Average Amount Financed for New Car Loans at Finance Companies (DISCONTINUED): DTCTLVENANQ
90-Day Average SOFR: SOFR90DAYAVG
Finance Rate on Personal Loans at Commercial Banks, 24 Month Loan: TERMCBPER24NS
Average Finance Rate of Used Car Loans at Finance Companies, Amount of Finance Weighted (DISCONTINUED): RIELPCFAUNQ
SOFR Index: SOFRINDEX
Secured Overnight Financing Volume: SOFRVOL
Finance Rate on Consumer Installment Loans at Commercial Banks, New Autos 72 Month Loan: RIFLPBCIANM72NM
Value Added by Industry: Finance, Insurance, Real Estate, Rental, and Leasing: Finance and Insurance as a Percentage of GDP: VAPGDPFI
Average Finance

In [7]:
import pandas as pd
import duckdb
import json
from pathlib import Path

def load_and_save_results(db_path="/workspaces/smart_dev/projects/Notebooks/Stocks/flint/results/audit_log.duckdb", output_csv="audit_log_results.csv"):
    """
    Loads all analysis results from the DuckDB database, saves them to a CSV,
    and returns them as a pandas DataFrame.

    Args:
        db_path (str): The path to the DuckDB database file.
        output_csv (str): The filename for the output CSV file.

    Returns:
        pandas.DataFrame: A DataFrame containing all analysis results,
                          or an empty DataFrame if an error occurs.
    """
    db_file = Path(db_path)
    if not db_file.exists():
        print(f"Error: Database file not found at '{db_path}'")
        return pd.DataFrame()

    print(f"Connecting to '{db_path}'...")
    with duckdb.connect(database=str(db_file), read_only=True) as con:
        try:
            # Fetch the entire table directly into a pandas DataFrame
            df = con.execute("SELECT * FROM analysis_results").fetchdf()
            print(f"Successfully loaded {len(df)} records.")
        except duckdb.Error as e:
            print(f"An error occurred while querying the database: {e}")
            return pd.DataFrame()

    if df.empty:
        print("The 'analysis_results' table is empty.")
        return df

    # Save the full, raw DataFrame to a CSV file
    df.to_csv(output_csv, index=False)
    print(f"All {len(df)} records have been saved to '{output_csv}'.")
    
    return df

# --- Usage in your Jupyter Notebook cell ---

# 1. Call the function to get your data
results_df = load_and_save_results()

# 2. Display the first few rows of the main DataFrame
#    In Jupyter, this will automatically render as a nice HTML table.
results_df

Connecting to '/workspaces/smart_dev/projects/Notebooks/Stocks/flint/results/audit_log.duckdb'...
Successfully loaded 13 records.
All 13 records have been saved to 'audit_log_results.csv'.


Unnamed: 0,run_id,ticker,execution_timestamp,model_name,git_hash,run_config,predictions,metrics,shap_importance
0,1751732807225363968,RYCEY,2025-07-05 16:26:47.225379,Ensemble_v1,b30175110da76bb385640f9872c173302a06fd84,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.7497308254241943, 0.59436...","{""accuracy"": 0.5645161290322581, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
1,1751817325451127040,RYCEY,2025-07-06 15:55:25.451139,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.6810305714607239, 0.70815...","{""accuracy"": 0.7027649769585254, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
2,1751819020634740992,RYCEY,2025-07-06 16:23:40.634752,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.6794436573982239, 0.70998...","{""accuracy"": 0.6935483870967742, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
3,1751819696203219968,RYCEY,2025-07-06 16:34:56.203232,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.7475588917732239, 0.77200...","{""accuracy"": 0.6889400921658986, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
4,1751819958613993984,RYCEY,2025-07-06 16:39:18.614006,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.7443850636482239, 0.75662...","{""accuracy"": 0.6935483870967742, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
5,1752008569946716928,TSLA,2025-07-08 21:02:49.946792,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.37130093574523926, 0.1937...","{""accuracy"": 0.6296296296296297, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
6,1752008608955315968,RYCEY,2025-07-08 21:03:28.955327,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.5254659056663513, 0.66641...","{""accuracy"": 0.7274826789838337, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
7,1752008653498006016,AMC,2025-07-08 21:04:13.498027,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.8370150327682495, 0.85433...","{""accuracy"": 0.48842592592592593, ""kelly_fract...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
8,1752011003934937856,AMC,2025-07-08 21:43:23.934957,Ensemble_v2,0b0fe2ac9b86f4c401a367f576935813e735955e,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.8538607358932495, 0.84530...","{""accuracy"": 0.5208333333333334, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."
9,1752067112966824960,SABR,2025-07-09 13:18:32.966836,Ensemble_v2,2eb86b3fd531b8425826069c6802eaffcf237ff4,"{""XGBoost"": {""n_estimators"": 100, ""random_stat...","{""probabilities"": [0.6468956470489502, 0.67085...","{""accuracy"": 0.5879629629629629, ""kelly_fracti...","{""features"": [""RSI14"", ""MACD_line"", ""MACD_sign..."


In [6]:
import yfinance as yf
df = yf.Ticker('RYCEY').info
df

{'address1': 'Kings Place',
 'address2': '90 York Way',
 'city': 'London',
 'zip': 'N1 9FX',
 'country': 'United Kingdom',
 'phone': '44 1332 242 424',
 'website': 'https://www.rolls-royce.com',
 'industry': 'Aerospace & Defense',
 'industryKey': 'aerospace-defense',
 'industryDisp': 'Aerospace & Defense',
 'sector': 'Industrials',
 'sectorKey': 'industrials',
 'sectorDisp': 'Industrials',
 'longBusinessSummary': 'Rolls-Royce Holdings plc develops and delivers mission-critical power systems in the United Kingdom and internationally. The company operates through four segments: Civil Aerospace, Defence, Power Systems, and New Markets. The Civil Aerospace segment develops, manufactures, markets, and sells aero engines for large commercial aircraft, regional jet, and business aviation markets, as well as provides aftermarket services. The Defence segment is involved in the development, manufacture, marketing, and sale of military aero engines, naval engines, and submarine nuclear power pla

In [12]:
from IPython.display import display, HTML

def inspect_run_details(df, index):
    """
    Selects a single run by its index and displays its nested JSON columns
    (metrics, predictions, shap_importance) in a readable format.

    Args:
        df (pandas.DataFrame): The DataFrame containing all results.
        index (int): The integer index of the row you want to inspect.
    """
    if index not in df.index:
        print(f"Error: Index {index} is out of bounds. Please choose an index between 0 and {len(df)-1}.")
        return

    # Select the specific run (row) you want to inspect
    run_data = df.loc[index]
    
    print("-" * 50)
    display(HTML(f"<h3>Inspecting Run for Ticker: {run_data['ticker']} (Index: {index})</h3>"))
    print(f"Timestamp: {run_data['execution_timestamp']}")
    print("-" * 50)

    # --- 1. Unpack and display METRICS ---
    display(HTML("<h4>Metrics</h4>"))
    try:
        metrics_dict = json.loads(run_data['metrics'])
        metrics_series = pd.Series(metrics_dict, name="Value")
        display(metrics_series.to_frame())
    except (json.JSONDecodeError, TypeError):
        print("Could not parse the 'metrics' column.")

    # --- 2. Unpack and display PREDICTIONS ---
    display(HTML("<h4>Predictions (first 20 probabilities)</h4>"))
    try:
        predictions_dict = json.loads(run_data['predictions'])
        # The probabilities are a list inside the dictionary
        if 'probabilities' in predictions_dict:
            predictions_df = pd.DataFrame(predictions_dict['probabilities'], columns=['probability'])
            display(predictions_df.head(20))
        else:
            print("'probabilities' key not found in predictions JSON.")
    except (json.JSONDecodeError, TypeError):
        print("Could not parse the 'predictions' column.")

    # --- 3. Unpack and display SHAP IMPORTANCE ---
    # This assumes 'shap_importance' and 'feature_names' are in the 'run_config' JSON
    display(HTML("<h4>Feature Importance (SHAP)</h4>"))
    try:
        # The data is in the 'shap_importance' column
        shap_dict = json.loads(run_data['shap_importance'])
        
        if 'values' in shap_dict and 'features' in shap_dict:
            shap_series = pd.Series(
                shap_dict['values'], 
                index=shap_dict['features'],
                name="Mean SHAP Value"
            )
            # Sort for better readability
            display(shap_series.sort_values(ascending=False).to_frame())
        else:
            print("'values' or 'features' keys not found in 'shap_importance' JSON.")

    except (json.JSONDecodeError, TypeError):
        print("Could not parse the 'shap_importance' column. It might be malformed or empty.")
        print("Raw content:", run_data['shap_importance'])


# --- Usage in your Jupyter Notebook cell ---

# Assuming 'results_df' is the DataFrame from Part 1

# Inspect the details of the first run (index 0)
# inspect_run_details(results_df, 0)  # Inspect the last run by default

# Inspect the details of another run (e.g., index 5)
inspect_run_details(results_df, len(results_df) - 1)

--------------------------------------------------


Timestamp: 2025-07-08 21:43:23.934957
--------------------------------------------------


Unnamed: 0,Value
accuracy,0.520833
kelly_fraction,-0.179866
up_prob,0.281327
down_prob,0.718673
trend_strength,-0.245035
ci,"[-0.13128162274837252, 0.18427183735271793]"
n_sims,814
simulated_slopes,"[-0.12401038314700369, 0.006282805975799272, -..."


Unnamed: 0,probability
0,0.853861
1,0.8453
2,0.809569
3,0.763304
4,0.518225
5,0.714415
6,0.751109
7,0.739759
8,0.665142
9,0.672605


Unnamed: 0,Mean SHAP Value
ATR14,0.824338
BB_upper,0.820164
BB_mid,0.751472
BB_lower,0.56748
CCI20,0.471244
Volume,0.444994
MACD_hist,0.406409
MACD_line,0.360028
OBV,0.346973
%K,0.300573


In [None]:
import requests

# replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
url = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&apikey=demo'
r = requests.get(url)
data = r.json()

print(data)



In [None]:
import torch
import pandas as pd
# Load the file
pt_file = torch.load("/workspaces/smart_dev/projects/Notebooks/Stocks/flint/data/processed/DPST_data.pt", weights_only=False)

# Print the head of the file
display(pt_file['df_features'].tail())

pd.read_parquet("/workspaces/smart_dev/projects/Notebooks/Stocks/flint/data/DPST.parquet").tail()

Unnamed: 0,Date,Close,High,Low,Open,Volume,Return1,RSI14,MACD_line,MACD_signal,...,ConsumerSenti_InEventWindow,ProdPriceIdx_IsUpdateDay,ProdPriceIdx_DaysSinceUpdate,ProdPriceIdx_InEventWindow,Corr_Stock_FedFunds_60D,Corr_Stock_CPI_60D,TreasurySpread_RealVol_21D,Real_FedFunds,Stock_vs_GDP_Ratio,CPI_ROC_3M
2484,2025-07-09,103.290001,106.139999,101.610001,105.589996,788500,-0.019356,71.623382,6.751259,4.454234,...,0,0,69,0,0.091916,-0.080851,0.988861,1.799751,0.003447,0.368068
2485,2025-07-10,105.309998,107.625,102.370003,103.0,658700,1.955656,73.09467,7.124479,4.988283,...,0,0,70,0,0.091916,-0.082767,0.988758,1.799751,0.003515,0.368068
2486,2025-07-11,102.0,104.309998,101.300003,103.5,633900,-3.143099,66.967464,7.071651,5.404957,...,0,0,71,0,0.091916,-0.077573,1.010851,1.799751,0.003404,0.368068
2487,2025-07-14,105.370003,105.580002,101.349998,101.599998,630800,3.303924,69.747939,7.218506,5.767667,...,0,0,74,0,0.091916,-0.076801,1.010122,1.799751,0.003517,0.368068
2488,2025-07-15,94.599998,105.442001,94.106003,105.0,1573400,-10.221129,54.080777,6.392155,5.892564,...,0,0,75,0,0.091916,-0.067292,1.009503,1.799751,0.003157,0.368068


Unnamed: 0,Date,Close,High,Low,Open,Volume
2484,2025-07-09,103.290001,106.139999,101.610001,105.589996,788500
2485,2025-07-10,105.309998,107.625,102.370003,103.0,658700
2486,2025-07-11,102.0,104.309998,101.300003,103.5,633900
2487,2025-07-14,105.370003,105.580002,101.349998,101.599998,630800
2488,2025-07-15,94.599998,105.442001,94.106003,105.0,1573400


In [13]:
import os
from pathlib import Path
from typing import Union, List, Tuple

def convert_files_to_text(
    root_dir: str,
    output_dir: str,
    include_subdirs: Union[bool, List[str]] = True,
    exclude_dirs: List[str] = None,
    extensions: Tuple[str, ...] = ('.py', '.yml', '.toml'),
    exclude_files: List[str] = None
):
    """
    Convert files to text with flexible directory processing options.
    
    Args:
        root_dir: Directory containing source files
        output_dir: Where to save text versions
        include_subdirs: True=all, False=none, List=specific subdirs + root
        exclude_dirs: Directories to always exclude
        extensions: File extensions to process
        exclude_files: Specific filenames to exclude (including extensions)
    """
    # Create output directory if needed
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    
    # Default exclude directories and files
    if exclude_dirs is None:
        exclude_dirs = [".venv", ".vscode", ".git", "__pycache__"]
    if exclude_files is None:
        exclude_files = [".python-version", "*.ipynb", "*.md"]
    if extensions is None:
        extensions = '*'
    
    # Prepare files to process
    files_to_process = []
    
    # Always include root directory files
    files_to_process.extend(Path(root_dir).glob('*'))
    
    # Add subdirectories if specified
    if isinstance(include_subdirs, list):
        for subdir in include_subdirs:
            subdir_path = Path(root_dir) / subdir
            if subdir_path.exists():
                files_to_process.extend(subdir_path.rglob('*'))
    elif include_subdirs is True:
        # Include all subdirectories recursively
        files_to_process.extend(Path(root_dir).rglob('*'))
    
    # Track output filenames to prevent overwrites
    output_files = set()
    
    # NEW: Handle wildcard extension logic
    process_all_files = False
    if extensions == '*' or extensions == ('*',) or extensions is None:
        process_all_files = True
    
    for filepath in files_to_process:
        # Skip directories and excluded files
        if (not filepath.is_file() or 
            any(excluded in filepath.parts for excluded in exclude_dirs) or
            filepath.name in exclude_files):
            continue
            
        # MODIFIED: Extension check logic
        if process_all_files or filepath.suffix.lower() in extensions:
            # Get relative path to maintain structure
            rel_path = filepath.relative_to(root_dir)
            
            # Create unique output filename that preserves original extension
            output_filename = f"{rel_path.stem}_{rel_path.suffix[1:]}.txt"
            output_path = Path(output_dir) / rel_path.with_name(output_filename)
            
            # Ensure we don't overwrite files
            if output_path in output_files:
                print(f"Warning: Skipping potential overwrite of {output_path}")
                continue
                
            output_files.add(output_path)
            
            # Ensure parent directory exists
            output_path.parent.mkdir(parents=True, exist_ok=True)
            
            # Read and write the file
            try:
                with open(filepath, 'r', encoding='utf-8') as infile, \
                     open(output_path, 'w', encoding='utf-8') as outfile:
                    outfile.write(infile.read())
                
                print(f"Converted: {filepath} → {output_path}")
            except Exception as e:
                print(f"Error processing {filepath}: {str(e)}")


# Example usage
if __name__ == "__main__":
    convert_files_to_text(
        root_dir="/workspaces/smart_dev/projects/Notebooks/Stocks",
        output_dir="/workspaces/smart_dev/projects/Notebooks/Stocks/text_output",
        include_subdirs=['flint'],
        extensions=None,
        exclude_dirs=[".venv", ".vscode", ".git", "__pycache__", ".ruff_cache", "data", "results", "ml-unified"],
        exclude_files=[".python-version", "local_settings.py", "stock_algo.ipynb", "ruff_check.log", "README.md"]  # Additional excludes
    )

Converted: /workspaces/smart_dev/projects/Notebooks/Stocks/audit_log_results.csv → /workspaces/smart_dev/projects/Notebooks/Stocks/text_output/audit_log_results_csv.txt
Converted: /workspaces/smart_dev/projects/Notebooks/Stocks/audit_log.txt → /workspaces/smart_dev/projects/Notebooks/Stocks/text_output/audit_log_txt.txt
Converted: /workspaces/smart_dev/projects/Notebooks/Stocks/predictors.py → /workspaces/smart_dev/projects/Notebooks/Stocks/text_output/predictors_py.txt
Converted: /workspaces/smart_dev/projects/Notebooks/Stocks/main.py → /workspaces/smart_dev/projects/Notebooks/Stocks/text_output/main_py.txt
Converted: /workspaces/smart_dev/projects/Notebooks/Stocks/flint/validate.py → /workspaces/smart_dev/projects/Notebooks/Stocks/text_output/flint/validate_py.txt
Converted: /workspaces/smart_dev/projects/Notebooks/Stocks/flint/run.py → /workspaces/smart_dev/projects/Notebooks/Stocks/text_output/flint/run_py.txt
Converted: /workspaces/smart_dev/projects/Notebooks/Stocks/flint/audit_l

In [None]:
# fig = make_subplots(rows=1, cols=2)
# 7.1 Price chart with predicted buy/sell signals
fig1 = go.Figure()
# fig1.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Close Price'))
fig1.add_trace(go.Scatter(x=df.index, y=df['Close'], name="Signal", mode="lines"))
# Mark signals in the test period
test_dates = test_df.index  # dates corresponding to X_test
# Align Transformer's test predictions to actual dates (account for window offset)
signal_start_idx = train_df.index[-1]  # last train date
signal_dates = df.index[train_size:]   # dates from start of test
signals = pd.Series(test_preds, index=signal_dates[:len(test_preds)])
# Plot buy signals (predicted up) and sell signals (predicted down)
buy_signals = signals[signals == 1].index
sell_signals = signals[signals == 0].index
fig1.add_trace(go.Scatter(x=buy_signals, y=df.loc[buy_signals, 'Close'], 
                          mode='markers', marker_symbol='triangle-up', 
                          marker_color='green', marker_size=10, name='Predicted Buy'))
fig1.add_trace(go.Scatter(x=sell_signals, y=df.loc[sell_signals, 'Close'], 
                          mode='markers', marker_symbol='triangle-down', 
                          marker_color='red', marker_size=10, name='Predicted Sell'))
fig1.update_layout(title=f"{symbol} Price with Model Signals", 
                   yaxis_title="Price", xaxis_title="Date")

# 7.2 Feature importance bar chart (top 10 features from SHAP)
top_features = feature_importance.head(10)[::-1]  # reverse for plotting (smallest to largest)
fig2 = go.Figure(go.Bar(x=top_features.values, y=top_features.index, orientation='h', marker_color='blue'))
fig2.update_layout(title="Top 10 Feature Importances (SHAP)", xaxis_title="Mean |SHAP|", yaxis_title="Feature")

# 7.3 Display the interactive charts
fig1.show()
fig2.show()


In [1]:
%%bash
echo "uv run run.py TSLA RYCEY AMC"
uv run /workspaces/smart_dev/projects/Notebooks/Stocks/flint/run.py TSLA > /workspaces/smart_dev/projects/Notebooks/Stocks/flint/run_results.txt 2>&1

uv run run.py TSLA RYCEY AMC


CalledProcessError: Command 'b'echo "uv run run.py TSLA RYCEY AMC"\nuv run /workspaces/smart_dev/projects/Notebooks/Stocks/flint/run.py TSLA > /workspaces/smart_dev/projects/Notebooks/Stocks/flint/run_results.txt 2>&1\n'' returned non-zero exit status 1.

In [18]:
import yfinance as yf
from datetime import date
import pandas as pd
from IPython.display import display, HTML, Markdown

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 8)

def section_header(title):
    display(Markdown(f"## {title}"))
    
def sub_header(title):
    display(Markdown(f"**{title}**"))

# ========================
# OHLCV_VARIANTS
# ========================
section_header("OHLCV Variants")

sub_header("1. Intraday 1-minute data (TSLA)")
display(yf.download("TSLA", period="7d", interval="1h", prepost=True).tail(3))

sub_header("2. Corporate action-adjusted data (AAPL)")
display(yf.download("AAPL", start="2023-01-01", auto_adjust=True, actions=True).tail(3))

sub_header("3. Continuous futures (ES=F)")
display(yf.download("ES=F", period="1y", interval="1d", auto_adjust=False, back_adjust=True).tail(3))

sub_header("4. Repaired prices (BA)")
display(yf.download("BA", start="2020-01-01", end="2020-05-31", interval="1d", repair=True))

# ========================
# FUNDAMENTALS
# ========================
section_header("Fundamentals: MSFT")
tkr = yf.Ticker("MSFT")

fundamentals = pd.Series({
    "Forward P/E": tkr.info.get('forwardPE', 'N/A'),
    "Beta": tkr.info.get('beta', 'N/A'),
    "Trailing EPS": tkr.info.get('trailingEps', 'N/A'),
    "Dividend Yield": tkr.info.get('dividendYield', 'N/A'),
    "Float Shares": tkr.info.get('floatShares', 'N/A'),
    "Short % of Float": tkr.info.get('shortPercentOfFloat', 'N/A'),
    "Institutional Ownership": tkr.info.get('heldPercentInstitutions', 'N/A')
})
display(fundamentals.to_frame(name="Value"))

sub_header("Earnings Dates")
display(tkr.earnings_dates.tail(3))

sub_header("Recent Dividends")
display(tkr.dividends.tail(3).to_frame(name="Dividend"))

sub_header("Major Holders")
display(tkr.major_holders)

# ========================
# OPTIONS_CHAINS
# ========================
section_header("Options Chains: MSFT")

try:
    exp_date = tkr.options[0]
    opt = tkr.option_chain(exp_date)
    
    sub_header(f"Calls for {exp_date}")
    display(opt.calls[['strike', 'openInterest', 'impliedVolatility']].head(3))
    
    sub_header(f"Puts for {exp_date}")
    puts_columns = ['strike', 'inTheMoney', 'openInterest', 'impliedVolatility']
    if 'delta' in opt.puts.columns:
        puts_columns.append('delta')
    display(opt.puts[puts_columns].head(3))
except IndexError:
    display(HTML("<i>No options data available</i>"))


## OHLCV Variants

**1. Intraday 1-minute data (TSLA)**

  display(yf.download("TSLA", period="7d", interval="1h", prepost=True).tail(3))
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,TSLA,TSLA,TSLA,TSLA,TSLA
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-08 14:30:00+00:00,301.915009,304.049408,298.860199,298.950012,19387942
2025-07-08 15:30:00+00:00,302.25,302.98999,300.820007,301.909912,8552507
2025-07-08 16:30:00+00:00,301.190094,302.399994,300.730011,302.26001,3134659


**2. Corporate action-adjusted data (AAPL)**

[*********************100%***********************]  1 of 1 completed


Price,Close,Dividends,High,Low,Open,Stock Splits,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2023-01-27,144.063873,0.0,145.347252,141.250327,141.329306,0.0,70555800
2023-01-30,141.17131,0.0,143.688704,141.023235,143.106252,0.0,64015300
2023-01-31,142.444824,0.0,142.494188,140.460533,140.875161,0.0,65874500


**3. Continuous futures (ES=F)**

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,ES=F,ES=F,ES=F,ES=F,ES=F
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-04,6283.5,6322.75,6276.5,6320.75,0
2025-07-07,6276.0,6315.0,6246.25,6307.75,0
2025-07-08,6278.0,6289.0,6254.5,6262.5,657353


**4. Repaired prices (BA)**

  display(yf.download("BA", start="2020-01-01", end="2020-05-31", interval="1d", repair=True))
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Repaired?,Volume
Ticker,BA,BA,BA,BA,BA,BA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2020-01-02,331.348572,331.378393,325.761816,326.606765,False,4544400
2020-01-03,330.791901,332.909308,328.346428,328.674494,False,3875900
2020-01-06,331.766083,332.879454,325.940756,327.352341,False,5355000
2020-01-07,335.285156,342.154291,328.754007,332.283029,False,9898600
...,...,...,...,...,...,...
2020-05-26,144.729996,145.910004,142.610001,145.210007,False,30338300
2020-05-27,149.520004,149.649994,141.240005,149.139999,False,32799900
2020-05-28,149.820007,156.699997,149.050003,156.100006,False,34734300
2020-05-29,145.850006,152.000000,142.940002,145.300003,False,33853900


## Fundamentals: MSFT

Unnamed: 0,Value
Forward P/E,33.17057
Beta,1.026
Trailing EPS,12.94
Dividend Yield,0.67
Float Shares,7422064000.0
Short % of Float,0.0069
Institutional Ownership,0.74637


**Earnings Dates**

Unnamed: 0_level_0,EPS Estimate,Reported EPS,Surprise(%),Event Type
Earnings Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-04-25 16:05:00-04:00,2.23,2.45,9.81,Earnings
2023-01-24 16:04:00-05:00,2.29,2.32,1.09,Earnings
2022-10-25 16:02:00-04:00,2.3,2.35,2.05,Earnings


**Recent Dividends**

Unnamed: 0_level_0,Dividend
Date,Unnamed: 1_level_1
2024-11-21 00:00:00-05:00,0.83
2025-02-20 00:00:00-05:00,0.83
2025-05-15 00:00:00-04:00,0.83


**Major Holders**

Breakdown,Value
insidersPercentHeld,0.00062
institutionsPercentHeld,0.74637
institutionsFloatPercentHeld,0.74683
institutionsCount,7442.0


## Options Chains: MSFT

**Calls for 2025-07-11**

Unnamed: 0,strike,openInterest,impliedVolatility
0,230.0,1,3.890625
1,240.0,7,2.632816
2,245.0,21,3.582032


**Puts for 2025-07-11**

Unnamed: 0,strike,inTheMoney,openInterest,impliedVolatility
0,300.0,False,0,1.406253
1,305.0,False,0,1.375003
2,310.0,False,0,1.312503


In [19]:

# ========================
# SEARCH_LOOKUP
# ========================
section_header("Search & Lookup")

lookup_data = {
    "BRK.B ISIN": yf.Ticker("BRK.B").isin,
    "NVDA Company Name": yf.Ticker("NVDA").info['shortName'],
    "AMD Confirmed Symbol": yf.Tickers(["AMD"]).tickers['AMD'].info['symbol']
}
display(pd.Series(lookup_data).to_frame(name="Value"))

# ========================
# SECTOR_INDUSTRY
# ========================
section_header("Sector & Industry: JPM")
jpm = yf.Ticker("JPM")

sector_info = pd.Series({
    "Sector": jpm.info.get('sector', 'N/A'),
    "Industry": jpm.info.get('industry', 'N/A'),
    "Employees": jpm.info.get('fullTimeEmployees', 'N/A')
})
display(sector_info.to_frame(name="Value"))

# ========================
# SCREENER_QUERY
# ========================
section_header("Screener Query: MSFT")
msft = yf.Ticker("MSFT")

screener_results = []
if msft.info.get('marketCap', 0) > 1e12:
    screener_results.append("Large-cap stock")
if msft.info.get('volume', 0) > 1e7:
    screener_results.append("Highly liquid")
if msft.info.get('trailingPE', float('inf')) < 20:
    screener_results.append("Low P/E")

display(pd.Series(screener_results, name="Screener Results") if screener_results 
        else display("No screener matches"))

# ========================
# MULTI_TICKER
# ========================
section_header("Multi-Ticker Download")

sub_header("Grouped by Ticker (Threaded)")
display(yf.download(["TSLA", "RIVN", "LCID"], period="1d", group_by="ticker", threads=True))

sub_header("Grouped by Column")
display(yf.download("AAPL MSFT", period="1d", group_by="column", threads=False).tail(3))

# ========================
# ERROR_HANDLING
# ========================
section_header("Error Handling")

sub_header("Invalid Ticker")
try:
    yf.download("INVALID_TICKER")
except Exception as e:
    display(HTML(f"<span style='color:red'>Error: {str(e)}</span>"))

sub_header("Stale Data Check")
hist = yf.download("MSFT", period="5d")
if hist.index[-1].date() < date.today():
    display(HTML("<span style='color:orange'>Warning: Data might be stale</span>"))
else:
    display("Data is up-to-date")

## Search & Lookup

Unnamed: 0,Value
BRK.B ISIN,US0846707026
NVDA Company Name,NVIDIA Corporation
AMD Confirmed Symbol,AMD


## Sector & Industry: JPM

Unnamed: 0,Value
Sector,Financial Services
Industry,Banks - Diversified
Employees,318477


## Screener Query: MSFT

0    Large-cap stock
Name: Screener Results, dtype: object

## Multi-Ticker Download

**Grouped by Ticker (Threaded)**

  display(yf.download(["TSLA", "RIVN", "LCID"], period="1d", group_by="ticker", threads=True))
[*********************100%***********************]  3 of 3 completed


Ticker,TSLA,TSLA,TSLA,TSLA,TSLA,RIVN,RIVN,RIVN,RIVN,RIVN,LCID,LCID,LCID,LCID,LCID
Price,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2025-07-08,296.880005,304.049408,295.641602,301.070892,56739675,12.79,13.29,12.78,13.155,13494175,2.12,2.27,2.11,2.265,108747111


**Grouped by Column**

  display(yf.download("AAPL MSFT", period="1d", group_by="column", threads=False).tail(3))
[*********************100%***********************]  2 of 2 completed


Price,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Ticker,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2025-07-08,210.550003,495.790009,211.429993,498.200012,208.449997,494.109985,210.130005,497.410004,20469083,4737048


## Error Handling

**Invalid Ticker**

  yf.download("INVALID_TICKER")
HTTP Error 404: 
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['INVALID_TICKER']: YFPricesMissingError('possibly delisted; no price data found  (period=1mo) (Yahoo error = "No data found, symbol may be delisted")')


**Stale Data Check**

  hist = yf.download("MSFT", period="5d")
[*********************100%***********************]  1 of 1 completed


'Data is up-to-date'