In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler 

In [12]:
def preprocess_yfinance(df: pd.DataFrame) -> pd.DataFrame:
    """Clean a raw yfinance CSV export into a Date / Close / Volume style frame.

    The caller's frame is left untouched; all work happens on a new frame.

    Steps:
      * rename the ``Price`` column to ``Date``;
      * drop the ``Open``, ``High`` and ``Low`` columns;
      * discard the first two rows (presumably the extra ticker/date header
        rows yfinance writes for multi-level columns -- TODO confirm against
        the CSV files);
      * parse ``Date`` as datetimes and express ``Volume`` in millions.

    Args:
        df: Raw frame with at least the columns ``Price``, ``Open``, ``High``,
            ``Low`` and ``Volume``.

    Returns:
        A new DataFrame with the remaining rows (original index labels are
        kept), ``Date`` parsed by ``pd.to_datetime`` and ``Volume`` divided
        by 1,000,000.
    """
    cleaned = (
        df.rename(columns={'Price': 'Date'})
          .drop(columns=['Open', 'High', 'Low'])
          .iloc[2:]
          .copy()  # own the data so the assignments below never touch the input
    )
    cleaned['Date'] = pd.to_datetime(cleaned['Date'])
    cleaned['Volume'] = cleaned['Volume'].astype(int) / 1000000
    return cleaned

def preprocess_FRED(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse a FRED series to one row per calendar month, dated on the 1st.

    The caller's frame is left untouched; a new frame is returned.

    Args:
        df: Raw frame with an ``observation_date`` column plus one or more
            value columns.

    Returns:
        A new DataFrame sorted by (year, month) with a ``Date`` column set to
        the first day of each month, followed by the value columns. Each value
        is the first non-null entry of its column within that month (the
        semantics of ``groupby(...).first()``).
    """
    series = df.rename(columns={'observation_date': 'Date'})
    series['Date'] = pd.to_datetime(series['Date'])

    monthly = (
        series.assign(Year=series['Date'].dt.year, Month=series['Date'].dt.month)
              .groupby(['Year', 'Month'])
              .first()  # first non-null value per column within each month
              .reset_index()
    )
    # Snap every date to the 1st so it lines up with the monthly index data.
    monthly['Date'] = monthly['Date'].apply(lambda ts: ts.replace(day=1))
    return monthly.drop(columns=['Year', 'Month'])

def prepare_data(df: pd.DataFrame, test_size: int) -> tuple:
    """
    Builds the forward 12-period return target, scales the features and splits the data in order.

    Args:
        df (pd.DataFrame): The input dataframe; must contain a 'Close_spx' column. It is copied, not modified.
        test_size (int): The number of samples to leave for testing (forwarded to train_test_split).

    Returns:
        tuple: (X_train, X_test, y_train, y_test, scaler, df_copy) where
            X_train, X_test, y_train, y_test are the unshuffled split of the scaled features / raw target,
            scaler is the fitted MinMaxScaler (feature range -100..100),
            df_copy is the full prepared frame (scaled features plus the unscaled 'Target' column).
    """
    df_copy = df.copy()

    # Target = percentage return over the NEXT 12 rows:
    #     (close[t + 12] / close[t] - 1) * 100
    # pct_change(-12) is NOT this quantity: it yields close[t] / close[t + 12] - 1,
    # i.e. the change measured against the future price, with the opposite sign.
    close = df_copy['Close_spx']
    df_copy['Target'] = (close.shift(-12) / close - 1) * 100
    # The last 12 rows have no future price, so their target is NaN; they are
    # removed here together with any other row that contains a NaN.
    df_copy.dropna(inplace=True)

    scaler = MinMaxScaler(feature_range=(-100, 100))
    cols_to_scale = [col for col in df_copy.columns if 'Target' not in col]
    # NOTE(review): the scaler is fit on every remaining row, including the rows
    # that end up in the test split below, so test-period min/max leak into the
    # training features. Confirm whether that is acceptable for this experiment.
    df_copy[cols_to_scale] = pd.DataFrame(
        scaler.fit_transform(df_copy[cols_to_scale]),
        columns=cols_to_scale,
        index=df_copy.index,
    )

    X = df_copy.drop(columns='Target')
    y = df_copy['Target']

    # shuffle=False keeps row order, so the test set is the trailing block of rows.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False)

    return X_train, X_test, y_train, y_test, scaler, df_copy

In [46]:
# Raw inputs: three yfinance index exports and two FRED yield series.
spx = pd.read_csv(r"../reproducability/data/^SPX_1mo_1980-01-01_to_2023-01-01.csv")
ndx = pd.read_csv(r"../reproducability/data/^NDX_1mo_1980-01-01_to_2023-01-01.csv")
vix = pd.read_csv(r"../reproducability/data/^VIX_1mo_1994-01-01_to_2023-01-01.csv")
long_term_yields = pd.read_csv(r'../reproducability/data/LongTermBondYields2.csv')
lower_yields = pd.read_csv(r"../reproducability/data/Lower US High Yield Index Effective Yield2.csv")

# Clean each source with its matching preprocessing helper.
spx, ndx, vix = (preprocess_yfinance(raw) for raw in (spx, ndx, vix))
lower_yields, long_term_yields = (preprocess_FRED(raw) for raw in (lower_yields, long_term_yields))

# Inner-join everything on Date, index by Date, then tidy the VIX columns:
# after the first merge only the VIX frame still carries bare 'Close'/'Volume'
# names, so 'Close' becomes 'Close_vix' and its 'Volume' column is discarded.
df = (
    spx.merge(ndx, on='Date', how='inner', suffixes=('_spx', '_ndx'))
       .merge(vix, on='Date', how='inner', suffixes=('_spx', '_vix'))
       .merge(long_term_yields, on='Date', how='inner')
       .merge(lower_yields, on='Date', how='inner')
       .set_index('Date')
       .rename(columns={'Close': "Close_vix"})
       .drop(columns=['Volume'])
       .apply(pd.to_numeric)  # make sure every column is numeric
)


In [47]:
# Three expanding windows over the merged frame, each ending at the given date (inclusive label slice).
df_1, df_2, df_3 = (df.loc[:cutoff] for cutoff in ('2002-01-01', '2009-01-01', '2023-01-01'))

# Scale and split the longest window, holding out 12 samples for testing.
X_train, X_test, y_train, y_test, scaler, df_copy = prepare_data(df_3, test_size=12)

In [49]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Window to visualise. The variable name is kept unchanged (spelling included)
# in case other cells reference it.
intresting_period = {'start': '2020-01-01', 'end': '2023-02-01'}
sample = df.loc[intresting_period['start']:intresting_period['end']]
# The features are plotted in the model's scaled space: `scaler` is the
# MinMaxScaler returned by prepare_data (feature range -100..100), so the
# y-values below are not dollars, raw volumes or percent yields.
sample = pd.DataFrame(scaler.transform(sample), columns=sample.columns, index=sample.index)

# One entry per subplot, listed in row-major grid order. `fillcolor` is optional
# and turns the line into a filled area (used for the two volume panels).
panels = [
    dict(column='Close_spx', title='S&P 500 Close Price', name='SPX Close',
         color='#1f77b4', row=1, col=1, y_title='Price (scaled)'),
    dict(column='Close_ndx', title='NASDAQ-100 Close Price', name='NDX Close',
         color='#ff7f0e', row=1, col=2, y_title='Price (scaled)'),
    dict(column='Close_vix', title='VIX Close Price', name='VIX Close',
         color='#d62728', row=2, col=1, y_title='Index (scaled)'),
    dict(column='Volume_spx', title='S&P 500 Volume', name='SPX Volume',
         color='#2ca02c', row=2, col=2, y_title='Volume (scaled)',
         fillcolor='rgba(44, 160, 44, 0.3)'),
    dict(column='Volume_ndx', title='NASDAQ-100 Volume', name='NDX Volume',
         color='#9467bd', row=3, col=1, y_title='Volume (scaled)',
         fillcolor='rgba(148, 103, 189, 0.3)'),
    dict(column='IRLTCT01USM156N', title='Long Term Bond Yields(IRL)', name='Long Term Yields(IRL)',
         color='#8c564b', row=3, col=2, y_title='Yield (scaled)'),
    dict(column='BAMLH0A3HYCEY', title='Lower US High Yield Index (BAM)', name='High Yield Index(BAM)',
         color='#e377c2', row=4, col=1, y_title='Yield (scaled)'),
]

# 4x2 grid; the eighth cell stays empty, hence the trailing blank title.
fig = make_subplots(
    rows=4, cols=2,
    subplot_titles=tuple(panel['title'] for panel in panels) + ('',),
    vertical_spacing=0.08,
    horizontal_spacing=0.1
)

# Add one line trace per panel.
for panel in panels:
    fill_kwargs = {}
    if 'fillcolor' in panel:
        fill_kwargs = dict(fill='tozeroy', fillcolor=panel['fillcolor'])
    fig.add_trace(
        go.Scatter(x=sample.index, y=sample[panel['column']],
                   name=panel['name'], line=dict(color=panel['color'], width=2),
                   **fill_kwargs),
        row=panel['row'], col=panel['col']
    )

# Update layout
fig.update_layout(
    title={
        'text': f'Market Indicators Overview ({intresting_period["start"]} - {intresting_period["end"]})',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 24, 'color': '#2c3e50', 'family': 'Arial Black'}
    },
    showlegend=False,
    height=1200,
    width=1400,
    template='plotly_white',
    font=dict(family='Arial', size=11, color='#2c3e50'),
    hovermode='x unified'
)

# Shared grid / axis-line styling for every subplot.
axis_style = dict(showgrid=True, gridwidth=1, gridcolor='#ecf0f1',
                  showline=True, linewidth=2, linecolor='#bdc3c7')
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

# Per-panel y-axis labels; they say "scaled" because the data went through `scaler`.
for panel in panels:
    fig.update_yaxes(title_text=panel['y_title'], row=panel['row'], col=panel['col'])

fig.show()

In [52]:
import os

import joblib

# Location of the fitted ElasticNet "oracle". The default is the original
# machine-specific absolute path; set the ORACLE_MODEL_PATH environment variable
# to run this notebook on another machine without editing the cell.
ORACLE_MODEL_PATH = os.environ.get(
    "ORACLE_MODEL_PATH",
    r"/Users/adirserruya/Desktop/GRID/oracles/oracle_1_ElasticNet.joblib",
)

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
m = joblib.load(ORACLE_MODEL_PATH)

In [None]:
# ElasticNet coefficients, ordered by absolute magnitude (largest first).
coefficients = (
    pd.DataFrame({'Feature': m.feature_names_in_, 'Coefficient': m.coef_})
      .sort_values(by='Coefficient', key=lambda col: col.abs(), ascending=False)
)

# Red bars for negative weights, green for everything else.
colors = np.where(coefficients['Coefficient'] < 0, '#d62728', '#2ca02c').tolist()

# Horizontal bar chart with the rounded coefficient printed next to each bar.
fig = go.Figure(
    data=[
        go.Bar(
            x=coefficients['Coefficient'],
            y=coefficients['Feature'],
            orientation='h',
            marker=dict(color=colors),
            text=coefficients['Coefficient'].round(4),
            textposition='outside',
            hovertemplate='<b>%{y}</b><br>Coefficient: %{x:.4f}<extra></extra>',
        )
    ]
)

fig.update_layout(
    title=dict(
        text='ElasticNet Model Coefficients',
        x=0.5,
        xanchor='center',
        font=dict(size=20, color='#2c3e50', family='Arial Black'),
    ),
    xaxis_title='Coefficient Value',
    yaxis_title='Feature',
    height=500,
    width=1000,
    template='plotly_white',
    font=dict(family='Arial', size=12, color='#2c3e50'),
    showlegend=False,
    hovermode='closest',
)

# Emphasise the zero line on x; no grid on the categorical y axis.
fig.update_xaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='#ecf0f1',
    zeroline=True,
    zerolinewidth=2,
    zerolinecolor='#34495e',
)
fig.update_yaxes(showgrid=False)

fig.show()

# Plain-text summary of the fitted model.
print(f"\nModel Intercept: {m.intercept_:.4f}")
print("\nCoefficients Summary:")
print(coefficients.to_string(index=False))