# Stress test, beta only

In [1]:
import sys
import pandas as pd
import numpy as np
import random
import tensorflow as tf
import plotly.express as px
import plotly.graph_objects as go
import os
os.environ["PYTHONHASHSEED"] = "42"
os.environ["TF_DETERMINISTIC_OPS"] = "1"
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pathlib import Path

project_root = Path.cwd()
while project_root != project_root.parent and not (project_root / "src").exists():
    project_root = project_root.parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from src.simulation.dgp0 import Tier0Config, simulate_panel
from src.simulation.validation import plot_market_plotly
from src.data.feature_eng import feature_eng_syn
from src.model.autoencoder import PriceAutoencoder


#set seed
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
df_beta = pd.read_parquet("../data/processed_syn/synth_dgp0_beta_only_L18.parquet")
df_beta.head()

Unnamed: 0,market_id,window_start,window_end,window_length,Price 1,Price 2,Price 3,Price 4,Price 5,Price 6,...,Price 14,Price 15,Price 16,Price 17,Price 18,share_C,share_T,share_K,state_mode,is_pure_80
0,0,0,17,18,0.066714,0.063165,0.056164,0.046739,0.05137,0.051328,...,0.001583,-0.006167,-0.012466,-0.024368,-0.020971,0.111111,0.888889,0.0,1,1.0
1,0,1,18,18,0.063165,0.056164,0.046739,0.05137,0.051328,0.055144,...,-0.006167,-0.012466,-0.024368,-0.020971,-0.078333,0.166667,0.833333,0.0,1,1.0
2,0,2,19,18,0.056164,0.046739,0.05137,0.051328,0.055144,0.032062,...,-0.012466,-0.024368,-0.020971,-0.078333,-0.095205,0.222222,0.777778,0.0,1,0.0
3,0,3,20,18,0.046739,0.05137,0.051328,0.055144,0.032062,0.056263,...,-0.024368,-0.020971,-0.078333,-0.095205,-0.105137,0.277778,0.722222,0.0,1,0.0
4,0,4,21,18,0.05137,0.051328,0.055144,0.032062,0.056263,0.062923,...,-0.020971,-0.078333,-0.095205,-0.105137,-0.100656,0.333333,0.666667,0.0,1,0.0


In [3]:
feature_df = feature_eng_syn(df_beta)
feature_df.head()

Unnamed: 0,market_id,window_start,window_end,window_length,Price 1,Price 2,Price 3,Price 4,Price 5,Price 6,...,CoV_change,zero_change_fraction,AR_1,AR_2,kurtosis_change,max_abs_ret,pos_vol,neg_vol,level_vol,price_range
0,0,0,17,18,0.066714,0.063165,0.056164,0.046739,0.05137,0.051328,...,2.414924,0.058824,-0.127866,0.033985,1.999154,0.035017,0.008844,0.009518,0.031747,0.091082
1,0,1,18,18,0.063165,0.056164,0.046739,0.05137,0.051328,0.055144,...,2.099165,0.058824,-0.210967,0.113727,2.90299,0.057362,0.008844,0.016134,0.039876,0.141498
2,0,2,19,18,0.056164,0.046739,0.05137,0.051328,0.055144,0.032062,...,1.97464,0.058824,-0.069755,0.100084,2.563596,0.057362,0.008844,0.01592,0.047528,0.158129
3,0,3,20,18,0.046739,0.05137,0.051328,0.055144,0.032062,0.056263,...,1.968199,0.058824,-0.067532,0.075652,2.552966,0.057362,0.008844,0.015901,0.054151,0.168061
4,0,4,21,18,0.05137,0.051328,0.055144,0.032062,0.056263,0.062923,...,1.965507,0.058824,-0.095961,0.014479,2.558677,0.057362,0.008861,0.015901,0.058668,0.168061


## Training AE

In [4]:
FEATURES_5 = [
    "volatility", "zero_change_fraction","max_abs_ret",
    "AR_1","price_range"]

X = feature_df[FEATURES_5].to_numpy().astype(np.float32)

# Scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X).astype(np.float32)

#train test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)

In [5]:
ae = PriceAutoencoder(input_dim=5, latent_dim=2, hidden_dims=(16,8), latent_activation=None)
ae.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss="mse")

history = ae.fit(
    X_train, X_train,
    validation_data=(X_val, X_val),
    epochs=200,
    batch_size=256,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)],
    verbose=1
)

Epoch 1/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.8315 - val_loss: 0.5612
Epoch 2/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.3003 - val_loss: 0.2023
Epoch 3/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1827 - val_loss: 0.1639
Epoch 4/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1580 - val_loss: 0.1483
Epoch 5/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1472 - val_loss: 0.1411
Epoch 6/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1410 - val_loss: 0.1363
Epoch 7/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1366 - val_loss: 0.1330
Epoch 8/200
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1335 - val_loss: 0.1304
Epoch 9/200
[1m102/102[0m [32

## Checking Embedding

In [6]:
Z = ae.encoder(X_scaled).numpy()

validation_df = feature_df.copy()

validation_df["z1"] = Z[:, 0]
validation_df["z2"] = Z[:, 1]

In [15]:
#plotting embedding
# Focus on pure windows only for clarity
pure = validation_df[validation_df["is_pure_80"] == 1].copy()

# Map state labels for readability
state_map = {0: "Competitive", 1: "Tacit", 2: "Cartel"}
pure["state_label"] = pure["state_mode"].map(state_map)

fig = px.scatter(
    pure,
    x="z1",
    y="z2",
    color="state_label",
    color_discrete_map={
        "Competitive": "green",
        "Tacit": "orange",
        "Cartel": "red",
    },
    title="Latent Space (Pure Windows Only) Beta Only",
    opacity=0.6
)

fig.update_layout(template="plotly_white")
fig.show()

## Calulating Centriods

In [8]:
Z_pure = pure[["z1","z2"]].to_numpy()


#cacluating centroids
mu_C = pure[pure["state_mode"] == 0][["z1","z2"]].mean().to_numpy()
mu_K = pure[pure["state_mode"] == 2][["z1","z2"]].mean().to_numpy()
mu_T = pure[pure["state_mode"] == 1][["z1","z2"]].mean().to_numpy()

#calculating competition direction and scalling 
v = mu_K - mu_C
v = v / np.linalg.norm(v)

In [9]:
#projecting each coordinate on competitoin axis
Z_all = validation_df[["z1","z2"]].to_numpy()
validation_df["conduct_score"] = Z_all @ v
validation_df["conduct_score_centred"] = (Z_all - mu_C) @ v

In [10]:
validation_df.groupby("state_mode")["conduct_score"].mean()

state_mode
0   -0.875820
1   -0.183474
2    0.628294
Name: conduct_score, dtype: float32

In [11]:
validation_df.groupby("state_mode")["conduct_score"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
state_mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,9572.0,-0.87582,1.762113,-8.674465,-1.748693,-0.829645,-0.357198,7.57128
1,9494.0,-0.183474,1.937697,-7.07156,-0.943801,-0.527588,0.162733,11.689965
2,13534.0,0.628294,1.875236,-7.129918,-0.523122,-0.050868,0.797011,9.85368


## Understanding Classification

In [12]:
state_map = {0: "Competitive", 1: "Tacit", 2: "Cartel"}

pure["state_label"] = pure["state_mode"].map(state_map)
validation_df["state_label"] = validation_df["state_mode"].map(state_map)

In [17]:
fig = px.scatter(
    pure,
    x="z1",
    y="z2",
    color="state_label",
    opacity=0,
    title="Latent Space (Beta only) with Centroids and Conduct Axis",
    template="plotly_white"
)

# Add centroid markers
centroids = np.vstack([mu_C, mu_T, mu_K])
centroid_labels = ["Competitive centroid", "Tacit centroid", "Cartel centroid"]

fig.add_trace(go.Scatter(
    x=centroids[:,0],
    y=centroids[:,1],
    mode="markers+text",
    text=centroid_labels,
    textposition="top center",
    marker=dict(size=14, symbol="x"),
    name="Centroids"
))

# Add arrow from mu_C to mu_K
fig.add_trace(go.Scatter(
    x=[mu_C[0], mu_K[0]],
    y=[mu_C[1], mu_K[1]],
    mode="lines",
    line=dict(width=4, dash="dash"),
    name="Conduct axis (C → K)"
))

# Optional: annotate arrow direction
fig.add_annotation(
    x=mu_K[0], y=mu_K[1],
    ax=mu_C[0], ay=mu_C[1],
    xref="x", yref="y", axref="x", ayref="y",
    showarrow=True, arrowhead=3, arrowsize=1.2, arrowwidth=2,
    text="C→K"
)

fig.show()

In [16]:
fig = px.histogram(
    validation_df,
    x="conduct_score_centred",
    color="state_label",
    nbins=60,
    opacity=0.5,
    barmode="overlay",
    title="Centered Conduct Score Distribution by Regime (All Windows) Beta only",
    template="plotly_white"
)
fig.show()