# Scaling Adherence

Compare expected scale path (`scheduled_w`) vs realized path (`actual_w`) from `targets.csv`.

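Output:
- Progress gap by signal tier (`actual_progress - planned_progress`; negative means execution lags the schedule)
- Mean planned vs. actual progress by day-in-week, per tier
- Symbols ranked by average progress gap (most lagging first)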

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Never truncate columns or wrap rows when a DataFrame is displayed.
for _option in ('display.max_columns', 'display.width'):
    pd.set_option(_option, None)

# Shared plot defaults for every figure in this notebook.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

from QuantConnect import *
from QuantConnect.Research import QuantBook

# Research-environment handle; read_csv_from_store() below uses qb.ObjectStore
# to load the CSV inputs.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Load a CSV stored under `key` in the QuantBook ObjectStore.

    Best-effort reader: every failure mode (missing key, empty content, or
    any exception raised while reading/parsing) prints a diagnostic and
    yields None instead of raising.

    Args:
        key: ObjectStore key of the CSV to read.

    Returns:
        A pandas DataFrame parsed from the stored text, or None on failure.
    """
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw_text = store.Read(key)
            if raw_text:
                return pd.read_csv(StringIO(raw_text))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as e:
        # Deliberately broad: a broken input should not abort the notebook here;
        # callers decide whether a None result is fatal.
        print(f'Error reading {key}: {e}')
    return None


In [None]:
# Load both inputs; only targets.csv is mandatory.
df_targets = read_csv_from_store('wolfpack/targets.csv')
df_signals = read_csv_from_store('wolfpack/signals.csv')

if df_targets is None:
    raise ValueError('targets.csv is required.')

# Normalise target dtypes: unparseable or missing weights become 0.0.
df_targets['date'] = pd.to_datetime(df_targets['date'])
for weight_col in ('start_w', 'weekly_target_w', 'scheduled_w', 'actual_w', 'scale_day'):
    as_number = pd.to_numeric(df_targets[weight_col], errors='coerce')
    df_targets[weight_col] = as_number.fillna(0.0)

if df_signals is None:
    # No signals available: keep the merge shape, every row ends up without a magnitude.
    sig = pd.DataFrame(columns=['week_id', 'symbol', 'magnitude'])
else:
    signal_dates = pd.to_datetime(df_signals['date'])
    df_signals['date'] = signal_dates
    # The signal date, formatted as YYYY-MM-DD, is the key joined against targets' week_id.
    df_signals['week_id'] = signal_dates.dt.strftime('%Y-%m-%d')
    raw_magnitude = pd.to_numeric(df_signals['magnitude'], errors='coerce')
    df_signals['magnitude'] = raw_magnitude.fillna(0.0).abs()
    # One magnitude per (week, symbol); the first occurrence wins.
    sig = df_signals[['week_id', 'symbol', 'magnitude']].drop_duplicates(subset=['week_id', 'symbol'])

# Left join so target rows without a matching signal are kept (magnitude = NaN).
df = pd.merge(df_targets, sig, how='left', on=['week_id', 'symbol'])

def tier_from_mag(m):
    """Map a signal magnitude to a tier label.

    Args:
        m: Signal magnitude; may be missing (NaN/None).

    Returns:
        'unknown' when `m` is missing, 'strong' for m >= 0.7,
        'moderate' for m >= 0.3, otherwise 'weak'.
    """
    if pd.isna(m):
        return 'unknown'
    # Thresholds are checked from highest to lowest; the first floor reached wins.
    for floor, label in ((0.7, 'strong'), (0.3, 'moderate')):
        if m >= floor:
            return label
    return 'weak'

# Bucket each row by the strength of its signal magnitude.
df['tier'] = df['magnitude'].apply(tier_from_mag)


def _progress_toward_target(current_w, start_w, target_w, eps=1e-10):
    """Return the fraction of the start -> target move completed at `current_w`.

    The ratio is signed along the direction of the planned move, so a weight
    that drifts *away* from the weekly target yields a negative ratio and is
    clipped to 0 rather than being counted as progress. Overshoot is clipped
    to 1.

    Args:
        current_w: Series of weights to evaluate (scheduled or actual).
        start_w: Series of weights at the start of the week.
        target_w: Series of weekly target weights.
        eps: Moves whose absolute size is at or below this are treated as
            "nothing to do" and reported as fully complete (1.0).

    Returns:
        Series of progress fractions in [0, 1], aligned with the inputs.
    """
    planned_move = target_w - start_w
    has_move = planned_move.abs() > eps
    # Replace negligible denominators so the division never yields inf/NaN.
    safe_move = planned_move.where(has_move, 1.0)
    fraction = ((current_w - start_w) / safe_move).where(has_move, 1.0)
    return fraction.clip(0, 1)


# Size of the planned weekly move (kept as a column for inspection).
df['total_week_order_abs'] = (df['weekly_target_w'] - df['start_w']).abs()
df['planned_progress'] = _progress_toward_target(
    df['scheduled_w'], df['start_w'], df['weekly_target_w']
)
df['actual_progress'] = _progress_toward_target(
    df['actual_w'], df['start_w'], df['weekly_target_w']
)
# Negative gap: the realized path is behind the schedule.
df['progress_gap'] = df['actual_progress'] - df['planned_progress']

# Number the rows of each (week, symbol) group in date order: 0 = first day.
df = df.sort_values(['week_id', 'symbol', 'date'])
df['day_in_week'] = df.groupby(['week_id', 'symbol']).cumcount()

display(df[['date', 'week_id', 'symbol', 'tier', 'planned_progress', 'actual_progress', 'progress_gap']].head())


In [None]:
# Figure 1: distribution of the progress gap for each signal tier.
tier_order = ['strong', 'moderate', 'weak', 'unknown']
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=df, x='tier', y='progress_gap', order=tier_order, ax=ax)
ax.axhline(0, color='black', linewidth=1)  # zero line = exactly on schedule
ax.set_title('Progress Gap by Signal Tier (Actual - Planned)')
ax.set_ylabel('Progress gap')
ax.set_xlabel('Tier')
ax.grid(alpha=0.2)
plt.tight_layout()
plt.show()

# Mean planned/actual progress for every (tier, day-in-week) pair.
profile = df.groupby(['tier', 'day_in_week'], as_index=False).agg(
    planned=('planned_progress', 'mean'),
    actual=('actual_progress', 'mean'),
)

# Figure 2: planned (left) and actual (right) profiles on a shared y-axis.
fig, axes = plt.subplots(1, 2, figsize=(16, 5), sharey=True)
panel_specs = (
    ('planned', 'Planned Progress by Tier'),
    ('actual', 'Actual Progress by Tier'),
)
for panel, (value_col, panel_title) in zip(axes, panel_specs):
    sns.lineplot(data=profile, x='day_in_week', y=value_col, hue='tier', marker='o', ax=panel)
    panel.set_title(panel_title)
    panel.set_xlabel('Day-in-week')
    panel.set_ylabel('Progress')
    panel.grid(alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Average progress gap per symbol, most negative (furthest behind schedule) first.
mean_gap_by_symbol = df.groupby('symbol', as_index=False)['progress_gap'].mean()
worst_lag = mean_gap_by_symbol.sort_values('progress_gap')

print('Most negative average gap (lagging schedule):')
display(worst_lag.head(15))
