# CWC Score Adjustment

In [1]:
# imports
import sys
BASE_PATH =  "/Users/audreymcmillion/Documents/acm-thesis"
sys.path.append(BASE_PATH) 
import duckdb
import matplotlib.pyplot as plt
from model_fitting import ModelFitting
from ev_scoring import ExtremeValueScoring
from market_utils import MarketUtilities
import pandas as pd
from tqdm import tqdm
import json 

# WRDS account name used by both helper objects below. It is defined once so
# the two constructors cannot drift apart, and it can be overridden with the
# WRDS_USERNAME environment variable instead of editing the notebook.
import os
WRDS_USERNAME = os.environ.get("WRDS_USERNAME", "audreymcmillion")

# Reuse the WRDS handle and SQLite connection exposed by the scoring object so
# MarketUtilities works against the same handles.
ev = ExtremeValueScoring(wrds_username=WRDS_USERNAME)
db = ev.wrds_db
conn = ev.sqlite_conn
mkt_utils = MarketUtilities(wrds_username=WRDS_USERNAME, wrds_db=db, sqlite_conn=conn)

Loading library list...
Done


## Coverage Width Criterion Definition

$$
\text{CWC} = \frac{\text{Mean Width}}{R} \cdot \left(1 + \mathbb{1}\{\text{Coverage} < (1-\alpha)\} \cdot \exp\left(-\eta \left(\text{Coverage} - (1-\alpha)\right)\right)\right)
$$

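where $R$ denotes the range of the target variable, $1-\alpha$ is the target coverage level, and $\eta > 0$ controls how sharply under-coverage is penalized.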

## Calculate

In [None]:
# Sanity check: load every conformal-result row stored for the symbol ABIO
# and show the resulting frame.
abio_query = """
select *
from conformal_results 
where symbol = 'ABIO'
"""
test = pd.read_sql(sql=abio_query, con=conn)
test

In [None]:
# test['actual'].plot()

In [None]:
# Per (test_set, symbol, window, model) summary of the AR-GARCH intervals for
# the 'Real Distribution Shift' test set: empirical coverage, target range R,
# mean interval width, range-normalized width and the number of rows.
real_shift_query = """
select test_set, 
       symbol, 
       start_dt, 
       end_dt, 
       model, 
       avg(within_CI) as coverage, 
       max(actual) - min(actual) as R,
       avg(upper_bound - lower_bound) as mean_width,
       avg(upper_bound - lower_bound)/(max(actual) - min(actual)) as normalized,
       count(*) as count
from argarch_results 
where test_set = 'Real Distribution Shift'
group by 1, 2, 3, 4, 5"""
argarch_set = pd.read_sql(sql=real_shift_query, con=conn)

In [None]:
# Order the groups by row count (ascending) so the smallest groups appear first.
argarch_set.sort_values(by="count")

#### Conformal Set

$$
\text{MWI Score} = \text{Mean Width} + \frac{2}{\alpha} \sum_{i=1}^n \left[ \max(0, y_i - \hat{y}_i^{\text{up}}) + \max(0, \hat{y}_i^{\text{low}} - y_i) \right]
$$

In [2]:
# Scoring parameters, kept as strings because they are spliced verbatim into
# the SQL text below: alpha is the miscoverage level (target coverage is
# 1 - alpha) and eta is the exponent scale of the CWC under-coverage penalty.
alpha, eta = str(0.05), str(50)

# Query outline:
#   min_max   - range of `actual` per series / conformal configuration
#   collapsed - coverage, range-normalized interval widths and the summed,
#               range-normalized bound violations (mwi_sum) per configuration
#   final     - readable model label plus the MWI and CWC scores
conformal_query = f"""
with min_max as (
    select test_set, 
           symbol, 
           start_dt, 
           end_dt,  
           conformal_mode,	
           conformity_score,	
           base_model,	
           max(actual) - min(actual) as actual_range
    from conformal_results 
    where symbol = '628432101714|std=6;7'
    group by 1, 2, 3, 4, 5, 6, 7
),

collapsed as (
    select cr.test_set, 
           cr.symbol, 
           cr.start_dt, 
           cr.end_dt,  
           cr.conformal_mode,	
           cr.conformity_score,	
           cr.base_model,	
           avg(cr.within_CI) as coverage, 
           mm.actual_range,
           avg(cr.upper_bound - cr.lower_bound)/mm.actual_range as avg_width,
           max(cr.upper_bound - cr.lower_bound)/mm.actual_range as max_width,
           min(cr.upper_bound - cr.lower_bound)/mm.actual_range as min_width,
           sum(
                (case when cr.actual - cr.upper_bound > 0 then cr.actual - cr.upper_bound else 0 end
                 + case when cr.lower_bound - cr.actual > 0 then cr.lower_bound - cr.actual else 0 end
                )
                / mm.actual_range
            ) as mwi_sum
    from conformal_results cr
    join min_max mm
    on (cr.test_set, cr.symbol, cr.start_dt, cr.end_dt, 
        cr.conformal_mode, cr.conformity_score, cr.base_model) = (mm.test_set, mm.symbol, mm.start_dt, mm.end_dt, 
                                                                 mm.conformal_mode, mm.conformity_score, mm.base_model) 
    group by 1, 2, 3, 4, 5, 6, 7
)
select test_set, 
       symbol, 
       start_dt, 
       end_dt,
       case 
           when conformal_mode = 'online' and conformity_score = 'gamma' then 'OC: Gamma Score'
           when conformal_mode = 'online' and conformity_score = 'residual_normalized' then 'OC: Residual Normalized Score'
           when conformal_mode = 'naive' and conformity_score = 'gamma' then 'NC: Gamma Score'
           when conformal_mode = 'naive' and conformity_score = 'residual_normalized' then 'NC: Residual Normalized Score'
           else 'None'
       end as model,
       base_model,
       coverage * 100 as coverage,
       (1-{alpha}) * 100 as target_coverage,
       actual_range,
       avg_width,
       max_width,
       min_width,
       mwi_sum,
       avg_width + (2/{alpha}) * mwi_sum as mwi_score,
       (avg_width) *
        (
            1 + 
            CASE WHEN coverage < (1 - {alpha})
                 THEN EXP(-{eta} * (coverage - (1 - {alpha})))
                 ELSE 0
            END
        ) AS cwc_score
from collapsed
"""
conformal_set = pd.read_sql(sql=conformal_query, con=conn)

In [3]:
# Display the conformal summary: one row per conformal mode / conformity score / base model.
conformal_set

Unnamed: 0,test_set,symbol,start_dt,end_dt,model,base_model,coverage,target_coverage,actual_range,avg_width,max_width,min_width,mwi_sum,mwi_score,cwc_score
0,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,NC: Gamma Score,AR(1),92.666667,95.0,3.648627,2.311246,15.157746,0.023573,1.257905,52.627441,9.733282
1,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,NC: Residual Normalized Score,"AR(1)-GARCH(1,1)",94.333333,95.0,3.648627,0.441038,0.953393,0.333916,0.934814,37.833579,1.056557
2,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,OC: Gamma Score,AR(1),94.0,95.0,3.648627,3.193324,21.380896,0.034704,1.07991,46.389708,8.458226
3,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,OC: Residual Normalized Score,"AR(1)-GARCH(1,1)",94.333333,95.0,3.648627,0.445942,0.971151,0.333682,0.934723,37.834869,1.068304


In [4]:
# Persist the conformal scores. NOTE: if_exists="append" adds the rows again on
# every execution of this cell, so re-running it duplicates them in the table.
conformal_set.to_sql(
    name="model_coverage_stats2",
    con=conn,
    if_exists="append",
    index=False,
)

4

#### AR-GARCH Set

In [None]:
# Same per-group AR-GARCH summary as a quick overview across every test set,
# followed by the distinct model labels present in the table.
argarch_overview_query = """
select test_set, 
       symbol, 
       start_dt, 
       end_dt, 
       model, 
       avg(within_CI) as coverage, 
       max(actual) - min(actual) as R,
       avg(upper_bound - lower_bound) as mean_width,
       avg(upper_bound - lower_bound)/(max(actual) - min(actual)) as normalized,
       count(*) as count
from argarch_results 
group by 1, 2, 3, 4, 5"""
argarch_set = pd.read_sql(sql=argarch_overview_query, con=conn)
argarch_set["model"].unique()

In [5]:
# Scoring parameters, kept as strings because they are spliced verbatim into
# the SQL text below: alpha is the miscoverage level (target coverage is
# 1 - alpha) and eta is the exponent scale of the CWC under-coverage penalty.
alpha = str(0.05)
eta = str(50)

# Query outline:
#   min_max   - range of `actual` per series and model
#   collapsed - coverage, range-normalized interval widths and the summed,
#               range-normalized bound violations (mwi_sum) per series and model
#   final     - MWI and CWC scores; base_model is NULL for the AR-GARCH baseline
#
# Both CTEs group by all five key columns (including `model`). Grouping by only
# the first four would let SQLite return an arbitrary `model` per group, pool
# the statistics of different models together, and drop the other models in
# the join on `model`.
argarch_set = pd.read_sql(f"""
with min_max as (
    select test_set,
           symbol,
           start_dt,
           end_dt,
           model,
           max(actual) - min(actual) as actual_range
    from argarch_results
    where symbol = '628432101714|std=6;7'
    group by 1, 2, 3, 4, 5
),

collapsed as (
    select cr.test_set,
           cr.symbol,
           cr.start_dt,
           cr.end_dt,
           cr.model,
           avg(cr.within_CI) as coverage,
           mm.actual_range,
           avg(cr.upper_bound - cr.lower_bound)/mm.actual_range as avg_width,
           max(cr.upper_bound - cr.lower_bound)/mm.actual_range as max_width,
           min(cr.upper_bound - cr.lower_bound)/mm.actual_range as min_width,
           sum(
                (case when cr.actual - cr.upper_bound > 0 then cr.actual - cr.upper_bound else 0 end
                 + case when cr.lower_bound - cr.actual > 0 then cr.lower_bound - cr.actual else 0 end
                )
                / mm.actual_range
            ) as mwi_sum
    from argarch_results cr
    join min_max mm
    on (cr.test_set, cr.symbol, cr.start_dt, cr.end_dt, cr.model) = (mm.test_set, mm.symbol, mm.start_dt, mm.end_dt, mm.model)
    group by 1, 2, 3, 4, 5
)
select test_set,
       symbol,
       start_dt,
       end_dt,
       model,
       null as base_model,
       coverage * 100 as coverage,
       (1-{alpha}) * 100 as target_coverage,
       actual_range,
       avg_width,
       max_width,
       min_width,
       mwi_sum,
       avg_width + (2/{alpha}) * mwi_sum as mwi_score,
       (avg_width) *
        (
            1 +
            CASE WHEN coverage < (1 - {alpha})
                 THEN EXP(-{eta} * (coverage - (1 - {alpha})))
                 ELSE 0
            END
        ) AS cwc_score
from collapsed
""", conn)

In [6]:
# Display the AR-GARCH summary frame produced by the scoring query.
argarch_set

Unnamed: 0,test_set,symbol,start_dt,end_dt,model,base_model,coverage,target_coverage,actual_range,avg_width,max_width,min_width,mwi_sum,mwi_score,cwc_score
0,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,"AR(1)-GARCH(1,1)",,96.333333,95.0,3.648627,0.474578,1.061621,0.374106,1.478421,59.611405,0.474578


In [7]:
# Persist the AR-GARCH scores. NOTE: if_exists="append" adds the rows again on
# every execution of this cell, so re-running it duplicates them in the table.
argarch_set.to_sql(
    name="model_coverage_stats2",
    con=conn,
    if_exists="append",
    index=False,
)

1

#### DtACI Set

In [None]:
# Load all DtACI rows and parse the integer that follows ';I=' in the model
# label into column I. Labels without that marker get I = 0 explicitly.
# Without the guard, INSTR returns 0 and SUBSTR(model, 3) is cast instead,
# which yields 0 only when the third character onward is non-numeric.
dtaci_test = pd.read_sql("""
select *,
       case
           when instr(model, ';I=') > 0
           then cast(substr(model, instr(model, ';I=') + 3) as integer)
           else 0
       end as I
from dtaci_results_new
""", conn)
dtaci_test['model'].unique()

In [8]:
# Scoring parameters, kept as strings because they are spliced verbatim into
# the SQL text below: alpha is the miscoverage level (target coverage is
# 1 - alpha) and eta is the exponent scale of the CWC under-coverage penalty.
alpha = str(0.05)
eta = str(50)

# Query outline:
#   dtaci_results - rows for one symbol plus I, the integer parsed from the
#                   ';I=' suffix of the model label (0 when there is no suffix)
#   min_max       - range of `actual` per series / score / model / I
#   collapsed     - coverage, range-normalized interval widths and the summed,
#                   range-normalized bound violations (mwi_sum)
#   final         - readable model label, base model, MWI and CWC scores
#
# The suffix is parsed only when ';I=' is actually present. Without the guard,
# INSTR returns 0 and SUBSTR(model, 3) is cast instead, which yields 0 only
# when the third character onward is non-numeric.
dtaci_set = pd.read_sql(f"""
with dtaci_results as (
    select *,
           case
               when instr(model, ';I=') > 0
               then cast(substr(model, instr(model, ';I=') + 3) as integer)
               else 0
           end as I
    from dtaci_results_new
    where symbol = '628432101714|std=6;7'
),

min_max as (
    select test_set,
           symbol,
           start_dt,
           end_dt,
           conformity_score,
           model,
           I,
           max(actual) - min(actual) as actual_range
    from dtaci_results
    group by 1, 2, 3, 4, 5, 6, 7
),

collapsed as (
    select cr.test_set,
           cr.symbol,
           cr.start_dt,
           cr.end_dt,
           cr.conformity_score,
           cr.model,
           cr.I,
           avg(cr.within_CI) as coverage,
           mm.actual_range,
           avg(cr.upper_bound - cr.lower_bound)/mm.actual_range as avg_width,
           max(cr.upper_bound - cr.lower_bound)/mm.actual_range as max_width,
           min(cr.upper_bound - cr.lower_bound)/mm.actual_range as min_width,
           sum(
                (case when cr.actual - cr.upper_bound > 0 then cr.actual - cr.upper_bound else 0 end
                 + case when cr.lower_bound - cr.actual > 0 then cr.lower_bound - cr.actual else 0 end
                )
                / mm.actual_range
            ) as mwi_sum
    from dtaci_results cr
    join min_max mm
    on (cr.test_set, cr.symbol, cr.start_dt, cr.end_dt,
        cr.conformity_score, cr.model, cr.I) = (mm.test_set, mm.symbol, mm.start_dt, mm.end_dt, mm.conformity_score, mm.model, mm.I)
    group by 1, 2, 3, 4, 5, 6, 7
)

select test_set,
       symbol,
       start_dt,
       end_dt,
       case
            when I != 0 and conformity_score = 'residual_normalized' then 'DtACI: Residual Normalized Score;I=' || I
            when I = 0 and conformity_score = 'residual_normalized' then 'DtACI: Residual Normalized Score'
            when conformity_score = 'gamma' then 'DtACI: Gamma Score'
            else 'None'
       end as model,
       case
           when model like 'AR(1)-GARCH(1,1)%' then 'AR(1)-GARCH(1,1)'
           else model
       end as base_model,
       coverage * 100 as coverage,
       (1-{alpha}) * 100 as target_coverage,
       actual_range,
       avg_width,
       max_width,
       min_width,
       mwi_sum,
       avg_width + (2/{alpha}) * mwi_sum as mwi_score,
       (avg_width) *
        (
            1 +
            CASE WHEN coverage < (1 - {alpha})
                 THEN EXP(-{eta} * (coverage - (1 - {alpha})))
                 ELSE 0
            END
        ) AS cwc_score
from collapsed
""", conn)

In [9]:
# Display the DtACI summary: one row per conformity score / model / I combination.
dtaci_set

Unnamed: 0,test_set,symbol,start_dt,end_dt,model,base_model,coverage,target_coverage,actual_range,avg_width,max_width,min_width,mwi_sum,mwi_score,cwc_score
0,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,DtACI: Gamma Score,AR(1),96.333333,95.0,3.648627,2.85434,22.112032,0.017457,0.759847,33.248213,2.85434
1,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,DtACI: Residual Normalized Score,"AR(1)-GARCH(1,1)",96.0,95.0,3.648627,0.449883,0.981538,0.333863,0.834777,33.840957,0.449883
2,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,DtACI: Residual Normalized Score;I=100,"AR(1)-GARCH(1,1)",95.0,95.0,3.648627,0.448245,0.980218,0.333879,0.862951,34.966272,0.448245
3,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,DtACI: Residual Normalized Score;I=150,"AR(1)-GARCH(1,1)",94.666667,95.0,3.648627,0.448261,0.97651,0.333878,0.865601,35.072289,0.977819
4,Simulated DistShift+Anom 700,628432101714|std=6;7,1900-01-01,1900-08-30,DtACI: Residual Normalized Score;I=50,"AR(1)-GARCH(1,1)",95.0,95.0,3.648627,0.448377,0.990972,0.33388,0.851805,34.520567,0.448377


In [10]:
# Persist the DtACI scores. NOTE: if_exists="append" adds the rows again on
# every execution of this cell, so re-running it duplicates them in the table.
dtaci_set.to_sql(
    name="model_coverage_stats2",
    con=conn,
    if_exists="append",
    index=False,
)

5

## Testing

In [12]:
# Look up the rows stored in model_coverage_stats2 for symbol 585051031353|std=6;7.
# NOTE(review): this is not the symbol scored in the cells above
# (628432101714|std=6;7), so an empty result here may be expected - confirm.
stored_rows_query = "select * from model_coverage_stats2 where symbol = '585051031353|std=6;7'"
pd.read_sql(sql=stored_rows_query, con=conn)

Unnamed: 0,test_set,symbol,start_dt,end_dt,model,base_model,coverage,target_coverage,actual_range,avg_width,max_width,min_width,mwi_sum,mwi_score,cwc_score
