In [1]:
import os
import sys
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import yahooquery as yq
import random

import matplotlib.pyplot as plt


src_path = os.path.abspath(os.path.join('..', '..', 'src'))
sys.path.append(src_path)

from ConformalMethods import AdaptiveCP, ACP_plots, ACP_data

# Testing the new proposed set loss function.

We want to see how the new method performs compared to the old one.

What I am interested in:
- How the coverage and width differ.
- Whether the new loss allows the model to learn better, i.e. whether it chooses the most performant head.
        - How will I know which head is the most performant? I still haven't sorted this out.

## Getting the data

We will try on both stock data and on normal data.

In [2]:
def get_stock_data(start_index, end_index,
                   ticker_file=r'C:\Users\tobyw\Documents\ChrisPython\ConformalProject\scripts\snptickers.txt'):
    """Download daily close prices for a slice of tickers and pair each value with the next.

    The ticker symbols are read from ``ticker_file`` (one per line), sorted
    alphabetically, and the slice ``[start_index:end_index]`` of that sorted
    list is requested from Yahoo via ``yahooquery`` (5 years, daily interval).

    Parameters
    ----------
    start_index, end_index : int
        Slice bounds into the alphabetically sorted ticker list.
    ticker_file : str, optional
        Path to the text file of ticker symbols. Defaults to the original
        author's local path; pass your own path to run the notebook elsewhere.

    Returns
    -------
    list of tuple of numpy.ndarray
        One ``(previous_close, next_close)`` pair per ticker present in the
        downloaded data, where ``previous_close = close[:-1]`` and
        ``next_close = close[1:]`` -- i.e. the previous close is used as the
        prediction for the next one. An empty list is returned when the slice
        selects no tickers.
    """
    with open(ticker_file, 'r') as f:
        all_tickers = sorted(f.read().splitlines())

    stock_tickers = all_tickers[start_index:end_index]

    # Nothing selected: skip the network request entirely.
    if not stock_tickers:
        return []

    tickers = yq.Ticker(stock_tickers)
    all_price_data = tickers.history(period='5y', interval='1d')
    price_df = all_price_data[['close']].copy()

    stock_data_tuples = []

    # Only symbols that actually appear in the downloaded frame are iterated,
    # so tickers in the list that returned no rows are skipped here.
    for ticker_symbol in price_df.index.get_level_values(0).unique():
        ticker_close = price_df.loc[ticker_symbol]['close'].to_numpy()

        # NOTE(review): the first observation is dropped. The original comment
        # attributed this to an initial NaN from a volatility calculation, but
        # no such calculation happens here (raw close prices are used) --
        # confirm whether this drop is still wanted.
        ticker_close = ticker_close[1:]

        # The previous value serves as the prediction for the next one.
        stock_data_tuples.append((ticker_close[:-1], ticker_close[1:]))

    return stock_data_tuples



In [3]:
# First 100 tickers of the alphabetically sorted list.
stock_data = get_stock_data(start_index=0, end_index=100)

In [4]:
# Synthetic data from the project's generator.
# NOTE(review): presumably 100 series with lengths drawn from the range (1300, 1301) -- confirm against ACP_data.random_multi_shift.
normal_data = ACP_data.random_multi_shift(100, (1300,1301))

## The new Set method.

We will define the class with the new set_loss_function.

In [5]:
class inverse_AdaptedCP(AdaptiveCP):
    """AdaptiveCP variant with an asymmetric loss on the relative set-size error.

    Only ``set_loss`` is defined here; everything else comes from
    ``AdaptiveCP``. Presumably this overrides the base class's set loss used
    when weighting heads -- the base class is not visible in this notebook.
    """

    def set_loss(self, optimal_set, given_set):
        """Return the loss of ``given_set`` relative to ``optimal_set``.

        With ``val = (optimal_set - given_set) / optimal_set``:

        * ``val < 0``  -> linear penalty ``coverage_target * (-val)``
        * ``val >= 0`` -> log penalty ``(1 - coverage_target) * log(1 / (1 - val))``

        The log penalty diverges as ``val`` approaches 1 (``given_set``
        approaching 0) and is undefined beyond it, so ``val`` is clamped to
        the largest float below 1. This keeps the returned loss finite
        instead of raising ``ZeroDivisionError`` or yielding ``inf``/``nan``.

        Parameters
        ----------
        optimal_set, given_set : scalar
            Sizes of the optimal and the produced set.

        Returns
        -------
        scalar
            ``0`` when ``optimal_set`` is 0, otherwise the penalty described above.
        """
        # No scale to normalise by when the optimal set is 0, so no loss is charged.
        if optimal_set == 0:
            return 0

        val = (optimal_set - given_set) / optimal_set

        if val < 0:
            return self.coverage_target * (-val)

        # Stay strictly inside the domain of the log barrier; values already
        # below 1 pass through unchanged.
        val = min(val, np.nextafter(1.0, 0.0))
        return (1 - self.coverage_target) * np.log(1 / (1 - val))

## Comparing the two

In [6]:
# Initialising each model from the same target value so the two are compared like-for-like.
target = 0.1
ACP = AdaptiveCP(target)
inverse_ACP = inverse_AdaptedCP(target)

We will start with a simple absolute comparison.

In [7]:
# Synthetic data: run both models on every series and record the realised
# coverage and the average interval width reported by each run.
vanila_dict = {'coverge': [], 'width': []}
inverse_dict = {'coverge': [], 'width': []}

for i, data in enumerate(normal_data):
    vanila = ACP.AwACI(data, nu_sigma=(10, 0.05))
    adapted = inverse_ACP.AwACI(data, nu_sigma=(0.01, 0.05))

    for store, result in ((vanila_dict, vanila), (inverse_dict, adapted)):
        store['coverge'].append(result['realised_interval_coverage'])
        store['width'].append(result['average_prediction_interval'])

    # Progress marker every tenth series.
    if i % 10 == 0:
        print(i)

# Average each metric over all series, then report.
vanila_coverage, vanila_width = (np.mean(vanila_dict[key]) for key in ('coverge', 'width'))
inverse_coverage, inverse_width = (np.mean(inverse_dict[key]) for key in ('coverge', 'width'))

print('\n')
print('Vanila average:', vanila_coverage, vanila_width)
print('Inverse average:', inverse_coverage, inverse_width)

ValueError: Total of weights must be finite

Comparing for the stock data.

In [None]:
# Stock data: run both models on every ticker's series and record the realised
# coverage and the average interval width reported by each run.
vanila_dict = {'coverge': [], 'width': []}
inverse_dict = {'coverge': [], 'width': []}

for i, data in enumerate(stock_data):
    vanila = ACP.AwACI(data)
    adapted = inverse_ACP.AwACI(data, nu_sigma=(10, 0.15))

    for store, result in ((vanila_dict, vanila), (inverse_dict, adapted)):
        store['coverge'].append(result['realised_interval_coverage'])
        store['width'].append(result['average_prediction_interval'])

    # Progress marker every tenth series.
    if i % 10 == 0:
        print(i)

# Average each metric over all series, then report.
vanila_coverage, vanila_width = (np.mean(vanila_dict[key]) for key in ('coverge', 'width'))
inverse_coverage, inverse_width = (np.mean(inverse_dict[key]) for key in ('coverge', 'width'))

print('\n')
print('Vanila average:', vanila_coverage, vanila_width)
print('Inverse average:', inverse_coverage, inverse_width)

ValueError: Total of weights must be finite