In [1]:
%load_ext autoreload
%autoreload 2

# wSAA

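> Weights-based predictors: `RandomForestWSAA`, which derives weights over the training observations from the leaves of a random forest, and `SAA`, a featureless baseline that gives every historical observation the same weight.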

In [2]:
#| default_exp wSAA

In [3]:
#| hide
from nbdev.showdoc import *

# from nbdev.qmd import *

## Packages

In [4]:
#| export
from __future__ import annotations
from fastcore.docments import *
from fastcore.test import *
from fastcore.utils import *

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

from dddex.basePredictor import BasePredictor, restructureWeightsDataList

ImportError: cannot import name 'BasePredictor' from 'dddex.basePredictor' (/home/kagu/dddex/dddex/basePredictor.py)

## wSAA - Random Forest

In [None]:
#|export 

class RandomForestWSAA(RandomForestRegressor, BasePredictor):
    """Random forest regressor that, when fitted, additionally keeps the training
    targets and the leaf index of every training observation in every tree.
    `getWeightsData` uses both to derive weights over the training observations."""
    
    def fit(self, 
            X, # Feature matrix of the training observations.
            y, # Target values of the training observations.
            ):
        """Fit the forest on `X` and `y`, then cache `y` and the leaf indices of `X`.
        
        Returns the fitted estimator itself, following the scikit-learn convention,
        so that calls such as `model.fit(X, y).predict(X)` work."""

        # Zero-argument super() starts the lookup right after this class in the MRO,
        # so a `fit` defined on RandomForestRegressor itself would not be skipped.
        super().fit(X = X, y = y)
        
        # Kept for the weights computation: the targets and, per training observation,
        # the leaf it falls into (as returned by `apply`).
        self.y = y
        self.leafIndicesTrain = self.apply(X)

        return self
        

In [None]:
show_doc(RandomForestWSAA)

In [None]:
show_doc(RandomForestWSAA.fit)

In [None]:
#|export

@patch
def getWeightsData(self: RandomForestWSAA, 
                   X: np.ndarray, # Feature matrix of samples for which conditional density estimates are computed.
                   outputType: 'all' | # Specifies structure of output.
                               'onlyPositiveWeights' | 
                               'summarized' | 
                               'cumulativeDistribution' | 
                               'cumulativeDistributionSummarized' = 'onlyPositiveWeights', 
                   scalingList: list | np.ndarray | None = None, # List or array with same size as self.y containing floats being multiplied with self.y.
                   ):
    """For every row of `X`, compute weights over the training observations based on
    which training observations share a leaf with that row, keep the strictly positive
    weights together with their positions, and pass the result on to
    `restructureWeightsDataList`, whose return value is returned unchanged."""

    testLeaves = self.apply(X)

    def _positiveWeights(sampleLeaves):
        # 1 where a training observation has the same leaf index as the sample, 0 elsewhere.
        sameLeaf = (self.leafIndicesTrain == sampleLeaves) * 1
        leafSizes = np.sum(sameLeaf, axis = 0)
        shares = sameLeaf / leafSizes

        # A one-dimensional comparison result has no second axis to average over, so the
        # shares are used as they are; otherwise they are averaged along axis 1.
        if sameLeaf.ndim == 1:
            weights = shares
        else:
            weights = np.mean(shares, axis = 1)

        posIndices = np.where(weights > 0)[0]
        return weights[posIndices], posIndices

    # One (weights, indices) tuple per row of X.
    weightsPerSample = [_positiveWeights(sampleLeaves) for sampleLeaves in testLeaves]

    #---

    return restructureWeightsDataList(weightsDataList = weightsPerSample, 
                                      outputType = outputType, 
                                      y = self.y, 
                                      scalingList = scalingList,
                                      equalWeights = False)

In [None]:
show_doc(RandomForestWSAA.getWeightsData)

In [None]:
# #| export

# @patch
# def predict(self: RandomForestWSAA, 
#             X: np.ndarray, # Feature matrix of samples for which an estimation of conditional quantiles is computed.
#             probs: list | np.ndarray = [0.1, 0.5, 0.9], # Probabilities for which the estimated conditional p-quantiles are computed.
#             outputAsDf: bool = False, # Output is either a dataframe with 'probs' as cols or a dict with 'probs' as keys.
#             scalingList: list | np.ndarray | None = None, # List or array with same size as self.y containing floats being multiplied with self.y.
#             ):

#     quantileRes = super(BaseWeightsBasedPredictor, self).predict(X = X,
#                                                                  probs = probs,
#                                                                  outputAsDf = outputAsDf,
#                                                                  scalingList = scalingList)

#     return quantileRes

In [None]:
show_doc(RandomForestWSAA.predict)

## SAA

In [None]:
#| export

class SAA(BasePredictor):
    """SAA is a featureless approach: the density of the target variable is taken to be
    the one that gives every historical observation of the target the same probability."""

    def __init__(self):
        # Target values; stays None until they are supplied.
        self.y = None

    def __repr__(self):
        return "SAA()"

    # str() and repr() produce the same text.
    __str__ = __repr__
    

In [None]:
show_doc(SAA)

In [None]:
#| export

@patch
def fit(self: SAA, 
        y: np.ndarray, # Target values which form the estimated density function based on the SAA algorithm.
        ):
    """Store the target values `y` on the instance; nothing else is computed here."""
    self.y = y

In [None]:
show_doc(SAA.fit)

In [None]:
#| export

@patch
def getWeightsData(self: SAA, 
                   X: np.ndarray, # Feature matrix for whose rows conditional density estimates are computed.
                   outputType: 'all' | # Specifies structure of output.
                               'onlyPositiveWeights' | 
                               'summarized' | 
                               'cumulativeDistribution' | 
                               'cumulativeDistributionSummarized' = 'onlyPositiveWeights', 
                   scalingList: list | np.ndarray | None = None, # List or array with same size as self.y containing floats being multiplied with self.y.
                   ):
    """Build equal weights over all stored target values, once per requested prediction,
    and pass them to `restructureWeightsDataList`, whose return value is returned unchanged.

    With `X = None` a single prediction is assumed; otherwise there is one per row of `X`.
    The contents of `X` are not used, only its number of rows."""

    nPredictions = 1 if X is None else X.shape[0]

    def _uniformWeights():
        # Every stored observation gets weight 1 / nObs, paired with positions 0 .. nObs - 1.
        nObs = len(self.y)
        return np.full(nObs, 1 / nObs), np.arange(nObs)

    # One (weights, indices) tuple per prediction, each with its own arrays.
    uniformWeightsList = [_uniformWeights() for _ in range(nPredictions)]

    return restructureWeightsDataList(weightsDataList = uniformWeightsList, 
                                      outputType = outputType, 
                                      y = self.y,
                                      scalingList = scalingList,
                                      equalWeights = True)

In [None]:
show_doc(SAA.getWeightsData)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()