In [None]:
%load_ext autoreload
%autoreload 2

# wSAA

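> Weighted sample average approximation (wSAA) estimators: a random-forest-based predictor that derives weights over the training observations, and the featureless SAA baseline.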

In [None]:
#| default_exp wSAA

In [None]:
#| hide
from nbdev.showdoc import *

# from nbdev.qmd import *

## Packages

In [None]:
#| export
from __future__ import annotations
from fastcore.docments import *
from fastcore.test import *
from fastcore.utils import *

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

from dddex.baseWeightsPredictor import BaseWeightsBasedPredictor, restructureWeightsDataList

## wSAA - Random Forest

In [None]:
#|export 

class RandomForestWSAA(RandomForestRegressor, BaseWeightsBasedPredictor):
    """Random forest regressor that also remembers its training targets and the
    leaf every training sample falls into, so that weights over the training
    observations can be derived later (see `getWeightsData`)."""
    
    def fit(self, X, Y):
        """Fit the forest on `(X, Y)` and cache the data needed for weight computation.

        Besides fitting the underlying forest, this stores the targets as `self.Y`
        and the result of `self.apply(X)` as `self.leafIndicesTrain`.

        Returns the fitted estimator itself, following the scikit-learn `fit`
        convention, so that calls can be chained.
        """

        # Zero-argument super() resolves to the forest's own `fit` through the MRO.
        super().fit(X = X, y = Y)
        
        # Cached for later use by `getWeightsData`.
        self.Y = Y
        self.leafIndicesTrain = self.apply(X)

        return self
        

In [None]:
show_doc(RandomForestWSAA)

---

[source](https://github.com/kaiguender/dddex/blob/main/dddex/wSAA.py#L19){target="_blank" style="float:right; font-size:smaller"}

### RandomForestWSAA

>      RandomForestWSAA (**kwargs)

A random forest regressor.

A random forest is a meta estimator that fits a number of classifying
decision trees on various sub-samples of the dataset and uses averaging
to improve the predictive accuracy and control over-fitting.
The sub-sample size is controlled with the `max_samples` parameter if
`bootstrap=True` (default), otherwise the whole dataset is used to build
each tree.

Read more in the :ref:`User Guide <forest>`.

In [None]:
show_doc(RandomForestWSAA.fit)

---

[source](https://github.com/kaiguender/dddex/blob/main/dddex/wSAA.py#L24){target="_blank" style="float:right; font-size:smaller"}

### RandomForestWSAA.fit

>      RandomForestWSAA.fit (X, Y)

Build a forest of trees from the training set (X, y).

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| X | {array-like, sparse matrix} of shape (n_samples, n_features) | The training input samples. Internally, its dtype will be converted<br>to ``dtype=np.float32``. If a sparse matrix is provided, it will be<br>converted into a sparse ``csc_matrix``. |
| Y |  |  |
| **Returns** | **object** |  |

In [None]:
#|export

@patch
def getWeightsData(self: RandomForestWSAA, 
                   X: np.ndarray, # Feature matrix of samples for which conditional density estimates are computed.
                   outputType: 'all' | # Specifies structure of output.
                               'onlyPositiveWeights' | 
                               'summarized' | 
                               'cumulativeDistribution' | 
                               'cumulativeDistributionSummarized' = 'onlyPositiveWeights', 
                   scalingList: list | np.ndarray | None = None, # List or array with same size as self.Y containing floats being multiplied with self.Y.
                   ):
    """Compute weights over the training observations for every row of `X`.

    Within each tree, a training observation that lands in the same leaf as the
    sample gets the weight 1 / (number of training observations in that leaf);
    these per-tree weights are then averaged over the trees. Only strictly
    positive weights and the positions of the corresponding training observations
    are kept. The collected pairs are handed to `restructureWeightsDataList`
    together with `outputType`, `self.Y` and `scalingList`, and its result is returned.
    """

    # Leaf indices of the samples in X, iterated row by row below.
    testLeafIndices = self.apply(X)

    def _positiveWeights(leavesOfSample):
        # 1 where a training observation shares the leaf of the current sample, else 0.
        sharesLeaf = (self.leafIndicesTrain == leavesOfSample) * 1
        leafSizes = sharesLeaf.sum(axis = 0)
        normalized = sharesLeaf / leafSizes

        # A 1-D comparison result has no tree axis to average over (the original
        # author notes this can occur when the model ends up with a single tree),
        # so the normalized values are used as they are.
        weights = normalized if sharesLeaf.ndim == 1 else normalized.mean(axis = 1)

        positions = np.nonzero(weights > 0)[0]
        return weights[positions], positions

    # One (weights, positions) pair per row of X.
    weightsDataList = [_positiveWeights(leaves) for leaves in testLeafIndices]

    #---

    return restructureWeightsDataList(weightsDataList = weightsDataList, 
                                      outputType = outputType, 
                                      Y = self.Y, 
                                      scalingList = scalingList,
                                      equalWeights = False)

In [None]:
show_doc(RandomForestWSAA.getWeightsData)

---

[source](https://github.com/kaiguender/dddex/blob/main/dddex/wSAA.py#L42){target="_blank" style="float:right; font-size:smaller"}

### RandomForestWSAA.getWeightsData

>      RandomForestWSAA.getWeightsData (X:numpy.ndarray, outputType:Union[Forwar
>                                       dRef('all'),ForwardRef('onlyPositiveWeig
>                                       hts'),ForwardRef('summarized'),ForwardRe
>                                       f('cumulativeDistribution'),ForwardRef('
>                                       cumulativeDistributionSummarized')]='onl
>                                       yPositiveWeights', scalingList:Union[lis
>                                       t,numpy.ndarray,NoneType]=None)

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| X | np.ndarray |  | Feature matrix of samples for which conditional density estimates are computed. |
| outputType | 'all' \| 'onlyPositiveWeights' \| 'summarized' \| 'cumulativeDistribution' \| 'cumulativeDistributionSummarized' | onlyPositiveWeights | Specifies structure of output. |
| scalingList | list \| np.ndarray \| None | None | List or array with same size as self.Y containing floats being multiplied with self.Y. |

In [None]:
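# NOTE(review): this `predict` patch is disabled. While it stays commented out,
# `RandomForestWSAA.predict` is resolved through the MRO - presumably to scikit-learn's
# forest `predict`, since `RandomForestRegressor` is listed first among the bases - so the
# rendered documentation below may no longer match the code. Confirm this is intended.
# #| export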

# @patch
# def predict(self: RandomForestWSAA, 
#             X: np.ndarray, # Feature matrix of samples for which an estimation of conditional quantiles is computed.
#             probs: list | np.ndarray = [0.1, 0.5, 0.9], # Probabilities for which the estimated conditional p-quantiles are computed.
#             outputAsDf: bool = False, # Output is either a dataframe with 'probs' as cols or a dict with 'probs' as keys.
#             scalingList: list | np.ndarray | None = None, # List or array with same size as self.Y containing floats being multiplied with self.Y.
#             ):

#     quantileRes = super(BaseWeightsBasedPredictor, self).predict(X = X,
#                                                                  probs = probs,
#                                                                  outputAsDf = outputAsDf,
#                                                                  scalingList = scalingList)

#     return quantileRes

In [None]:
show_doc(RandomForestWSAA.predict)

---

[source](https://github.com/kaiguender/dddex/blob/main/dddex/wSAA.py#L84){target="_blank" style="float:right; font-size:smaller"}

### RandomForestWSAA.predict

>      RandomForestWSAA.predict (X:numpy.ndarray,
>                                probs:Union[list,numpy.ndarray]=[0.1, 0.5,
>                                0.9], outputAsDf:bool=False, scalingList:Union[
>                                list,numpy.ndarray,NoneType]=None)

Predict regression target for X.

The predicted regression target of an input sample is computed as the
mean predicted regression targets of the trees in the forest.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| X | np.ndarray |  | Feature matrix of samples for which an estimation of conditional quantiles is computed. |
| probs | list \| np.ndarray | [0.1, 0.5, 0.9] | Probabilities for which the estimated conditional p-quantiles are computed. |
| outputAsDf | bool | False | Output is either a dataframe with 'probs' as cols or a dict with 'probs' as keys. |
| scalingList | list \| np.ndarray \| None | None | List or array with same size as self.Y containing floats being multiplied with self.Y. |
| **Returns** | **ndarray of shape (n_samples,) or (n_samples, n_outputs)** |  | **The predicted values.** |

## SAA

In [None]:
#| export

class SAA(BaseWeightsBasedPredictor):
    """SAA is a featureless approach: the density of the target variable is assumed to be
    the one that assigns equal probability to each historical observation of that target."""

    def __init__(self):
        # No target values are stored until `fit` is called.
        self.Y = None

    def __repr__(self):
        return "SAA()"

    # The printable form is the same as the repr.
    __str__ = __repr__
    

In [None]:
show_doc(SAA)

---

[source](https://github.com/kaiguender/dddex/blob/main/dddex/wSAA.py#L99){target="_blank" style="float:right; font-size:smaller"}

### SAA

>      SAA ()

SAA is a featureless approach that assumes the density of the target variable is given
by assigning equal probability to each historical observation of said target variable.

In [None]:
#| export

@patch
def fit(self: SAA, 
        Y: np.ndarray, # Target values which form the estimated density function based on the SAA algorithm.
        ):
    """Store the historical target values `Y` on the estimator.

    Returns the estimator itself (scikit-learn `fit` convention) so that calls
    such as `SAA().fit(Y).getWeightsData(X)` can be chained.
    """
    self.Y = Y

    return self

In [None]:
show_doc(SAA.fit)

---

[source](https://github.com/kaiguender/dddex/blob/main/dddex/wSAA.py#L114){target="_blank" style="float:right; font-size:smaller"}

### SAA.fit

>      SAA.fit (Y:numpy.ndarray)

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| Y | np.ndarray | Target values which form the estimated density function based on the SAA algorithm. |

In [None]:
#| export

@patch
def getWeightsData(self: SAA, 
                   X: np.ndarray, # Feature matrix for whose rows conditional density estimates are computed.
                   outputType: 'all' | # Specifies structure of output.
                               'onlyPositiveWeights' | 
                               'summarized' | 
                               'cumulativeDistribution' | 
                               'cumulativeDistributionSummarized' = 'onlyPositiveWeights', 
                   scalingList: list | np.ndarray | None = None, # List or array with same size as self.Y containing floats being multiplied with self.Y.
                   ):
    """Build equal weights over all stored target observations.

    One (weights, positions) pair is produced per row of `X`, or a single pair
    when `X` is None. In each pair every observation in `self.Y` receives the
    weight 1 / len(self.Y). The pairs are passed to `restructureWeightsDataList`
    together with `outputType`, `self.Y` and `scalingList`, and its result is returned.
    """

    # Without features there is exactly one (unconditional) prediction.
    nPredictions = 1 if X is None else X.shape[0]

    def _equalWeights():
        # Evaluated once per prediction so every pair owns its own arrays.
        nObs = len(self.Y)
        return np.repeat(1 / nObs, nObs), np.arange(nObs)

    # Each element of weightsDataList corresponds to one test prediction.
    weightsDataList = [_equalWeights() for _ in range(nPredictions)]

    return restructureWeightsDataList(weightsDataList = weightsDataList, 
                                      outputType = outputType, 
                                      Y = self.Y,
                                      scalingList = scalingList,
                                      equalWeights = True)

In [None]:
show_doc(SAA.getWeightsData)

---

[source](https://github.com/kaiguender/dddex/blob/main/dddex/wSAA.py#L121){target="_blank" style="float:right; font-size:smaller"}

### SAA.getWeightsData

>      SAA.getWeightsData (X:numpy.ndarray, outputType:Union[ForwardRef('all'),F
>                          orwardRef('onlyPositiveWeights'),ForwardRef('summariz
>                          ed'),ForwardRef('cumulativeDistribution'),ForwardRef(
>                          'cumulativeDistributionSummarized')]='onlyPositiveWei
>                          ghts',
>                          scalingList:Union[list,numpy.ndarray,NoneType]=None)

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| X | np.ndarray |  | Feature matrix for whose rows conditional density estimates are computed. |
| outputType | 'all' \| 'onlyPositiveWeights' \| 'summarized' \| 'cumulativeDistribution' \| 'cumulativeDistributionSummarized' | onlyPositiveWeights | Specifies structure of output. |
| scalingList | list \| np.ndarray \| None | None | List or array with same size as self.Y containing floats being multiplied with self.Y. |

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()