In [None]:
#
##
### COMBINED NEURAL NETWORK

import numpy as np
import pandas as pd
from typing import List
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder, PowerTransformer

class MultiOutputTransformer(BaseEstimator, TransformerMixin):
    """Encode / decode a two-column target for a two-headed network.

    The target is read as a 2-D array (or DataFrame): column 0 is the class
    label, column 1 is the regression value. ``transform`` one-hot encodes the
    label and power-transforms the regression value and returns the two
    pieces as a list ``[y_class, y_reg]``.
    """

    @staticmethod
    def _split_columns(y):
        """Return columns 0 and 1 of ``y`` as two ``(n_samples, 1)`` arrays."""
        if isinstance(y, pd.DataFrame):
            y = y.values
        return y[:, 0].reshape(-1, 1), y[:, 1].reshape(-1, 1)

    def fit(self, y):
        """Fit the label encoder and the regression transformer on ``y``.

        Returns
        -------
        self
        """
        y_class, y_reg = self._split_columns(y)

        self.class_encoder_ = OneHotEncoder(sparse=False)
        self.reg_transformer_ = PowerTransformer()
        # Fit them to the input data
        self.class_encoder_.fit(y_class)
        self.reg_transformer_.fit(y_reg)
        # `categories_` is a list with one array per encoded column; only one
        # column is encoded here, so the class count is the length of its
        # first (and only) entry, not the length of the list itself.
        self.n_classes_ = len(self.class_encoder_.categories_[0])
        self.n_outputs_expected_ = 2
        return self

    def transform(self, y: np.ndarray) -> List[np.ndarray]:
        """Return ``[one_hot_labels, transformed_regression_values]``."""
        y_class, y_reg = self._split_columns(y)
        # Apply transformers to input array
        y_class = self.class_encoder_.transform(y_class)
        y_reg = self.reg_transformer_.transform(y_reg)
        # Split the data into a list
        return [y_class, y_reg]

    def inverse_transform(self, y: List[np.ndarray], return_proba: bool = False) -> np.ndarray:
        """Map network outputs back to the original target space.

        Parameters
        ----------
        y : two-element sequence ``[class_scores, regression_output]``.
        return_proba : when True, return ``class_scores`` untouched.

        Returns
        -------
        ``class_scores`` if ``return_proba`` is set; otherwise an array with
        the decoded label in column 0 and the inverse-transformed regression
        value in column 1.
        """
        class_scores, y_pred_reg = y[0], y[1]
        if return_proba:
            return class_scores
        # The arg-max of each score row indexes directly into the encoder's
        # category array, which is what one-hot decoding would return.
        categories = self.class_encoder_.categories_[0]
        y_pred_class = categories[np.argmax(class_scores, axis=1)]
        y_pred_reg = self.reg_transformer_.inverse_transform(y_pred_reg)
        return np.column_stack([y_pred_class, y_pred_reg])

    def get_metadata(self):
        """Return the fitted target metadata as a dict."""
        return {
            "n_classes_": self.n_classes_,
            "n_outputs_expected_": self.n_outputs_expected_,
        }

from scikeras.wrappers import BaseWrapper
from tensorflow.keras.initializers import HeNormal, LecunNormal, HeNormal
from tensorflow.keras.layers import Input, Dense, BatchNormalization, concatenate, LeakyReLU
from tensorflow.keras import Model

class CombiNet(BaseWrapper):
    """Two-headed Keras network (classification + regression).

    A shared stack of Dense/BatchNormalization layers feeds two branches: a
    regression branch ending in one linear unit, and a classification branch
    whose combining layer also receives the regression branch's features and
    ends in a two-unit softmax. Targets are encoded and decoded with
    ``MultiOutputTransformer``.

    Parameters
    ----------
    activation : activation handed to every hidden Dense layer.
    se_layers, se_units : depth / width of the shared extraction stack.
    re_layers, re_units : depth / width of the regression branch.
    ce_layers, ce_units : depth / width of the classification branch.
    cc_units : width of the layer that combines both branches.
    epochs, verbose : stored on the estimator.
    optimizer, optimizer__clipvalue : stored on the estimator; the compiled
        optimizer reaches ``_keras_build_fn`` through ``compile_kwargs``.
    **kwargs : forwarded to ``BaseWrapper.__init__``.
    """

    # Column selector applied to the decoded (label, value) prediction array.
    # This is a fixed lookup table rather than a tunable setting, so it lives
    # on the class instead of being re-created on every instance in __init__
    # (NOTE(review): instance attributes appear to be treated as estimator
    # parameters by the wrapper, which a freshly built dict would not survive
    # when cloning - confirm against the installed scikeras version).
    prediction_scope = {"classification": 0, "regression": 1, "full": range(2)}

    def __init__(self, activation = "selu",
        se_layers=1, se_units=256,
        re_layers=5, re_units=100,
        ce_layers=5, ce_units=100, cc_units=75,
        epochs=10, verbose=0,
        optimizer="adam", optimizer__clipvalue=1.0, **kwargs):
        super().__init__(**kwargs)
        self.activation = activation
        self.se_layers = se_layers
        self.se_units = se_units
        self.re_layers = re_layers
        self.re_units = re_units
        self.ce_layers = ce_layers
        self.ce_units = ce_units
        self.cc_units = cc_units
        self.epochs = epochs
        self.verbose = verbose
        # These two were accepted by the signature but never stored, so the
        # requested optimizer and its clip value were silently dropped.
        self.optimizer = optimizer
        self.optimizer__clipvalue = optimizer__clipvalue

    def _get_weight_init(self):
        """Pick the kernel initializer that matches ``self.activation``.

        LecunNormal for the "selu"/"elu" strings, HeNormal for everything
        else (including a LeakyReLU layer instance).
        """
        if isinstance(self.activation, str) and self.activation in ("selu", "elu"):
            return LecunNormal()
        return HeNormal()

    def _dense_stack(self, tensor, n_layers, units, weight_init):
        """Append ``n_layers`` Dense + BatchNormalization pairs to ``tensor``."""
        for _ in range(n_layers):
            tensor = Dense(units, self.activation,
                kernel_initializer=weight_init)(tensor)
            tensor = BatchNormalization()(tensor)
        return tensor

    def _keras_build_fn(self, compile_kwargs):
        """Build and compile the two-output Keras model.

        ``compile_kwargs["optimizer"]`` is used as the optimizer; the losses
        are categorical cross-entropy and MSE with equal weights.
        """
        weight_init = self._get_weight_init()

        # shared extraction
        # `shape` must be a tuple: `(n)` is just an int, `(n,)` is a 1-tuple.
        inp = Input(shape=(self.n_features_in_,))
        fe = self._dense_stack(inp, self.se_layers, self.se_units, weight_init)
        # regression branch
        re = self._dense_stack(fe, self.re_layers, self.re_units, weight_init)
        rr_head = Dense(1,"linear")(re)
        # classification branch
        ce = self._dense_stack(fe, self.ce_layers, self.ce_units, weight_init)
        # the combining layer sees both the classification and regression features
        cc = Dense(self.cc_units, self.activation,
            kernel_initializer=weight_init)(concatenate([ce, re]))
        cc = BatchNormalization()(cc)
        # NOTE: the classification head is fixed at two classes.
        cc_head = Dense(2, "softmax")(cc)

        model = Model(inputs=inp, outputs=[cc_head, rr_head])
        model.compile(loss=["categorical_crossentropy","mse"], loss_weights=[.5,.5],
            optimizer=compile_kwargs["optimizer"])
        return model

    @property
    def target_encoder(self):
        """Unfitted transformer used to encode the two-column target."""
        return MultiOutputTransformer()

    def _raw_predict(self, X):
        """Encode ``X`` and return the model's raw output list."""
        X = self.feature_encoder_.transform(X)
        return self.model_.predict(X)

    def predict_proba(self, X):
        """Return the classification head's softmax output for ``X``."""
        y_pred = self._raw_predict(X)
        return self.target_encoder_.inverse_transform(y_pred, return_proba=True)

    def predict(self, X, scope="classification"):
        """Predict decoded targets for ``X``.

        Parameters
        ----------
        scope : key of ``prediction_scope``: "classification" (label column),
            "regression" (value column) or "full" (both columns).

        Raises
        ------
        KeyError
            If ``scope`` is not a key of ``prediction_scope``.
        """
        y_pred = self.target_encoder_.inverse_transform(self._raw_predict(X))
        return y_pred[:,self.prediction_scope[scope]]

In [None]:
#
##
### FEATURE SELECTION

import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectorMixin
from imblearn.pipeline import Pipeline
from sklearn.random_projection import GaussianRandomProjection
from sklearn.cluster import AgglomerativeClustering
from sklearn.utils.validation import check_is_fitted

class DataFrameTransposer(BaseEstimator, TransformerMixin):
    """Stateless transformer that swaps the rows and columns of its input."""

    def fit(self, X, y=None):
        """Nothing to learn; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return the transpose of a copy of ``X`` (``y`` is ignored)."""
        duplicate = X.copy()
        return duplicate.T

class HierarchicalFeatureSelector(SelectorMixin, BaseEstimator):
    """Select features by target correlation, de-duplicated via clustering.

    Each feature is tested for Pearson correlation with the target, using
    the threshold ``alpha / n_features_in_data`` on the p-value. If at most
    ``n_features`` features pass, all of them are kept. Otherwise every
    feature is assigned to one of ``n_features`` clusters and, per cluster,
    the passing feature holding the cluster's highest absolute correlation is
    kept.

    Parameters
    ----------
    n_features : number of clusters, i.e. upper bound on selected features.
    alpha : significance level before division by the number of features.
    random_state : seed forwarded to the random projection used before
        clustering; ``None`` (default) keeps the previous unseeded behaviour.
    """

    def __init__(self, n_features=10, alpha=0.001, random_state=None):
        self.n_features = n_features
        self.alpha = alpha
        self.random_state = random_state

    def _get_cluster_assignments(self, data):
        """Return one cluster label per feature listed in ``self.results_``."""
        data = data.loc[:,self.results_.feature.values]
        n_components = data.shape[1]
        # Features become rows (transpose), are randomly projected, then
        # clustered into `n_features` groups.
        pipe = Pipeline([("rotate", DataFrameTransposer()),
            ("pca", GaussianRandomProjection(n_components=n_components,
                random_state=self.random_state)),
            ("cluster", AgglomerativeClustering(n_clusters=self.n_features))])
        return pipe.fit_predict(data)

    def _get_correlations(self, X, y):
        """Return a frame with columns feature, r, p, abs_r and sf.

        ``sf`` flags features whose p-value is at most
        ``alpha / X.shape[1]``.
        """
        rows = []
        for c in X.columns:
            # Unpack explicitly instead of relying on the result object being
            # coerced into two DataFrame columns.
            r, p = pearsonr(y, X[c])
            rows.append((c, r, p))
        correlations = pd.DataFrame(rows, columns=["feature", "r", "p"])
        correlations["abs_r"] = correlations.r.abs()
        correlations["sf"] = correlations.p<=self.alpha/X.shape[1]
        return correlations

    def fit(self, X, y):
        """Compute correlations and choose the features to keep.

        Parameters
        ----------
        X : array-like or DataFrame of features.
        y : target; when it has more than one dimension only column 0 is used.

        Returns
        -------
        self
        """
        # Coerce first so DataFrame / list targets can be sliced like arrays.
        y = np.asarray(y)
        if y.ndim>1:
            y = y[:,0]
        X = pd.DataFrame(X)
        y = pd.Series(y)

        self.n_features_in_ = X.shape[1]
        self.in_features_ =  X.columns
        self.results_ = self._get_correlations(X, y)

        if np.sum(self.results_.sf)<= self.n_features:
            self.best_ = self.results_[self.results_.sf]
        else:
            self.results_["cluster"] = self._get_cluster_assignments(X)
            # Per-cluster maximum of |r|, joined back onto the significant
            # features so only each cluster's top feature survives.
            cluster_max = self.results_.groupby("cluster",
                as_index=False).abs_r.max()
            self.best_ = self.results_[self.results_.sf]\
                .merge(cluster_max, on=["cluster", "abs_r"])\
                .drop_duplicates(["cluster", "abs_r"]).dropna()
        return self

    def _get_support_mask(self):
        """Boolean mask over the input features marking the selected ones."""
        # Build the lookup set once instead of once per feature.
        selected = set(self.best_.feature)
        return np.array([c in selected for c in self.in_features_], dtype=bool)

In [None]:
#
##
### SAMPLING STRATEGIES
# NOTE: REDO THIS USING DECORATORS

from imblearn import FunctionSampler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler

def _sampling(X, y, sampling_instance):
    if len(y.shape)==1:
        _ = sampling_instance.fit_resample(X, y)
        return X[sampling_instance.sample_indices_,:], y[sampling_instance.sample_indices_]
    else:
        _ = sampling_instance.fit_resample(X, y[:,0])
        return X[sampling_instance.sample_indices_,:], y[sampling_instance.sample_indices_,:]

class _RandomUnderSampler(FunctionSampler):
    """FunctionSampler that under-samples through ``_sampling``.

    Resampling is delegated to ``_sampling`` with a fresh
    ``RandomUnderSampler`` as its ``sampling_instance``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Assigned after the parent constructor, so these always override
        # whatever `func`, `kw_args` or `validate` was passed in kwargs.
        self.kw_args = dict(sampling_instance=RandomUnderSampler())
        self.func = _sampling
        self.validate = False

class _RandomOverSampler(FunctionSampler):
    """FunctionSampler that over-samples through ``_sampling``.

    Resampling is delegated to ``_sampling`` with a fresh
    ``RandomOverSampler`` as its ``sampling_instance``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Assigned after the parent constructor, so these always override
        # whatever `func`, `kw_args` or `validate` was passed in kwargs.
        self.kw_args = dict(sampling_instance=RandomOverSampler())
        self.func = _sampling
        self.validate = False

In [None]:
import pandas as pd

# dtype opt
def _optimize_numeric_dtypes(df):
    import pandas as pd
    float_cols = df.select_dtypes("float").columns
    int_cols = df.select_dtypes("integer").columns
    df[float_cols] = df[float_cols].\
        apply(pd.to_numeric, downcast="float")
    df[int_cols] = df[int_cols].\
        apply(pd.to_numeric, downcast="integer")
    return df

# Load the dataset and downcast its numeric columns.
data = _optimize_numeric_dtypes(
    pd.read_parquet("../data/customer_model/retailrocket/"))

# Split on week_step: rows with week_step > 2 are used for training,
# rows with week_step == 2 are held out for evaluation.
train = data.loc[data["week_step"] > 2]
test = data.loc[data["week_step"] == 2]

# Columns that must not be fed to the model as features.
out_cols = [
    "user_id",
    "target_event",
    "target_revenue",
    "week_step",
    "target_cap",
]
feat_cols = [c for c in train.columns if c not in out_cols]
# Target order matters: label first, regression value second.
target_cols = ["target_event", "target_cap"]

X = train[feat_cols].values
y = train[target_cols].values

In [None]:
from imblearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import PowerTransformer, QuantileTransformer

# Full training pipeline: variance filter -> scaling -> feature selection
# -> over-sampling -> two-headed network.
pipe = Pipeline(steps=[
    ("variance_filter", VarianceThreshold(threshold=10)),
    ("data_scaler", PowerTransformer()),
    ("feature_selector", HierarchicalFeatureSelector(n_features=50)),
    ("data_sampler", _RandomOverSampler()),
    ("nn", CombiNet(
        se_layers=2, se_units=512,
        re_layers=10, ce_layers=10,
        batch_size=16, epochs=10,
        optimizer="rmsprop", verbose=1,
    )),
])
pipe.fit(X, y)

In [None]:
from sklearn.metrics import f1_score, r2_score
# Score the held-out rows. The pipeline was fitted on a plain ndarray
# (train.loc[:, feat_cols].values), so predict on an ndarray too; handing it
# a DataFrame here would make the input type differ between fit and predict.
y_pred = pipe.predict(test.loc[:,feat_cols].values, scope="full")
# scope="full" returns both columns: 0 = predicted label, 1 = predicted value.
print("f1 : {:.3f}".format(f1_score(test["target_event"], y_pred[:,0])))
print("r2 : {:.3f}".format(r2_score(test["target_cap"], y_pred[:,1])))

In [None]:
# TODO: CALIBRATED CLASSIFIER
#   - just fit on fold
#   - fit CAI
#   - overload predict as in composed net
#   - how about just prefit and recalibration without the ... (note left unfinished)

In [None]:
import copy

import warnings
from inspect import signature
from functools import partial

from math import log
import numpy as np
from joblib import Parallel

from scipy.special import expit
from scipy.special import xlogy
from scipy.optimize import fmin_bfgs

from sklearn.base import (
    BaseEstimator,
    ClassifierMixin,
    RegressorMixin,
    clone,
    MetaEstimatorMixin,
    is_classifier,
)
from sklearn.preprocessing import label_binarize, LabelEncoder
from sklearn.utils import (
    column_or_1d,
    indexable,
    check_matplotlib_support,
)

from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.fixes import delayed
from sklearn.utils.validation import (
    _check_fit_params,
    _check_sample_weight,
    _num_samples,
    check_consistent_length,
    check_is_fitted,
)
from sklearn.utils import _safe_indexing
from sklearn.isotonic import IsotonicRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import check_cv, cross_val_predict
from sklearn.metrics._base import _check_pos_label_consistency
from sklearn.metrics._plot.base import _get_response



from sklearn.calibration import CalibratedClassifierCV

class DEVCC(CalibratedClassifierCV):
    """CalibratedClassifierCV that accepts a multi-column target.

    The full target is kept on ``self.y_``; calibration itself is run by the
    parent class on the first target column only.
    """

    def fit(self, X, y, sample_weight=None, **fit_params):
        """Fit the calibrated classifier.

        Parameters
        ----------
        X : training data, passed through to the parent ``fit``.
        y : target array; when it has more than one dimension only column 0
            is passed on as the classification target.
        sample_weight : passed through to the parent ``fit``.
        **fit_params : passed through to the parent ``fit``.

        Returns
        -------
        self
        """
        # Keep a copy of the complete target before it is reduced.
        self.y_ = copy.copy(y)
        if len(y.shape)>1:
            y = y[:,0]

        # Everything past this point used to be a pasted copy of the parent
        # implementation that called private helpers never imported in this
        # notebook; delegating keeps the behaviour and stays in step with the
        # installed scikit-learn version.
        return super().fit(X, y, sample_weight=sample_weight, **fit_params)
    

In [None]:
import copy
def reduce_y(func):
    """Decorator: hand only the first target column to the wrapped method.

    When ``y`` has more than one dimension, a copy of the full array is
    stored on ``self.y_`` and ``func`` receives ``y[:, 0]``; otherwise ``y``
    is passed through unchanged and ``self.y_`` is not touched. The wrapped
    method's return value is returned to the caller.
    """
    from functools import wraps

    @wraps(func)
    def actual_reduce(self, y, *args, **kwargs):
        if len(y.shape)>1:
            # `copy` is the module here, so the function is `copy.copy`.
            self.y_ = copy.copy(y)
            y = y[:,0]
        # Propagate the result so e.g. `fit(...)` can still return `self`.
        return func(self, y, *args, **kwargs)
    return actual_reduce


In [None]:
# Sanity check: wrap the pipeline in DEVCC (nothing is fitted here) and
# display the wrapper's `method` attribute.
DEVCC(pipe).method