In [1]:
import os
import numpy as np
import pandas as pd

import mliv.dgps_nested as dgps
import matplotlib.pyplot as plt

from mliv.rkhs import RKHSIV, ApproxRKHSIVCV, RKHSIVCV, RKHSIVL2CV, RKHSIVL2, RKHS2IV

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import os
import numpy as np
import mliv.dgps_mediated as dgps
import sys

# DML_mediated comes from the dml_mediated module (not shown here); that module must be importable, e.g. located on sys.path.
from dml_mediated import DML_mediated

In [11]:

# --- Simulate data and build the ground-truth evaluation target -------------
fn_number = 0
tau_fn = dgps.get_tau_fn(fn_number)
tauinv_fn = dgps.get_tauinv_fn(fn_number)
# The last value returned by get_data is rebound to tau_fn.
W, Z, X, M, D, Y, tau_fn = dgps.get_data(2000, tau_fn)

# Uniform noise column with one row per observation.
V = np.random.rand(Y.shape[0])
V = V.reshape(-1, 1)

print(np.column_stack((W,X,Z)).shape)

# Rows with D == 0.
ind = np.where(D==0)[0]
W0 = W[ind]
X0 = X[ind,:]

# Evaluation grid: every column starts at the D == 0 median of (X0, W0); the
# last column (W) is then swept from its 5th to its 95th percentile.
W0_test = np.zeros((1000, 1+X.shape[1]))
W0_test += np.median(np.column_stack((X0,W0)), axis=0, keepdims=True)
# Index -1 addresses the W column for any number of X columns (it equals the
# previously hard-coded index 2 when X has two columns).
W0_test[:, -1] = np.linspace(np.percentile(
            W0[:, 0], 5), np.percentile(W0[:, 0], 95), 1000)

# True parameters
b_yd = 2.0
b_ym = 1.0
b_yx = np.array([[-1.0],[-1.0]])
b_yu = -1.0
b_yw = 2.0
b_y0 = 2.0
b_wx = np.array([[0.2],[0.2]])
b_wu = -0.6
b_w0 = 0.3
b_md = -0.3
b_mx = np.array([[-0.5],[-0.5]])
b_mu = 0.4
b_m0 = 0.0

# Closed-form coefficients derived from the parameters above (first stage).
gamma_1w = (b_yw*b_wu + b_yu)/b_wu
gamma_1x = b_yw*b_wx + b_yx - gamma_1w*b_wx
gamma_1m = b_ym
gamma_10 = b_y0 + b_yd + b_yw*b_w0 - gamma_1w*b_w0

# Second-stage coefficients, built from the first-stage ones.
gamma_0w = (gamma_1m*b_mu + gamma_1w*b_wu)/b_wu
gamma_0x = gamma_1m*b_mx + gamma_1w*b_wx + gamma_1x - gamma_0w*b_wx
gamma_00 = gamma_10 + gamma_1m*b_m0 + gamma_1w*b_w0 - gamma_0w*b_w0

# True nuisance function
# np.vstack replaces the deprecated alias np.row_stack (same result).
expected_te = gamma_00 + tauinv_fn(W0_test)@np.vstack((gamma_0x, gamma_0w))

# Keep a copy of D under a second name; a later cell in this notebook rebinds D.
D_ = D.copy()


(2000, 4)


In [31]:
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.model_selection import KFold
from sklearn.kernel_approximation import Nystroem, RBFSampler
import numpy as np
import scipy


def _check_auto(param):
    return (isinstance(param, str) and (param == 'auto'))


class _BaseRKHSIV:

    def __init__(self, *args, **kwargs):
        return

    def _get_delta(self, n):
        '''
        delta -> Critical radius
        '''
        delta_scale = 5 if _check_auto(self.delta_scale) else self.delta_scale
        delta_exp = .4 if _check_auto(self.delta_exp) else self.delta_exp
        return delta_scale / (n**(delta_exp))

    def _get_alpha_scale(self):
        return 60 if _check_auto(self.alpha_scale) else self.alpha_scale

    def _get_alpha_scales(self):
        return ([c for c in np.geomspace(0.1, 1e4, self.n_alphas)]
                if _check_auto(self.alpha_scales) else self.alpha_scales)

    def _get_alpha(self, delta, alpha_scale):
        return alpha_scale * (delta**4)

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)


class RKHSIV(_BaseRKHSIV):
    """Kernel estimator whose fitted function is a kernel expansion over T.

    ``fit`` stores dual coefficients ``a``; ``predict`` evaluates
    ``K(T_test, T_train) @ a``.
    """

    def __init__(self, kernel='rbf', gamma=2, degree=3, coef0=1,
                 delta_scale='auto', delta_exp='auto', alpha_scale='auto',
                 kernel_params=None):
        """
        Parameters:
            kernel : a pairwise kernel function or a string; similar interface with KernelRidge in sklearn
            gamma : the gamma parameter for the kernel
            degree : the degree of a polynomial kernel
            coef0 : the zero coef for a polynomial kernel
            delta_scale : the scale of the critical radius; delta_n = delta_scale / n**(delta_exp)
            delta_exp : the exponent of the critical radius; delta_n = delta_scale / n**(delta_exp)
            alpha_scale : the scale of the regularization; alpha = alpha_scale * (delta**4)
            kernel_params : other kernel params passed to the kernel
        """
        self.kernel = kernel
        self.degree = degree
        self.coef0 = coef0
        self.gamma = gamma
        self.kernel_params = kernel_params
        self.delta_scale = delta_scale  # worst-case critical value of RKHS spaces
        self.delta_exp = delta_exp
        self.alpha_scale = alpha_scale  # regularization strength from Theorem 5

    def fit(self, Z, T, Y):
        """
        Fit the dual coefficients.

        Parameters:
            Z : features for the kernel Kf
            T : features for the kernel Kh; also the inputs used by predict
            Y : targets, one row per sample

        Returns:
            self, with ``self.T`` (a copy of T) and ``self.a`` set.
        """
        n = Y.shape[0]  # number of samples
        delta = self._get_delta(n)
        alpha = self._get_alpha(delta, self._get_alpha_scale())

        Kh = self._get_kernel(T)
        Kf = self._get_kernel(Z)

        # Weighting matrix (Kf + I)^{-1} Kf. The matrix square root of Kf that
        # used to be computed here was never used, so it has been dropped.
        M = np.linalg.inv(
            Kf  + np.eye(n) ) @ Kf
        self.T = T.copy()
        self.a = np.linalg.pinv(Kh @ M @ Kh + alpha * Kh) @ Kh @ M @ Y
        return self

    def predict(self, T_test):
        """Evaluate the fitted function at the rows of T_test."""
        return self._get_kernel(T_test, Y=self.T) @ self.a

    def score(self, Z, T, Y, delta='auto'):
        """
        Weighted quadratic form of the residuals, divided by n**2.

        Parameters:
            Z, T, Y : evaluation data, same roles as in fit
            delta : critical radius to use; 'auto' (the default) derives it
                from the sample size via delta_scale / delta_exp

        The result is indexed with [0, 0], so Y is expected to be a 2-D
        column.
        """
        n = Y.shape[0]
        # Honour an explicitly supplied delta instead of always overwriting it.
        if _check_auto(delta):
            delta = self._get_delta(n)
        Kf = self._get_kernel(Z)
        RootKf = scipy.linalg.sqrtm(Kf).astype(float)
        M = RootKf @ np.linalg.inv(
            Kf / (2 * n * delta**2) + np.eye(n) / 2) @ RootKf
        Y_pred = self.predict(T)
        return ((Y - Y_pred).T @ M @ (Y - Y_pred))[0, 0] / n**2


class RKHSIVL2(_BaseRKHSIV):
    """Variant of the kernel estimator whose penalty term is alpha * Kh @ Kh."""

    def __init__(self, kernel='rbf', gamma=2, degree=3, coef0=1,
                 delta_scale='auto', delta_exp='auto', kernel_params=None):
        """
        Parameters:
            kernel : a pairwise kernel function or a string; similar interface with KernelRidge in sklearn
            gamma : the gamma parameter for the kernel
            degree : the degree of a polynomial kernel
            coef0 : the zero coef for a polynomial kernel
            delta_scale : the scale of the critical radius
            delta_exp : the exponent of the critical radius
            kernel_params : other kernel params passed to the kernel
        """
        # Kernel configuration.
        self.kernel = kernel
        self.degree = degree
        self.coef0 = coef0
        self.gamma = gamma
        self.kernel_params = kernel_params
        # Critical-radius configuration.
        self.delta_scale = delta_scale
        self.delta_exp = delta_exp

    def fit(self, Z, T, Y):
        """
        Fit the dual coefficients and return self.

        Z feeds the kernel Kf, T feeds the kernel Kh (and is stored for
        predict), Y holds the targets. The regularization is delta**4.
        """
        n_samples = Y.shape[0]
        ridge = self._get_delta(n_samples)**4

        gram_t = self._get_kernel(T)
        gram_z = self._get_kernel(Z)

        # pinv(Kf) @ Kf
        weight = np.linalg.pinv(gram_z) @ gram_z
        self.T = T.copy()

        system = gram_t @ weight @ gram_t + ridge * gram_t @ gram_t
        self.a = np.linalg.pinv(system) @ gram_t @ weight @ Y
        return self

    def predict(self, T_test):
        """Evaluate the fitted function at the rows of T_test."""
        cross_gram = self._get_kernel(T_test, Y=self.T)
        return cross_gram @ self.a



In [32]:
# RKHSIVL2 estimator with an RBF kernel; the critical-radius scale is left on
# 'auto' while its exponent is set explicitly.
rkhs_model = RKHSIVL2(
    kernel='rbf',
    gamma=.1,
    delta_scale='auto',
    delta_exp=.4,
)

In [33]:

# Run DML_mediated with the same RKHSIVL2 estimator supplied for every model
# slot, then print whatever dml() returns.
# NOTE(review): one shared instance is passed four times -- confirm that
# DML_mediated copies its models before fitting them.
dml_rkhs = DML_mediated(
    Y, D, M, W, Z, X,
    estimator='OR',
    model1=rkhs_model,
    model2=rkhs_model,
    modelq1=rkhs_model,
    modelq2=rkhs_model,
    n_folds=5,
    n_rep=1,
)
dml_result = dml_rkhs.dml()
print(dml_result)


Rep: 1


100%|██████████| 5/5 [00:03<00:00,  1.42it/s]

(3.915927100396628, 18.231047137046808, array([3.72879905, 4.10305515]))





In [34]:

import copy

# --- Stage 1: fit on the rows with D == 1 -----------------------------------
ind = np.where(D==1)[0]
M1 = M[ind]
W1 = W[ind]
X1 = X[ind,:]
Z1 = Z[ind]
Y1 = Y[ind]

A2 = np.column_stack((M1,X1,Z1))
A1 = np.column_stack((M1,X1,W1))

# fit() returns the estimator itself. Fitting a private copy keeps bridge_1
# bound to the stage-1 fit even after rkhs_model is refitted for stage 2 below.
bridge_1 = copy.deepcopy(rkhs_model).fit(A2, A1, Y1)

# Stage-1 predictions on all rows (computed once and reused).
A1 = np.column_stack((M,X,W))
hola = bridge_1.predict(A1)
print(np.mean(hola))
print(np.var(hola))
bridge_1_hat = hola.reshape(A1.shape[:1] + Y.shape[1:])

# --- Stage 2: regress the stage-1 predictions on the rows with D == 0 --------
ind = np.where(D==0)[0]
W0 = W[ind]
X0 = X[ind,:]
Z0 = Z[ind]
bridge_1_hat = bridge_1_hat[ind]

B2 = np.column_stack((Z0,X0))
B1 = np.column_stack((X0,W0))

bridge_2 = rkhs_model.fit(B2, B1, bridge_1_hat)

gamma_0_hat = bridge_2.predict(np.column_stack((X, W))).reshape(-1, 1)

print(np.mean(gamma_0_hat))
print(np.var(gamma_0_hat))

3.840539503054925
14.557440064081337
4.013673818987448
12.288107410558663


In [None]:
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.model_selection import KFold
from sklearn.kernel_approximation import Nystroem, RBFSampler
import numpy as np
import scipy


def _check_auto(param):
    return (isinstance(param, str) and (param == 'auto'))


class _BaseRKHS2IV:

    def __init__(self, *args, **kwargs):
        return

    def _get_delta(self, n):
        '''
        delta -> Critical radius
        '''
        delta_scale = 5 if _check_auto(self.delta_scale) else self.delta_scale
        delta_exp = .4 if _check_auto(self.delta_exp) else self.delta_exp
        return delta_scale / (n**(delta_exp))

    def _get_alpha_scale(self):
        return 60 if _check_auto(self.alpha_scale) else self.alpha_scale

    def _get_alpha_scales(self):
        return ([c for c in np.geomspace(0.1, 1e4, self.n_alphas)]
                if _check_auto(self.alpha_scales) else self.alpha_scales)

    def _get_alpha(self, delta, alpha_scale):
        return alpha_scale * (delta**4)

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)


class RKHS2IV(_BaseRKHS2IV):
    """Joint kernel estimator producing two kernel expansions in one fit.

    ``fit`` stores coefficients ``b`` (expansion over B) and ``a`` (expansion
    over A); ``predict`` evaluates one or both of them.
    """

    def __init__(self, kernel='rbf', gamma=2, degree=3, coef0=1,
                 delta_scale='auto', delta_exp='auto', kernel_params=None):
        """
        Parameters:
            kernel : a pairwise kernel function or a string; similar interface with KernelRidge in sklearn
            gamma : the gamma parameter for the kernel
            degree : the degree of a polynomial kernel
            coef0 : the zero coef for a polynomial kernel
            delta_scale : the scale of the critical radius
            delta_exp : the exponent of the critical radius
            kernel_params : other kernel params passed to the kernel
        """
        self.kernel = kernel
        self.degree = degree
        self.coef0 = coef0
        self.gamma = gamma
        self.kernel_params = kernel_params
        self.delta_scale = delta_scale  # worst-case critical value of RKHS spaces
        self.delta_exp = delta_exp

    def fit(self, A, B, C, D, Y, subsetted=False, subset_ind1=None, subset_ind2=None):
        """
        Fit both sets of dual coefficients.

        Parameters:
            A, B : feature matrices of the two fitted expansions (kernels Ka, Kb)
            C, D : feature matrices of the two weighting kernels (Kc, Kd)
            Y : targets, one row per sample
            subsetted : when True, Kd is restricted to the rows selected by
                subset_ind1 and Kc to the rows selected by subset_ind2
            subset_ind1 : 0/1 indicator per sample; rows equal to 1 are used for Kd
            subset_ind2 : optional 0/1 indicator per sample; rows equal to 1 are
                used for Kc. Defaults to the rows where subset_ind1 equals 0.

        Returns:
            self, with ``self.a``, ``self.b`` and copies of A and B stored.

        Raises:
            ValueError : if an indicator is missing, has the wrong length, or
                selects no samples.
        """
        if subsetted:
            if subset_ind1 is None:
                raise ValueError("subset_ind1 must be provided when subsetted is True")
            if len(subset_ind1) != len(Y):
                raise ValueError("subset_ind1 must have the same length as Y")
            if subset_ind2 is not None and len(subset_ind2) != len(Y):
                raise ValueError("subset_ind2 must have the same length as Y")

        n = Y.shape[0]  # number of samples
        Id = np.eye(n)

        if subsetted:
            # Coerce to arrays so that plain Python lists are compared
            # element-wise rather than as a single object.
            flags1 = np.asarray(subset_ind1)
            ind1 = np.where(flags1 == 1)[0]
            if subset_ind2 is not None:
                ind2 = np.where(np.asarray(subset_ind2) == 1)[0]
            else:
                ind2 = np.where(flags1 == 0)[0]
            if ind1.size == 0 or ind2.size == 0:
                # Without this guard the n/p or n/q rescaling below would
                # fail with a ZeroDivisionError.
                raise ValueError("each subset must contain at least one sample")
            # Row-selection matrices for the two groups.
            Ip = Id[ind1, :]
            Iq = Id[ind2, :]
            p = Ip.shape[0]
            q = Iq.shape[0]

        delta = self._get_delta(n)
        alpha = delta**4

        Ka = self._get_kernel(A)
        Kb = self._get_kernel(B)
        Kc = self._get_kernel(C) if not subsetted else Iq @ self._get_kernel(C) @ Iq.T
        Kd = self._get_kernel(D) if not subsetted else Ip @ self._get_kernel(D) @ Ip.T

        # Weighting matrices; in the subsetted case they are embedded back
        # into n x n matrices and rescaled by n/q and n/p respectively.
        Pc = np.linalg.pinv(Kc + Id) @ Kc if not subsetted else (n/q) * Iq.T @ np.linalg.pinv(Kc + Iq @ Iq.T) @ Kc @ Iq
        Pd = np.linalg.pinv(Kd + Id) @ Kd if not subsetted else (n/p) * Ip.T @ np.linalg.pinv(Kd + Ip @ Ip.T) @ Kd @ Ip

        KbPcKa_inv = np.linalg.pinv(Kb @ Pc @ Ka)

        M = Ka @ (Pc + (Pd @ Ka + Pc @ Ka + alpha * Id) @ KbPcKa_inv @ (Kb @ Pc + alpha * Id)) @ Kb

        self.b = np.linalg.pinv(M) @ Ka @ Pd @ Y
        self.a = KbPcKa_inv @ (Kb @ Pc + alpha * Id) @ Kb @ self.b

        self.A = A.copy()
        self.B = B.copy()
        return self

    def predict(self, B_test, *args):
        """
        Evaluate the fitted expansions.

        With only B_test, returns ``K(B_test, B) @ b``. With one extra
        positional argument A_test, returns the tuple
        ``(K(B_test, B) @ b, K(A_test, A) @ a)``.

        Raises:
            ValueError : if more than one extra positional argument is given.
        """
        if len(args) == 0:
            # Only B_test provided, return h prediction
            return self._get_kernel(B_test, Y=self.B) @ self.b
        elif len(args) == 1:
            # Two arguments provided, assume the second is A_test
            A_test = args[0]
            return (self._get_kernel(B_test, Y=self.B) @ self.b, self._get_kernel(A_test, Y=self.A) @ self.a)
        else:
            # More than one additional argument provided, raise an error
            raise ValueError("predict expects at most two arguments, B_test and optionally A_test")


In [59]:
# RKHS2IV estimator with an RBF kernel; the critical-radius scale is left on
# 'auto' while its exponent is set explicitly.
rkhs2_model = RKHS2IV(
    kernel='rbf',
    coef0=0,
    gamma=.1,
    delta_scale='auto',
    delta_exp=.4,
)

In [60]:
# Feature matrices handed to RKHS2IV.fit, in its (A, B, C, D) argument order.
B = np.column_stack((X,W))
C = np.column_stack((X,Z))
A = np.column_stack((M,X,W))
# NOTE: this rebinds D to a feature matrix; the copy D_ made in an earlier
# cell still holds the previous value of D and is what is used below.
D = np.column_stack((M,X,Z))

ones = np.ones(2000)
ones[0] = 0

# Rows of B where D_ == 0.
ind = np.where(D_==0)[0]
B_test = B[ind]

# Fit on all rows with the subsets selected by D_, then evaluate both fitted
# functions on the full B and A matrices.
rkhs2_fit = rkhs2_model.fit(A, B, C, D, Y, subsetted=True, subset_ind1=D_)
rkhs2_pred, hola = rkhs2_fit.predict(B, A)

for values in (rkhs2_pred, hola):
    print(np.mean(values))
    print(np.var(values))


-0.7822199006576444
29872.5368857442
1.9692111221427215
1839.9714188141634
