# [Multilevel Modeling](https://en.wikipedia.org/wiki/Multilevel_model)
Idea: Decompose the sum of random numbers into its contributions

for a given set of $x_i$ and $a_{ik}$ with
$$x_i = \Sigma_{j=0}^n \Sigma_{k=1}^{m_j} a_{ik}y_{jk}$$ 
with
* $m_j$ being the number of contributers of layer j
* $y_{jk}$ being the k-th contribution of layer j
* $m_0 = 1$ by default
* $a_{ik} \in \{0, 1\}$
* $\Sigma a_{ik} = 1$ only one contributer per layer

calculate the Distributions $y_{k} \sim N(μ_{k}, σ_{k})$
Boundary conditions:
* $\Sigma _{k} μ_k = 0$ via $μ_{m_j} = -\Sigma_{k = 1}^{m_j-1} μ_{k}$


Goal of this workbook to explore [Linear Models](https://scikit-learn.org/stable/modules/linear_model.html) see also [(Examples)](https://scikit-learn.org/stable/auto_examples/linear_model/index.html)


In [1]:
import numpy as np
import sklearn
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder

In [2]:
def align_coefficients(coefs):
    """ensure that the average over all contributers per level is 0"""
    mem = []
    for lcoefs in coefs:
        avg = np.mean([m for (m, s) in lcoefs.values()])
        mem.append({k: (m-avg, s) for k, (m,s) in lcoefs.items()})
    
    return mem

def gen_contributer_coefficients(n_contributers, contributer_avg, contributer_sig):
    """creates for n_contributers the average and standard deviation
    Input:
    * n_contributers: list with number of influencers for each level
    * contributer_avg: average for the average of the influencers
    * contributer_sig: average for the stdev for the influencers (lognormal distribution) 
     
    Output:
    Dictionary {level : { influencer: (mu, sigma)}} with the coefficients for every
    influencer in each level
    """
    assert n_contributers[0] == 1, "First level is allowed to have one contributer"
    return [{i : (np.random.normal(contributer_avg), np.random.lognormal(contributer_sig)) for i in range(num)} for lvl, num in enumerate(n_contributers)]
    return align_coefficients([{i : (np.random.normal(contributer_avg), np.random.lognormal(contributer_sig)) for i in range(num)} for lvl, num in enumerate(n_contributers)])

def print_coefficients(contributer_coefficients):
    #contributer_coefficients = align_coefficients(contributer_coefficients)
    for m, stage in enumerate(contributer_coefficients):
        for i, (mu, sig) in stage.items():
            print(f"Stage {m}: Contributer {i} mu={mu:.2f}, sig={sig:.2f}")

def gen_data(contributer_coefficients, n_samples):
    """generate random data.
    The first level defines the baseline that holds for all random numbers
    
    Inputs:
    * n_samples: number of samples
    * contributer_coefficients: dictionary with (mean,sig) per level per contributer
    
    Outputs:
    * data: array with the final number
    * contributers: matrix defining the contributers, first column is for the first level"""
    
    assert len(contributer_coefficients[0]) == 1, "Level 0 defines the baseline. It should have exactly one contributer"
    #data = np.random.normal(gen_avg, gen_sig, n_samples)
    data = np.zeros((n_samples, ))
    contributers = np.zeros((n_samples, len(contributer_coefficients)))
    for lvl, cdict in enumerate(contributer_coefficients):
        #print(f"creating level {lvl}")
        lvl_influencers = len(cdict) #number of influencers in this level
        lvldata = np.zeros((n_samples, lvl_influencers))

        for i, (mu,sig) in cdict.items():
            lvldata[:,i] = np.random.normal(mu, sig, n_samples)

        selection = np.random.randint(low=0,
                                    high=lvl_influencers,
                                    size=(n_samples))
        contributers[:, lvl] = selection
        
        data += np.array([lvldata[row, col] for row, col in enumerate(selection)])
        # Note: The first level 
    return data, contributers[:, 1:].astype(int)

def generate(n_samples, contributer_coefficients):
    if contributer_coefficients == 2:
        n_contributers= len(contributer_coefficients[1])
        print("ATTENTION: just for one level")
    else:
        n_contributers = [len(level) for level in contributer_coefficients][1:]

    print("generating Data for")
    print_coefficients(contributer_coefficients)

    data, contributers = gen_data(n_samples=n_samples,
                              contributer_coefficients=contributer_coefficients)

    #contributers = contributers.squeeze(-1) # kill the first level - not needed
    return data, contributers, n_contributers

In [64]:
n_samples = 100000

cc_27 = [
    {    0 : (2, 5)},
    {    0: (2, 1),
        1: (-1, 1),
        2: (-1, 1)},
    {    0: (3, 1),
        1: (-1, 1),
        2: (-1, 1),
        3: (-1, 1)}
        ]

cc_27b = [
    {    0 : (2, 5)},
    {    0: (2, 1),
        1: (-1, 1),
        2: (-1, 3)},
    {    0: (2, 1),
        1: (-1, 1),
        2: (-1, 1),
        3: (0, 1)}
        ]


data, contributers, n_contributers = generate(n_samples, contributer_coefficients=cc_27b)

enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(contributers)
contributers_ohe = enc.transform(contributers).toarray()
n_features = np.sum(n_contributers)
print(f"number of features: {n_features}")

generating Data for
Stage 0: Contributer 0 mu=2.00, sig=5.00
Stage 1: Contributer 0 mu=2.00, sig=1.00
Stage 1: Contributer 1 mu=-1.00, sig=1.00
Stage 1: Contributer 2 mu=-1.00, sig=3.00
Stage 2: Contributer 0 mu=2.00, sig=1.00
Stage 2: Contributer 1 mu=-1.00, sig=1.00
Stage 2: Contributer 2 mu=-1.00, sig=1.00
Stage 2: Contributer 3 mu=0.00, sig=1.00
number of features: 7


7

In [30]:
def show(coefficients, text = ""):

    l = list(coefficients)
    p = lambda i: [f"{l.pop(0):.3f}" for _ in range(i)]
    print(text) if text != "" else ""
    
    for i,n in enumerate(n_contributers):
        print(f"{i}: {p(n)}")
        
def eval(regressor, n_contributers=n_contributers):
    """print intercept and coefficients"""
    print(f"Intercept: {regressor.intercept_}")
    show(regressor.coef_)

# [Ridge Regression](https://scikit-learn.org/stable/modules/linear_model.html#regression)

In [15]:
reg = linear_model.Ridge(alpha=1, fit_intercept=True)
reg.fit(contributers_ohe, data)
eval(reg)

Intercept: 1.9681512365785025
0: ['1.908', '-0.932', '-0.975']
1: ['2.177', '-1.042', '-1.012', '-0.123']


# [Lasso](https://scikit-learn.org/stable/modules/linear_model.html#lasso)
--> [LassoLarsCV](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsCV.html#sklearn.linear_model.LassoLarsCV)

In [16]:
reg = linear_model.LassoLarsCV(cv=5).fit(contributers_ohe, data)
reg.score(contributers_ohe, data)
eval(reg)

Intercept: 0.886657158656976
0: ['2.821', '0.000', '-0.023']
1: ['2.301', '-0.866', '-0.838', '0.000']


# [Elastic-Net](https://scikit-learn.org/stable/modules/linear_model.html#elastic-net)

In [17]:
reg = linear_model.ElasticNetCV(cv=5).fit(contributers_ohe, data)
eval(reg)

Intercept: 1.4958976611423003
0: ['2.250', '-0.581', '-0.625']
1: ['2.294', '-0.913', '-0.884', '0.000']


# [LARS-Lasso](https://scikit-learn.org/stable/modules/linear_model.html#lars-lasso)
--> finds biggest contributers

In [18]:
reg = linear_model.LassoLars(alpha=0.1).fit(contributers_ohe, data)
eval(reg)

Intercept: 0.6458608897258027
0: ['2.419', '0.000', '0.000']
1: ['2.295', '-0.108', '-0.117', '0.000']


# [Orthogonal Matching Pursuit](https://scikit-learn.org/stable/modules/linear_model.html#orthogonal-matching-pursuit-omp)

In [7]:
reg = linear_model.OrthogonalMatchingPursuit().fit(contributers_ohe, data)
eval(reg)

Intercept: 1.1001706748930251
0: [2.865314887852755, 0.0, 0.0]
1: [0.0, 0.0, 0.0, 0.0]


# [Bayesian Ridge Regression](https://scikit-learn.org/stable/modules/linear_model.html#bayesian-ridge-regression)

In [40]:
reg = linear_model.BayesianRidge(verbose=True).fit(contributers_ohe, data)
eval(reg)

print(f"precision of the noise: {reg.alpha_}")
print(f"Estimated precision of the weights: {reg.lambda_}")
print(f"Estimated variance-covariance matrix of the weights: {reg.sigma_}")
print(f"")

Convergence after  2  iterations
Intercept: 1.9903518245545497
0: ['2.003', '-0.987', '-1.016']
1: ['2.035', '-1.037', '-0.988', '-0.010']
precision of the noise: 0.033596669987209934
Estimated precision of the weights: 0.40927591723407725
Estimated variance-covariance matrix of the weights: [[ 8.15039474e-01  8.14152385e-01  8.14147604e-01  3.27145415e-06
  -4.27959187e-06 -4.91017499e-07  1.49915522e-06]
 [ 8.14152385e-01  8.15040319e-01  8.14146759e-01 -5.08046494e-06
   3.70194365e-06  9.15845238e-07  4.62676057e-07]
 [ 8.14147604e-01  8.14146759e-01  8.15045100e-01  1.80901079e-06
   5.77648226e-07 -4.24827741e-07 -1.96183127e-06]
 [ 3.27145415e-06 -5.08046494e-06  1.80901079e-06  6.11724678e-01
   6.10537209e-01  6.10539123e-01  6.10538453e-01]
 [-4.27959187e-06  3.70194365e-06  5.77648226e-07  6.10537209e-01
   6.11730405e-01  6.10536270e-01  6.10535579e-01]
 [-4.91017499e-07  9.15845238e-07 -4.24827741e-07  6.10539123e-01
   6.10536270e-01  6.11726598e-01  6.10537473e-01]
 [ 1.

# [Auomatic Relevance Determination](https://scikit-learn.org/stable/modules/linear_model.html#automatic-relevance-determination-ard)

In [54]:
reg = linear_model.ARDRegression(verbose=True,
                                 compute_score=True,
                                 ).fit(contributers_ohe, data)
eval(reg)

print(f"precision of the noise: {reg.alpha_}")

show(reg.lambda_, "Estimated precision of the weights:")

print(f"Estimated variance-covariance matrix of the weights:\n {reg.sigma_}")

print("Scores")
print(reg.scores_)

Converged after 49 iterations
Intercept: -0.0195114002739758
0: ['3.005', '0.000', '0.000']
1: ['3.044', '-0.008', '0.000', '0.997']
precision of the noise: 0.033596888807175006
Estimated precision of the weights:
0: ['0.111', '12385.628', '11902.197']
1: ['0.108', '2341.851', '130831.296', '1.005']
Estimated variance-covariance matrix of the weights:
 [[ 1.33445803e-03  8.08229456e-06 -8.65594155e-07  5.37931777e-06]
 [ 8.08229456e-06  1.87065032e-03  1.80352290e-04  6.84965547e-04]
 [-8.65594155e-07  1.80352290e-04  3.62147387e-04  1.80161405e-04]
 [ 5.37931777e-06  6.84965547e-04  1.80161405e-04  1.87452842e-03]]
Scores
[-219681.28420304903, -219684.744744543, -219679.26014689074, -219679.38364089004, -219680.1884062983, -219675.75162674865, -219675.9051049339, -219671.33456588793, -219671.334836234, -219671.33516932424, -219671.3355691393, -219671.3360370432, -219671.33657528216, -219671.33718692113, -219671.33787586793, -219671.33864690847, -219671.33950574862, -219671.34045905643

In [56]:
reg = linear_model.ARDRegression(verbose=True,
                                 compute_score=True,
                                 threshold_lambda=1e6, 
                                 tol=1e-8
                                 ).fit(contributers_ohe, data)
eval(reg)

print(f"precision of the noise: {reg.alpha_}")

show(reg.lambda_, "Estimated precision of the weights:")

print(f"Estimated variance-covariance matrix of the weights:\n {reg.sigma_}")

print("Scores")
print(reg.scores_)


Converged after 95 iterations
Intercept: -0.022224261144030866
0: ['3.005', '0.001', '-0.001']
1: ['3.046', '-0.005', '0.003', '1.000']
precision of the noise: 0.03359688382044469
Estimated precision of the weights:
0: ['0.111', '12154.660', '11949.879']
1: ['0.108', '3540.244', '5975.120', '0.999']
Estimated variance-covariance matrix of the weights:
 [[ 1.37593184e-03  4.14614249e-05  4.14981777e-05  8.39876613e-06
  -5.68126864e-07  3.35510296e-07  5.69530978e-06]
 [ 4.14614249e-05  7.88166541e-05  3.51516677e-06 -3.81864215e-07
   7.47735741e-09 -4.42504902e-09  9.68094102e-09]
 [ 4.14981777e-05  3.51516677e-06  8.01070693e-05  3.88330287e-07
  -7.60023142e-09  4.49777153e-09 -9.89959881e-09]
 [ 8.39876613e-06 -3.81864215e-07  3.88330287e-07  1.89178675e-03
   1.34997060e-04  8.72230795e-05  7.06079033e-04]
 [-5.68126864e-07  7.47735741e-09 -7.60023142e-09  1.34997060e-04
   2.54306920e-04  1.66594423e-05  1.34855001e-04]
 [ 3.35510296e-07 -4.42504902e-09  4.49777153e-09  8.7223079

In [72]:
D = np.zeros((n_features, n_features))
np.fill_diagonal(D, 1)

In [76]:
reg.predict(D, return_std=True)

(array([ 2.98313636, -0.02101741, -0.02347963,  3.02407272, -0.02733895,
        -0.01941585,  0.97741452]),
 array([5.45582635, 5.45570748, 5.45570759, 5.45587363, 5.45572356,
        5.45571468, 5.45587398]))

# Conclusion
Best Results are obtained by the Ridge Regression and the Bayesian Ridge Regression if all standard deviations are the same within one level.

If the standard deviation changes, the estimation gets artefacts

In [104]:
type(data)

numpy.ndarray

In [13]:
full_data = np.hstack((contributers_ohe, np.expand_dims(data, axis=1)))
clf = sklearn.svm.OneClassSVM(gamma='auto').fit(full_data)
clf.predict(full_data)

array([-1,  1, -1, ..., -1,  1, -1], dtype=int64)

array([[ 0.        ,  0.        ,  1.        , ...,  0.        ,
         1.        , -0.27646882],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         1.        ,  5.54752395],
       [ 0.        ,  1.        ,  0.        , ...,  0.        ,
         1.        ,  5.54658123],
       ...,
       [ 0.        ,  0.        ,  1.        , ...,  0.        ,
         1.        , -2.87074299],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  2.803585  ],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  6.56330766]])

# [Quantile Regressor](https://scikit-learn.org/stable/auto_examples/linear_model/plot_quantile_regression.html)

In [14]:
X = contributers_ohe
y = data
from sklearn.utils.fixes import parse_version, sp_version

# This is line is to avoid incompatibility if older SciPy version.
# You should use `solver="highs"` with recent version of SciPy.
solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"

from sklearn.linear_model import QuantileRegressor

quantiles = [0.05, 0.5, 0.95]
predictions = {}
#out_bounds_predictions = np.zeros_like(y_true_mean, dtype=np.bool_)
for quantile in quantiles:
    qr = QuantileRegressor(quantile=quantile, alpha=0, solver=solver)
    y_pred = qr.fit(X, y).predict(X)
    predictions[quantile] = y_pred
"""
    if quantile == min(quantiles):
        out_bounds_predictions = np.logical_or(
            out_bounds_predictions, y_pred >= y_normal
        )
    elif quantile == max(quantiles):
        out_bounds_predictions = np.logical_or(
            out_bounds_predictions, y_pred <= y_normal
        )
        """

'\n    if quantile == min(quantiles):\n        out_bounds_predictions = np.logical_or(\n            out_bounds_predictions, y_pred >= y_normal\n        )\n    elif quantile == max(quantiles):\n        out_bounds_predictions = np.logical_or(\n            out_bounds_predictions, y_pred <= y_normal\n        )\n        '

In [15]:
predictions

{0.05: array([-8.59017276, -4.58415179, -7.70880034, ..., -8.59017276,
        -2.93747308, -5.25631195]),
 0.5: array([1.18534433, 3.93500599, 0.96129069, ..., 1.18534433, 6.07404894,
        2.82335316]),
 0.95: array([11.27795775, 12.58010155,  9.64836501, ..., 11.27795775,
        14.46249496, 11.34803703])}

In [16]:
cc_27b

[{0: (2, 5)},
 {0: (2, 1), 1: (-1, 1), 2: (-1, 3)},
 {0: (2, 1), 1: (-1, 1), 2: (-1, 1), 3: (0, 1)}]