# [Multilevel Modeling](https://en.wikipedia.org/wiki/Multilevel_model)
Idea: Decompose the sum of random numbers into its contributions

For a given set of $x_i$ and $a_{ijk}$ with
$$x_i = \sum_{j=0}^{n} \sum_{k=1}^{m_j} a_{ijk}\, y_{jk}$$
where
* $m_j$ is the number of contributors of layer $j$
* $y_{jk}$ is the $k$-th contribution of layer $j$
* $m_0 = 1$ by default
* $a_{ijk} \in \{0, 1\}$
* $\sum_{k=1}^{m_j} a_{ijk} = 1$ for every sample $i$ and layer $j$, i.e. only one contributor per layer

calculate the distributions $y_{jk} \sim N(μ_{jk}, σ_{jk})$.

Boundary conditions:
* $\sum_{k=1}^{m_j} μ_{jk} = 0$ within each layer $j$, via $μ_{j m_j} = -\sum_{k=1}^{m_j-1} μ_{jk}$


Goal of this workbook: explore [Linear Models](https://scikit-learn.org/stable/modules/linear_model.html), see also [(Examples)](https://scikit-learn.org/stable/auto_examples/linear_model/index.html).


In [73]:
import numpy as np
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder

In [74]:
def align_coefficients(coefs):
    """Center the means of every level so that they average to zero.

    Input:
    * coefs: iterable of levels, each a dict {contributor: (mu, sigma)}

    Output:
    List with one new dict per level; every mu is shifted by the average mu
    of its level, every sigma is passed through unchanged.
    """
    def center(level):
        # plain (unweighted) average over the means of this level
        offset = np.mean([mu for mu, _ in level.values()])
        return {key: (mu - offset, sigma) for key, (mu, sigma) in level.items()}

    return [center(level) for level in coefs]

def gen_contributer_coefficients(n_contributers, contributer_avg, contributer_sig):
    """Draw a random (mu, sigma) pair for every contributor of every level.

    Input:
    * n_contributers: list with the number of contributors for each level;
      the first entry has to be 1
    * contributer_avg: handed to np.random.normal as the mean when drawing mu
    * contributer_sig: handed to np.random.lognormal as the mean when drawing sigma

    Output:
    List with one dict {contributor: (mu, sigma)} per level, after the means
    of each level have been centered by align_coefficients
    """
    assert n_contributers[0] == 1, "First level is allowed to have one contributer"

    def draw_level(count):
        # per contributor: mu is drawn first, sigma second
        return {
            idx: (np.random.normal(contributer_avg), np.random.lognormal(contributer_sig))
            for idx in range(count)
        }

    levels = [draw_level(count) for count in n_contributers]
    return align_coefficients(levels)

def print_coefficients(contributer_coefficients):
    """Print mu and sigma of every contributor, one line per contributor.

    The coefficients are passed through align_coefficients first, so the
    printed mu values are the centered ones and can differ from the input.
    """
    aligned = align_coefficients(contributer_coefficients)
    lines = (
        f"Stage {m}: Contributer {i} mu={mu:.2f}, sig={sig:.2f}"
        for m, stage in enumerate(aligned)
        for i, (mu, sig) in stage.items()
    )
    for line in lines:
        print(line)

def gen_data(contributer_coefficients, n_samples):
    """Generate random data as the sum of one random contribution per level.

    For every level, one contributor is picked at random per sample
    (np.random.randint) and a value drawn from that contributor's normal
    distribution is added to the sample. Level 0 is the baseline that
    contributes to all samples.

    Inputs:
    * contributer_coefficients: list with one dict {contributor: (mean, sig)}
      per level; the keys of a level are used as column indices and are
      therefore expected to be the integers 0..len(level)-1
    * n_samples: number of samples

    Outputs:
    * data: array of shape (n_samples,) with the summed values
    * contributers: integer array of shape (n_samples, n_levels - 1) holding
      the contributor picked per sample and level; the column of the
      baseline (level 0) is dropped
    """
    assert len(contributer_coefficients[0]) == 1, "Level 0 defines the baseline. It should have exactly one contributer"
    data = np.zeros((n_samples, ))
    contributers = np.zeros((n_samples, len(contributer_coefficients)))
    rows = np.arange(n_samples)
    for lvl, cdict in enumerate(contributer_coefficients):
        lvl_influencers = len(cdict)  # number of contributors in this level
        lvldata = np.zeros((n_samples, lvl_influencers))

        # one candidate value per sample and contributor
        for i, (mu, sig) in cdict.items():
            lvldata[:, i] = np.random.normal(mu, sig, n_samples)

        # contributor index chosen for each sample
        selection = np.random.randint(low=0,
                                      high=lvl_influencers,
                                      size=n_samples)
        contributers[:, lvl] = selection

        # gather the value of the chosen contributor for every row at once
        data += lvldata[rows, selection]

    return data, contributers[:, 1:].astype(int)

def generate(n_samples, contributer_coefficients):
    """Print the coefficients, then generate a random data set for them.

    Inputs:
    * n_samples: number of samples
    * contributer_coefficients: list with one dict {contributor: (mean, sig)}
      per level; level 0 is the baseline

    Outputs:
    * data, contributers: as returned by gen_data
    * n_contributers: list with the number of contributors of every level
      except the baseline
    """
    # The baseline (level 0) is skipped because gen_data drops its column too.
    # This is always a list, also for a single level, since eval() iterates it.
    n_contributers = [len(level) for level in contributer_coefficients][1:]

    # Baseline plus exactly one further level
    if len(contributer_coefficients) == 2:
        print("ATTENTION: just for one level")

    print("generating Data for")
    print_coefficients(contributer_coefficients)

    data, contributers = gen_data(n_samples=n_samples,
                                  contributer_coefficients=contributer_coefficients)

    return data, contributers, n_contributers

In [75]:
# Number of samples in the generated data set
n_samples = 10000

# Coefficient sets: one dict {contributor: (mu, sigma)} per level,
# the first dict is the baseline level
cc_27 = [
    {0: (2, 5)},
    {0: (2, 1), 1: (-1, 1), 2: (-1, 1)},
    {0: (3, 1), 1: (-1, 1), 2: (-1, 1), 3: (-1, 1)},
]

cc_27b = [
    {0: (2, 5)},
    {0: (2, 1), 1: (-1, 1), 2: (-1, 3)},
    {0: (2, 1), 1: (-1, 1), 2: (-1, 1), 3: (0, 1)},
]

data, contributers, n_contributers = generate(
    n_samples, contributer_coefficients=cc_27b
)

# One-hot encode the contributor picked per level; used as regression features
enc = OneHotEncoder(handle_unknown='ignore').fit(contributers)
contributers_ohe = enc.transform(contributers).toarray()

generating Data for
Stage 0: Contributer 0 mu=0.00, sig=5.00
Stage 1: Contributer 0 mu=2.00, sig=1.00
Stage 1: Contributer 1 mu=-1.00, sig=1.00
Stage 1: Contributer 2 mu=-1.00, sig=3.00
Stage 2: Contributer 0 mu=2.00, sig=1.00
Stage 2: Contributer 1 mu=-1.00, sig=1.00
Stage 2: Contributer 2 mu=-1.00, sig=1.00
Stage 2: Contributer 3 mu=0.00, sig=1.00


In [76]:
def eval(regressor, n_contributers=None):
    """Print the intercept and the coefficients of a fitted regressor.

    The flat coefficient vector is printed in consecutive groups, one line
    per level.

    Inputs:
    * regressor: object with the attributes intercept_ and coef_
    * n_contributers: group sizes, i.e. the number of coefficients per level;
      when omitted, the module-level variable n_contributers is used

    Raises:
    * IndexError: if coef_ holds fewer values than the group sizes require

    NOTE: the name shadows the builtin eval; it is kept because the cells
    below call it under this name.
    """
    if n_contributers is None:
        # Resolved at call time so that regenerated data is picked up;
        # a default bound at definition time would go stale.
        n_contributers = globals()["n_contributers"]

    print(f"Intercept: {regressor.intercept_}")

    coefs = list(regressor.coef_)
    sizes = list(n_contributers)
    if sum(sizes) > len(coefs):
        raise IndexError(
            f"{len(coefs)} coefficients cannot be split into groups of sizes {sizes}"
        )

    start = 0
    for i, size in enumerate(sizes):
        print(f"{i}: {coefs[start:start + size]}")
        start += size
    

# [Ridge Regression](https://scikit-learn.org/stable/modules/linear_model.html#regression)

In [77]:
# Ridge regression, fitted without an intercept term
reg = linear_model.Ridge(alpha=1, fit_intercept=False).fit(contributers_ohe, data)
eval(reg)

Intercept: 0.0
0: [3.097302050817801, 0.04766634459597961, 0.26073853568382976]
1: [2.954364252644228, -0.07923289135828614, -0.3578894019992378, 0.8884649718152803]


## Conclusion
The magnitude of the estimated coefficients sometimes fits, but the sign does not.
# [Lasso](https://scikit-learn.org/stable/modules/linear_model.html#lasso)
--> [LassoLarsCV](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsCV.html#sklearn.linear_model.LassoLarsCV)

In [78]:
# Lasso (LARS) with the regularisation strength chosen by 5-fold cross-validation
reg = linear_model.LassoLarsCV(cv=5)
reg.fit(contributers_ohe, data)
# NOTE(review): the score is computed here but neither stored nor printed
reg.score(contributers_ohe, data)
eval(reg)

Intercept: 0.45856503635063905
0: [2.837431104774484, -0.21314387213902017, 0.0]
1: [2.757450471040065, -0.2773614829134523, -0.5561259387303802, 0.6907534127347048]


# [Elastic-Net](https://scikit-learn.org/stable/modules/linear_model.html#elastic-net)

In [79]:
# Elastic-Net with 5-fold cross-validation
reg = linear_model.ElasticNetCV(cv=5)
reg.fit(contributers_ohe, data)
eval(reg)

Intercept: 1.631752576828788
0: [2.304240152859488, -0.7363586094433596, -0.5237140689177535]
1: [2.103001090198751, -0.9188301162845305, -1.1969318466149743, 0.041339111601008345]


# [LARS-Lasso](https://scikit-learn.org/stable/modules/linear_model.html#lars-lasso)
--> finds biggest contributers

In [80]:
# LARS-Lasso with a fixed regularisation strength
reg = linear_model.LassoLars(alpha=0.1)
reg.fit(contributers_ohe, data)
eval(reg)

Intercept: 0.6310597851849238
0: [2.4769130156925088, 0.0, 0.0]
1: [2.2337632301961667, 0.0, -0.28022853369218503, 0.14918580267530482]


# [Orthogonal Matching Pursuit](https://scikit-learn.org/stable/modules/linear_model.html#orthogonal-matching-pursuit-omp)

In [81]:
# Orthogonal Matching Pursuit with default settings
reg = linear_model.OrthogonalMatchingPursuit()
reg.fit(contributers_ohe, data)
eval(reg)

Intercept: 1.01284633963874
0: [2.9225102342995055, 0.0, 0.0]
1: [0.0, 0.0, 0.0, 0.0]


# [Bayesian Ridge Regression](https://scikit-learn.org/stable/modules/linear_model.html#bayesian-ridge-regression)

In [82]:
# Bayesian ridge regression with default settings
reg = linear_model.BayesianRidge()
reg.fit(contributers_ohe, data)
eval(reg)

# Quantities estimated during the fit, followed by two empty lines
print(f"precision of the noise:{reg.alpha_}")
print(f"Estimated precision of the weights: {reg.lambda_}")
print(f"stimated variance-covariance matrix of the weights: {reg.sigma_}")
print("")
print("")

Intercept: 1.9869654053273471
0: [1.9557167629006482, -1.0840400308455844, -0.8716767320490477]
1: [2.0940611945347807, -0.9266376291178181, -1.2041547983416072, 0.03673123292333058]
precision of the noise:0.03428529358321994
Estimated precision of the weights: 0.3997707714213595
stimated variance-covariance matrix of the weights: [[ 8.39661586e-01  8.30904004e-01  8.30867911e-01  7.55590649e-06
  -8.60946973e-05 -5.69608620e-05  1.35499653e-04]
 [ 8.30904004e-01  8.39590610e-01  8.30938886e-01 -2.04927474e-05
   5.85937463e-05  7.32765492e-05 -1.11377548e-04]
 [ 8.30867911e-01  8.30938886e-01  8.39626703e-01  1.29368409e-05
   2.75009510e-05 -1.63156872e-05 -2.41221047e-05]
 [ 7.55590649e-06 -2.04927474e-05  1.29368409e-05  6.34026951e-01
   6.22489834e-01  6.22494332e-01  6.22422383e-01]
 [-8.60946973e-05  5.85937463e-05  2.75009510e-05  6.22489834e-01
   6.34041908e-01  6.22488728e-01  6.22413030e-01]
 [-5.69608620e-05  7.32765492e-05 -1.63156872e-05  6.22494332e-01
   6.22488728e-0

# [Automatic Relevance Determination](https://scikit-learn.org/stable/modules/linear_model.html#automatic-relevance-determination-ard)

In [83]:
# Automatic Relevance Determination regression with default settings
reg = linear_model.ARDRegression()
reg.fit(contributers_ohe, data)
eval(reg)

Intercept: 1.0854079282631273
0: [2.87493700896671, -0.13194958858388103, 0.0034860958933150667]
1: [2.084006595408319, -0.9328661746751291, -1.2140800762437054, 0.00622032983995785]
