In [1]:
import numpy as np
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd

## Problem 2

## Define functions to generate a specific data set

In [14]:
def generate_signal(n=100,p=10,rho = 0,R2 = 0.8,use_sparse_beta=True):
    """Simulate one data set from the linear model y = X @ Beta* + Error.

    The rows of X are drawn from a zero-mean multivariate normal whose
    covariance comes from compute_covariance(p, rho). The noise variance is
    obtained from compute_sigma2_e so that
    Beta*' Sigma Beta* / (Beta*' Sigma Beta* + sigma2_E) equals R2.

    Args:
        n (int, optional): number of samples. Defaults to 100.
        p (int, optional): dimension (number of parameters). Defaults to 10.
        rho (float, optional): correlation parameter passed to
            compute_covariance. Defaults to 0.
        R2 (float, optional): target signal-to-total variance ratio used to
            size the noise. Defaults to 0.8.
        use_sparse_beta (bool, optional): if True use compute_sparse_beta,
            otherwise compute_dense_beta. Defaults to True.

    Returns:
        tuple: (X, y) where X has shape (n, p) and y has shape (n, 1).
    """
    # pick the true coefficient vector (column vector of shape (p, 1))
    if use_sparse_beta:
        Beta = compute_sparse_beta(n,p)
    else:
        Beta = compute_dense_beta(n,p)

    # covariance matrix of the predictors
    cov = compute_covariance(p,rho)

    # draw n predictor rows from N(0, cov)
    X = np.random.multivariate_normal(np.zeros(p),cov,n)

    # size the noise from the caller's R2 so the argument actually controls
    # the signal-to-noise ratio (not a fixed 0.8)
    sigma_2_E = compute_sigma2_e(Beta,cov,R2)
    Error = np.random.normal(0,np.sqrt(sigma_2_E),(n,1))

    # response values
    y = X @ Beta + Error

    return X,y

def compute_sparse_beta(n = 100,p = 10):
    """Build Beta* for the sparse signal.

    The first floor(sqrt(p)) entries are set to 2/sqrt(n); all remaining
    entries are zero.

    Args:
        n (int, optional): number of samples. Defaults to 100.
        p (int, optional): dimension (number of parameters). Defaults to 10.

    Returns:
        np.ndarray: Beta*, a column vector of shape (p, 1).
    """
    # how many leading coefficients are non-zero
    n_active = np.floor(np.sqrt(p)).astype(int)

    # common value shared by every active coefficient
    amplitude = 2/np.sqrt(n)

    coefficients = np.zeros((p,1))
    coefficients[:n_active] = amplitude

    return coefficients

def compute_dense_beta(n= 100,p = 10):
    """Build Beta* for the dense signal.

    Entry j (1-based) equals 5 / (j * sqrt(n)), so every coefficient is
    non-zero and the magnitudes decay harmonically with the index.

    Args:
        n (int, optional): number of samples. Defaults to 100.
        p (int, optional): dimension (number of parameters). Defaults to 10.

    Returns:
        np.ndarray: Beta*, a column vector of shape (p, 1).
    """
    # 1-based coefficient positions 1..p
    positions = np.arange(1, p + 1)

    # same arithmetic as the element-wise formula, applied to all positions at once
    values = 5/(positions*np.sqrt(n))

    return values.reshape(p, 1)

def compute_covariance(p = 10, rho = 0):
    """Build the p x p covariance matrix with entries rho ** |i - j|.

    The diagonal is always 1 (exponent 0), and entries decay geometrically
    with the distance from the diagonal. With rho = 0 the result is the
    identity matrix, because 0 ** 0 evaluates to 1 here.

    Args:
        p (int, optional): dimension (number of parameters). Defaults to 10.
        rho (float, optional): correlation (must be between 0 and 1). Defaults to 0.

    Returns:
        np.ndarray: Sigma, the covariance matrix of shape (p, p).
    """
    # |i - j| for every (row, column) pair, kept as floats so the power
    # below always yields a floating-point matrix
    index = np.arange(1, p + 1)
    exponents = np.abs(index[:, None] - index[None, :]).astype(float)

    # constant matrix of rho raised element-wise to the distance exponents
    base = np.full((p,p),rho)
    return np.power(base, exponents)

def compute_sigma2_e(Beta,Cov,R2=0.8):
    """Noise variance that yields a target R2 for the given signal.

    Computes (Beta' Cov Beta) * (1 - R2) / R2 and returns it as a scalar.

    Args:
        Beta: coefficient column vector; Beta' Cov Beta must be a 1x1 matrix.
        Cov: covariance matrix of the predictors.
        R2 (float, optional): target R2. Defaults to 0.8.

    Returns:
        The scalar noise variance sigma^2_E.
    """
    # variance of the signal X @ Beta, as a 1x1 matrix
    signal_variance = np.transpose(Beta) @ Cov @ Beta

    # scale by (1 - R2) / R2, multiplying before dividing
    noise_variance = signal_variance * (1 - R2)/R2

    # unwrap the single entry
    return noise_variance[0,0]

# Draw one example data set: 100 samples, 10 predictors, rho = 0.25, target R2 = 0.8
# (use_sparse_beta is left at its default, True).
# NOTE(review): no random seed is set in the visible cells, so X and y will
# differ between runs — confirm whether a seed is set elsewhere.
X,y = generate_signal(n=100,p=10,rho=0.25,R2=0.8)