In [None]:
class LinearRegressionMAP:
    """
    Linear regression objective viewed as MAP (maximum a posteriori) estimation.

    The class stores the weights ``w``, the bias ``b``, a regularization
    strength and the name of a prior, and evaluates the corresponding
    negative log-posterior (up to additive/multiplicative constants).
    No fitting routine is defined in this class: ``w`` and ``b`` start as
    None and must be assigned before the objective can be evaluated.
    """

    # Prior names that negative_log_posterior knows how to evaluate.
    SUPPORTED_PRIORS = ('gaussian', 'laplace')

    def __init__(self, lambda_reg=1.0, prior='gaussian'):
        """
        Parameters
        ----------
        lambda_reg : float
            Multiplier applied to the prior (penalty) term.
        prior : str
            'gaussian' (L2 penalty) or 'laplace' (L1 penalty). The value is
            validated when the objective is evaluated, not here, so it can
            still be reassigned after construction.
        """
        self.lambda_reg = lambda_reg
        self.prior = prior
        self.w = None
        self.b = None

    def negative_log_posterior(self, X, y):
        """
        Return the sum of squared residuals plus the prior penalty on ``w``.

        Parameters
        ----------
        X : object with a ``dot`` method (e.g. a NumPy array)
            Feature matrix; ``X.dot(self.w) + self.b`` is the prediction.
        y : array-like
            Targets, compatible with the prediction for subtraction.

        Raises
        ------
        ValueError
            If ``self.prior`` is not one of SUPPORTED_PRIORS. Previously this
            case fell through both branches and failed with an
            UnboundLocalError on the return statement.
        """
        # Fail fast with a readable message instead of an unbound local later.
        if self.prior not in self.SUPPORTED_PRIORS:
            raise ValueError(
                f"Unknown prior {self.prior!r}; "
                f"expected one of {self.SUPPORTED_PRIORS}"
            )

        # Likelihood term (assuming Gaussian noise)
        y_pred = X.dot(self.w) + self.b
        likelihood_term = np.sum((y - y_pred) ** 2)

        # Prior term: only the weights are penalized, the bias is not.
        if self.prior == 'gaussian':
            # Gaussian prior leads to L2 regularization
            prior_term = self.lambda_reg * np.sum(self.w ** 2)
        else:
            # Laplace prior leads to L1 regularization
            prior_term = self.lambda_reg * np.sum(np.abs(self.w))

        return likelihood_term + prior_term

In [1]:
def choose_prior_for_problem(n_features, n_samples, domain_knowledge):
    """
    Pick a prior name ('gaussian' or 'laplace') from problem characteristics.

    Decision order:
      1. More features than samples -> 'laplace' (domain_knowledge is not
         consulted in this case).
      2. domain_knowledge['parameters_should_be_small'] truthy -> 'gaussian'.
      3. domain_knowledge['parameters_should_be_sparse'] truthy -> 'laplace'.
      4. Otherwise -> 'gaussian'.

    Missing keys in domain_knowledge are treated as False.
    """
    # Under-determined problem: favour sparsity (L1 regularization).
    if n_features > n_samples:
        return 'laplace'

    # An explicit "small parameters" hint wins over the sparsity hint.
    wants_small = domain_knowledge.get('parameters_should_be_small', False)
    if wants_small:
        return 'gaussian'  # L2 regularization

    # Sparsity hint selects L1; with no hint at all fall back to Gaussian.
    wants_sparse = domain_knowledge.get('parameters_should_be_sparse', False)
    return 'laplace' if wants_sparse else 'gaussian'

In [2]:
def set_regularization_strength(train_size, noise_level, confidence_in_prior):
    """
    Heuristic regularization strength built from three multiplicative factors.

    The result is 1.0 * exp(-train_size / 1000) * exp(noise_level)
    * confidence_in_prior, i.e. it shrinks as the training set grows and
    grows with the noise level and with the confidence in the prior.
    """
    factors = (
        np.exp(-train_size / 1000),  # less data -> rely more on the prior
        np.exp(noise_level),         # more noise -> rely more on the prior
        confidence_in_prior,         # direct scaling by trust in the prior
    )

    # Fold the factors onto the base strength in the listed order.
    strength = 1.0
    for factor in factors:
        strength = strength * factor
    return strength

In [3]:
def compare_map_vs_mle(X, y, n_samples_list, X_test=None, y_test=None, alpha=1.0):
    """
    Compare MAP and MLE performance for different dataset sizes

    For every n in n_samples_list, a LinearRegression model (MLE) and a
    Ridge model (MAP with a Gaussian prior) are fitted on the first n rows
    of X and y, and both are scored with compute_error on the test data.
    LinearRegression, Ridge and compute_error are not defined in this
    section; they are presumably scikit-learn estimators and a helper from
    elsewhere in the file -- confirm against the imports.

    Parameters
    ----------
    X, y : sliceable
        Training features and targets; only X[:n] / y[:n] are used per size.
    n_samples_list : iterable of int
        Training-set sizes to evaluate.
    X_test, y_test : optional
        Evaluation data. When omitted, the module-level names X_test and
        y_test are used, which is what earlier versions of this function
        did implicitly.
    alpha : float
        Regularization strength passed to Ridge(alpha=...). Defaults to the
        previously hard-coded 1.0.

    Returns
    -------
    list of dict
        One entry per size with keys 'n_samples', 'mle_error', 'map_error'.

    Raises
    ------
    NameError
        If test data is neither passed in nor available at module level
        (only when there is at least one size to evaluate).
    """
    sizes = list(n_samples_list)
    if not sizes:
        # Nothing to evaluate, so test data is not needed at all.
        return []

    if X_test is None or y_test is None:
        # Backward compatibility: fall back to the module-level test set
        # that the function used to read implicitly.
        module_scope = globals()
        missing = [
            name
            for name, value in (('X_test', X_test), ('y_test', y_test))
            if value is None and name not in module_scope
        ]
        if missing:
            raise NameError(
                f"{', '.join(missing)} not provided and not defined at module level"
            )
        if X_test is None:
            X_test = module_scope['X_test']
        if y_test is None:
            y_test = module_scope['y_test']

    results = []
    for n in sizes:
        # Train MAP and MLE models on the first n samples
        X_subset = X[:n]
        y_subset = y[:n]

        mle_model = LinearRegression().fit(X_subset, y_subset)
        map_model = Ridge(alpha=alpha).fit(X_subset, y_subset)

        # Compare performance on the same held-out data
        results.append({
            'n_samples': n,
            'mle_error': compute_error(mle_model, X_test, y_test),
            'map_error': compute_error(map_model, X_test, y_test)
        })
    return results