In [None]:
# CS670 - AI
# NJIT at Jersey City
# Summer 2023 
# Assignment #2 - Gaussian Maximum Likelihood

In [None]:
# Question #1 - MLE of a Gaussian

In [None]:
# To estimate the parameters of the Gaussian p_model using gradient-based optimization,
# we can use the Maximum Likelihood Estimation (MLE) method. MLE is a common approach for 
# estimating the parameters of a probability distribution based on observed data.
# In the case of a Gaussian distribution, we need to estimate the mean (μ) and standard deviation (σ).

# Below is the Pyhon implementation that estimates the optimal parameters for the Gaussian p_model
# using gradient-based optimization:

 

In [1]:
import numpy as np

In [2]:

# Function to calculate the negative log-likelihood
def neg_log_likelihood(params, data):
    mean, std = params
    log_likelihood = -np.sum(np.log(np.exp(-(data - mean) ** 2 / (2 * std ** 2)) / (np.sqrt(2 * np.pi) * std)))
    return log_likelihood

In [3]:
# Function to calculate the gradient of the negative log-likelihood
def gradient(params, data):
    mean, std = params
    d_mean = np.sum((data - mean) / std ** 2)
    d_std = np.sum(((data - mean) ** 2 - std ** 2) / std ** 3)
    return np.array([-d_mean, -d_std])

In [4]:
# Gradient-based optimization using the L-BFGS-B method
def optimize_params(data):
    # Initial guess for the parameters
    initial_params = [np.mean(data), np.std(data)]

    # Minimize the negative log-likelihood using L-BFGS-B
    from scipy.optimize import minimize
    result = minimize(neg_log_likelihood, initial_params, args=(data,), jac=gradient, method='L-BFGS-B')

    # Extract the optimal parameters
    optimal_params = result.x

    return optimal_params

In [5]:
# Given data
data = np.array([4, 5, 7, 8, 8, 9, 10, 5, 2, 3, 5, 4, 8, 9])

In [6]:
# Optimize the parameters
optimal_params = optimize_params(data)


In [7]:

# Print the optimal parameters
print("Optimal mean:", optimal_params[0])
print("Optimal standard deviation:", optimal_params[1])

Optimal mean: 6.214285714285714
Optimal standard deviation: 2.425418120907092


In [None]:
# Let's go through the code step by step:

# We define the neg_log_likelihood function to calculate the negative log-likelihood of the Gaussian distribution given the data. This function takes the parameters (mean and standard deviation) and the data as input and returns the negative log-likelihood.
# We define the gradient function to calculate the gradient of the negative log-likelihood with respect to the parameters. This function takes the parameters and the data as input and returns the gradient.
# The optimize_params function performs the gradient-based optimization using the L-BFGS-B method. It takes the data as input and returns the optimal parameters.
# We define the given data as a NumPy array.
# We call the optimize_params function to estimate the optimal parameters.
# Finally, we print the optimal mean and standard deviation.

In [None]:
# The code uses the minimize function from the scipy.optimize module, specifically using the L-BFGS-B method,
# which is a gradient-based optimization algorithm that supports bounds on the variables. The minimize function 
# minimizes the negative log-likelihood function by iteratively updating the parameters until convergence. The
# negative log-likelihood and its gradient are provided as input to the minimize function 
# using the args and jac arguments, respectively. The resulting optimal parameters are extracted from the 
# optimization result and printed.

In [None]:
# Note: The code assumes that you have the necessary dependencies installed, including NumPy and SciPy.

In [None]:
# Explanation:
# Step 1: The neg_log_likelihood function The neg_log_likelihood function calculates the negative 
# log-likelihood of the Gaussian distribution given the data. The negative log-likelihood is a measure of 
# how well the Gaussian distribution with specific parameters fits the observed data. The function 
# takes two arguments: params (which represents the mean and standard deviation of the Gaussian distribution) and 
# data (the observed data points). It uses the formula for the Gaussian probability density function (PDF) to calculate
# the log-likelihood for each data point and then sums them up. Finally, it returns the negative log-likelihood.

# Step 2: The gradient function The gradient function calculates the gradient of the negative log-likelihood with
# respect to the parameters (mean and standard deviation). The gradient provides information on the direction 
# and magnitude of the steepest increase in the negative log-likelihood. The function takes the same arguments
# as neg_log_likelihood: params and data. It computes the partial derivatives of the negative log-likelihood function
# with respect to the mean and standard deviation using the chain rule. The resulting gradients are
# returned as a NumPy array.

 

In [None]:
# Step 3: The optimize_params function The optimize_params function performs the gradient-based optimization 
# using the L-BFGS-B method. It takes the observed data (data) as input and returns
# the optimal parameters (mean and standard deviation) of the Gaussian distribution. Here's how the function works:

# First, we initialize the initial guess for the parameters using the mean and standard deviation of the data.

# We use the minimize function from the scipy.optimize module to minimize the negative log-likelihood function.
# It takes several arguments:

# neg_log_likelihood: The objective function to minimize.
# initial_params: The initial guess for the parameters.
# args=(data,): Additional arguments to be passed to the objective function (in this case, the data).
# jac=gradient: The function that calculates the gradient of the objective function.
# method='L-BFGS-B': The optimization method to use (L-BFGS-B in this case).

# The minimize function performs the optimization by iteratively updating the parameters based on the negative log-likelihood and its gradient until convergence.

# Finally, we extract the optimal parameters from the optimization result using result.x and return them.

# Step 4: Given data The given data is represented as a NumPy array. In this case, we have an array 
# data containing the observed data points.

In [None]:
# Step 5: Estimating the optimal parameters We call the optimize_params function with 
# the given data (data) as input. It performs the optimization and returns the
# optimal parameters (mean and standard deviation) for the Gaussian distribution. 

# Step 6: Printing the optimal parameters Finally, we print the optimal mean and standard deviation that 
# were obtained from the optimization process.

# The code leverages the minimize function from the scipy.optimize module, utilizing
# the L-BFGS-B optimization algorithm. L-BFGS-B is a gradient-based optimization method that supports bounds 
# on the variables, making it suitable for estimating the parameters of the Gaussian distribution. The negative 
# log-likelihood function and its gradient are provided to the minimize function as arguments, and it performs
# the optimization to find the optimal parameters that maximize the likelihood of the observed data under 
# the Gaussian distribution.

In [None]:
# Question #2 - MLE of a Conditional Gaussian

In [None]:
# Explanation:
# To estimate the parameters of a conditional Gaussian model, we can use Maximum Likelihood Estimation (MLE). MLE aims
# to find the parameters that maximize the likelihood of the observed data.

In [8]:
import numpy as np

In [9]:
# Define the dataset
x = np.array([8, 16, 22, 33, 50, 51])
y = np.array([5, 20, 14, 32, 42, 58])

In [10]:

# Define the conditional Gaussian model: p_model(y|x, w) = N(w*x, sigma^2)
# We assume a linear relationship between x and y with parameters w and constant variance sigma^2

In [11]:

# Initialize the parameters
w_initial = 0.0  # Initial value for the parameter w
sigma_initial = 1.0  # Initial value for the standard deviation sigma

In [12]:

# Define the negative log-likelihood function
def negative_log_likelihood(w, sigma):
    # Compute the predicted values
    y_pred = w * x
    
    # Compute the negative log-likelihood
    nll = -np.sum(np.log(1 / (np.sqrt(2 * np.pi) * sigma)) - 0.5 * ((y - y_pred) / sigma)**2)
    
    return nll

In [13]:
# Define the gradient of the negative log-likelihood function
def gradient_negative_log_likelihood(w, sigma):
    # Compute the predicted values
    y_pred = w * x
    
    # Compute the gradients with respect to w and sigma
    grad_w = np.sum((y_pred - y) * x / sigma**2)
    grad_sigma = np.sum(((y_pred - y) / sigma)**2 - 1)
    
    return grad_w, -grad_sigma  # Fix the sign of the gradient for sigma

In [14]:

# Perform gradient-based optimization using stochastic gradient descent (SGD)

In [15]:
# Set the learning rate and number of iterations
learning_rate = 0.001
num_iterations = 1000

In [16]:
# Initialize the parameters
w = w_initial
sigma = sigma_initial

In [17]:
# Perform gradient descent
for iteration in range(num_iterations):
    # Compute the gradients
    grad_w, grad_sigma = gradient_negative_log_likelihood(w, sigma)
    
    # Update the parameters
    w -= learning_rate * grad_w
    sigma -= learning_rate * grad_sigma

In [18]:

# Print the estimated parameters
print("Estimated parameter w:", w)
print("Estimated parameter sigma:", sigma)

Estimated parameter w: 0.9696900477450642
Estimated parameter sigma: 19.478651962283752


In [None]:
# In this code, we initialize the parameters w and sigma with some initial values. Then, we define the 
# negative log-likelihood function and its gradient. We use stochastic gradient descent (SGD) to perform 
# gradient-based optimization and update the parameters iteratively. Finally, we print the 
# estimated values of w and sigma.

# Note: This code assumes that the relationship between x and y is linear
# with constant variance. If you have a different model in mind, you can modify the code accordingly.