In [5]:
import numpy as np

# Seed NumPy's global (legacy) generator so every run draws the same data.
# The four draws below must stay in this order to reproduce the same values.
np.random.seed(0)

# Size of the synthetic data set
num_samples = 2400

# Feature 0: continuous, uniform between 1 and 45
X_0 = np.random.uniform(low=1, high=45, size=num_samples)

# Feature 1: continuous, uniform between -5 and 5
X_1 = np.random.uniform(low=-5, high=5, size=num_samples)

# Feature 2: binary indicator taking the values 0 or 1
X_2 = np.random.randint(low=0, high=2, size=num_samples)

# Place the three features side by side as columns -> shape (2400, 3)
X = np.stack([X_0, X_1, X_2], axis=1)

# Labels drawn from {0, 1, 2}, stored as a column vector -> shape (2400, 1)
y = np.random.choice([0, 1, 2], size=num_samples)[:, np.newaxis]

# Sanity check: report the shapes and a preview of the first ten rows
preview_rows = slice(None, 10)
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
print("First 10 rows of X:", X[preview_rows])
print("First 10 elements of y:", y[preview_rows].ravel())


Shape of X: (2400, 3)
Shape of y: (2400, 1)
First 10 rows of X: [[25.14779417  3.86683221  0.        ]
 [32.46833212  3.30908798  0.        ]
 [27.52158855 -4.68394557  0.        ]
 [24.97486005  0.68419043  1.        ]
 [19.64081117  1.09161264  0.        ]
 [29.41934097  4.61575144  1.        ]
 [20.2538373  -4.76763373  0.        ]
 [40.23801203  0.31103771  1.        ]
 [43.40116146 -2.95218638  0.        ]
 [17.87142683 -4.46337094  1.        ]]
First 10 elements of y: [0 0 1 1 2 0 2 2 2 0]


In [7]:
import numpy as np

# Toy example: predicted probabilities for 5 samples and 3 classes
# (one row per sample, one column per class).
probabilities = np.array([
    [0.1, 0.3, 0.6],
    [0.2, 0.5, 0.3],
    [0.7, 0.1, 0.2],
    [0.3, 0.4, 0.3],
    [0.5, 0.2, 0.3]
])

# True class labels for these samples.
# The demo uses its own names (`demo_labels`, `num_demo_samples`) instead of
# `y` / `num_samples`, so running this cell does not overwrite the synthetic
# data set built earlier in the notebook.
demo_labels = np.array([2, 0, 1, 2, 1])

# Number of samples in the toy example
num_demo_samples = probabilities.shape[0]

# Pair row i with column demo_labels[i] to pick the probability assigned to
# the true class of each sample.
selected_probabilities = probabilities[np.arange(num_demo_samples), demo_labels]

# Compute the log of these probabilities
log_probabilities = np.log(selected_probabilities)

# Sum up the log probabilities. This is the log-likelihood of the labels
# (always <= 0); the conventional "log loss" is the negated mean of the same
# values.
log_loss = np.sum(log_probabilities)

print(f"Log loss: {log_loss}")


Log loss: -7.236259345954173


In [None]:
import numpy as np
from scipy.optimize import minimize

def softmax(beta, X):
    """
    Compute softmax probabilities for each alternative.

    The utility of alternative j for observation i is ``X[i] @ B[:, j]``,
    where ``B`` is ``beta`` viewed as a (K, J) matrix. Each row of utilities
    is turned into a probability distribution over the J alternatives.

    Parameters:
    beta : array_like
        Coefficients to be estimated, either as a (K, J) matrix or as a flat
        vector of K * J values in row-major order (the form an optimizer such
        as ``scipy.optimize.minimize`` passes around). J is inferred as
        ``beta.size // K``.
    X : array_like
        Matrix of explanatory variables with shape (N, K).

    Returns:
    probabilities : ndarray of shape (N, J)
        Softmax probabilities for each alternative; every row sums to 1.

    Raises:
    ValueError
        If X is not 2-D, or if the number of coefficients in beta is zero or
        not a multiple of the number of columns of X.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D with shape (N, K); got shape {X.shape}")
    num_features = X.shape[1]

    beta = np.asarray(beta, dtype=float)
    if num_features == 0 or beta.size == 0 or beta.size % num_features != 0:
        raise ValueError(
            f"beta has {beta.size} coefficients, which cannot be arranged as "
            f"a (K, J) matrix with K = {num_features} columns of X"
        )

    # One column of coefficients per alternative. Collapsing beta to a single
    # (K,) vector would give one utility per row and nothing to normalise over.
    coefficients = beta.reshape(num_features, -1)

    utilities = X @ coefficients
    # Softmax is unchanged by subtracting a per-row constant; using the row
    # maximum keeps every exponent <= 0 so np.exp cannot overflow.
    utilities -= utilities.max(axis=1, keepdims=True)

    exp_utilities = np.exp(utilities)
    probabilities = exp_utilities / exp_utilities.sum(axis=1, keepdims=True)
    return probabilities

def log_likelihood(beta, X, y):
    """
    Compute the log-likelihood function for multinomial logit model.

    The value is the sum over observations of the log-probability that the
    model assigns to the alternative actually chosen. It is evaluated
    directly in log-space (utility of the chosen alternative minus the
    log-sum-exp of all utilities) rather than as ``log(softmax(...))``, so a
    very small probability yields a large negative number instead of
    ``log(0) = -inf``.

    Parameters:
    beta : array_like
        Coefficients to be estimated, either as a (K, J) matrix or as a flat
        vector of K * J values in row-major order. J is inferred as
        ``beta.size // K``.
    X : array_like
        Matrix of explanatory variables with shape (N, K).
    y : array_like
        Choices made by individuals: N integer labels in ``0 .. J-1``. Any
        shape with N elements is accepted (for example (N,) or (N, 1)); it is
        flattened before use.

    Returns:
    log_likelihood_value : float
        Log-likelihood value (always <= 0).

    Raises:
    ValueError
        If X is not 2-D, beta cannot be arranged as (K, J), y does not have
        one label per row of X, or a label is non-integer or outside
        ``0 .. J-1``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D with shape (N, K); got shape {X.shape}")
    num_obs, num_features = X.shape

    beta = np.asarray(beta, dtype=float)
    if num_features == 0 or beta.size == 0 or beta.size % num_features != 0:
        raise ValueError(
            f"beta has {beta.size} coefficients, which cannot be arranged as "
            f"a (K, J) matrix with K = {num_features} columns of X"
        )
    coefficients = beta.reshape(num_features, -1)
    num_classes = coefficients.shape[1]

    # Flatten the labels: indexing with an (N, 1) column next to arange(N)
    # would broadcast to an (N, N) selection instead of one entry per row.
    choices = np.asarray(y).ravel()
    if choices.shape[0] != num_obs:
        raise ValueError(
            f"y has {choices.shape[0]} labels but X has {num_obs} rows"
        )
    if not np.issubdtype(choices.dtype, np.integer):
        as_int = choices.astype(int)
        if not np.array_equal(as_int, choices):
            raise ValueError("y must contain integer class labels")
        choices = as_int
    # Reject out-of-range labels explicitly; a negative label would otherwise
    # silently wrap around to the last columns.
    if num_obs and (choices.min() < 0 or choices.max() >= num_classes):
        raise ValueError(
            f"labels in y must lie in 0..{num_classes - 1} "
            f"(beta describes {num_classes} alternative(s)); "
            f"found values from {choices.min()} to {choices.max()}"
        )

    utilities = X @ coefficients
    # Shift each row by its maximum so the exponentials cannot overflow; the
    # shift cancels in (chosen utility - log-sum-exp).
    shifted = utilities - utilities.max(axis=1, keepdims=True)
    log_normaliser = np.log(np.exp(shifted).sum(axis=1))
    chosen_utility = shifted[np.arange(num_obs), choices]

    log_likelihood_value = float(np.sum(chosen_utility - log_normaliser))
    return log_likelihood_value

# Example data: this cell expects `X` and `y` to have been defined by an
# earlier cell.
# X must have shape (N, K), where N is the number of observations and K is the
# number of explanatory variables.
# y must hold N integer labels (starting from 0) for the chosen alternative; a
# column vector of shape (N, 1) is flattened here.
choices = np.asarray(y).ravel()
if choices.shape[0] != X.shape[0]:
    raise ValueError(
        f"X has {X.shape[0]} rows but y has {choices.shape[0]} labels; "
        "re-run the cell that builds X and y together"
    )

# Labels are 0-based, so the number of alternatives is the largest label + 1.
num_classes = int(choices.max()) + 1

# Initial guess for beta: one coefficient per (explanatory variable,
# alternative) pair, flattened to the 1-D vector the optimizer works on.
initial_beta = np.zeros(X.shape[1] * num_classes)


def negative_log_likelihood(beta):
    """Objective to be minimized: the log-likelihood with its sign flipped."""
    return -log_likelihood(beta, X, choices)


# Minimize the negative log-likelihood function to find the estimated beta values
result = minimize(negative_log_likelihood, initial_beta, method='BFGS')

# Surface optimizer problems instead of silently reporting a non-converged fit.
if not result.success:
    print("Warning: optimizer did not report convergence:", result.message)

# Estimated beta values, arranged as (K, num_classes): row = explanatory
# variable, column = alternative.
# NOTE: with a coefficient column for every alternative, adding the same
# vector to all columns leaves the softmax probabilities unchanged, so only
# differences between columns are meaningful.
estimated_beta = result.x.reshape(X.shape[1], num_classes)

# Maximum log-likelihood value (negative of the minimized value)
max_log_likelihood_value = -result.fun

print("Estimated beta values:", estimated_beta)
print("Maximum log-likelihood value:", max_log_likelihood_value)
