In [1]:
import numpy as np
from scipy.optimize import minimize

# Sample FICO scores
fico_scores = np.array([600, 650, 700, 720, 750, 780, 800, 820, 850])

# Function to calculate mean squared error
def mean_squared_error(boundaries, fico_scores):
    # Sort boundaries
    boundaries = np.sort(boundaries)
    
    # Map FICO scores to buckets
    bucket_indices = np.digitize(fico_scores, boundaries)
    mapped_scores = np.array([np.mean(fico_scores[bucket_indices == i]) for i in range(1, len(boundaries) + 1)])
    
    # Calculate mean squared error
    mse = np.mean((mapped_scores - fico_scores) ** 2)
    return mse

# Initial guess for boundaries
initial_guess = [625, 675, 725, 775, 825]

# Minimize mean squared error
result_mse = minimize(mean_squared_error, initial_guess, args=(fico_scores,), method='Nelder-Mead')

# Extract optimized boundaries
optimized_boundaries_mse = np.sort(result_mse.x)
print("Optimized Boundaries (Mean Squared Error):", optimized_boundaries_mse)

# Function to calculate log-likelihood
def log_likelihood(boundaries, fico_scores, defaults):
    # Sort boundaries
    boundaries = np.sort(boundaries)
    
    # Map FICO scores to buckets
    bucket_indices = np.digitize(fico_scores, boundaries)
    
    # Calculate number of records and defaults in each bucket
    bucket_counts = np.array([np.sum(bucket_indices == i) for i in range(1, len(boundaries) + 1)])
    default_counts = np.array([np.sum(defaults[bucket_indices == i]) for i in range(1, len(boundaries) + 1)])
    
    # Calculate probabilities of default in each bucket
    probs_default = default_counts / bucket_counts
    
    # Calculate log-likelihood
    log_likelihood = np.sum(default_counts * np.log(probs_default) + (bucket_counts - default_counts) * np.log(1 - probs_default))
    return -log_likelihood  # Minimize negative log-likelihood

# Sample default data (for demonstration)
defaults = np.array([0, 0, 1, 0, 1, 0, 1, 1, 1])  # Example: 1 indicates default, 0 indicates no default

# Minimize log-likelihood
result_ll = minimize(log_likelihood, initial_guess, args=(fico_scores, defaults), method='Nelder-Mead')

# Extract optimized boundaries
optimized_boundaries_ll = np.sort(result_ll.x)
print("Optimized Boundaries (Log-Likelihood):", optimized_boundaries_ll)




ValueError: operands could not be broadcast together with shapes (5,) (9,) 