In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

In [2]:
def save_matrix(mat, path, ages=range(35,65+1), years=range(2015,2022+1)):
    """
    Write a (years x ages) matrix to a comma-separated UTF-8 CSV file.

    Parameters:
    mat (2D array-like): Values to store, one row per year and one column per age.
    path (str or path-like): Destination file.
    ages (iterable): Age labels; columns are written as "Age <age>".
    years (iterable): Year labels; rows are written as "Year <year>".
    """
    column_labels = [f"Age {age}" for age in ages]
    row_labels = [f'Year {year}' for year in years]

    table = pd.DataFrame(data=mat, index=row_labels, columns=column_labels)
    # The row labels are kept as the first CSV column.
    table.to_csv(path, sep=',', index=True, encoding='utf-8')

def load_matrix(filepath, ages=range(35,65+1), years=range(2015,2022+1)):
    """
    Read a CSV matrix and return the sub-matrix for the requested years and ages.

    The first CSV column is used as the row index. Row and column labels may
    be plain integers / digit strings, or carry a prefix: the first 4
    characters of a column label (e.g. "Age 35") and the first 5 characters
    of a row label (e.g. "Year 2015") are dropped before conversion to int.

    Parameters:
    filepath (str or path-like): CSV file to read.
    ages (iterable of int): Ages (columns) to keep.
    years (iterable of int): Years (rows) to keep.

    Returns:
    2D numpy array: Rows are the years and columns the ages present both in
    the file and in the request; labels missing from the file are dropped.
    """
    def _as_int(label, prefix_length):
        # Plain numbers convert directly; otherwise strip the textual prefix.
        if isinstance(label, int) or label.isdigit():
            return int(label)
        return int(label[prefix_length:])

    table = pd.read_csv(filepath, index_col=0)
    table.columns = [_as_int(label, 4) for label in table.columns]
    table.index = [_as_int(label, 5) for label in table.index]

    # Keep only the labels available both in the file and in the request.
    kept_years = table.index.intersection(years)
    kept_ages = table.columns.intersection(ages)
    return table.loc[kept_years, kept_ages].to_numpy()

def load_matrix2(filepath, ages=range(35,65+1), years=range(2015,2022+1), transpose=False):
    """
    Read a CSV matrix, optionally transposed, and restrict it to the requested years and ages.

    After the optional transposition, rows are treated as years and columns
    as ages. Labels may be plain integers / digit strings, or carry a prefix:
    the first 4 characters of a column label and the first 5 characters of a
    row label are dropped before conversion to int.

    Parameters:
    filepath (str or path-like): CSV file to read (first column is the index).
    ages (iterable of int): Ages (columns) to keep.
    years (iterable of int): Years (rows) to keep.
    transpose (bool): Transpose the table right after reading it.

    Returns:
    tuple: (matrix, kept_ages, kept_years) where matrix is a 2D numpy array
    and the two others are the pandas indexes of the labels actually kept.
    """
    def _as_int(label, prefix_length):
        # Plain numbers convert directly; otherwise strip the textual prefix.
        if isinstance(label, int) or label.isdigit():
            return int(label)
        return int(label[prefix_length:])

    table = pd.read_csv(filepath, index_col=0)
    if transpose:
        table = table.T

    table.columns = [_as_int(label, 4) for label in table.columns]
    table.index = [_as_int(label, 5) for label in table.index]

    # Keep only the labels available both in the file and in the request.
    kept_years = table.index.intersection(years)
    kept_ages = table.columns.intersection(ages)
    values = table.loc[kept_years, kept_ages].to_numpy()
    return values, kept_ages, kept_years

def plot_matrix(X, Y, Z, title="Taux de mortalité", save=None):
    """
    Display Z as a 3D surface over the grid spanned by X and Y.

    Parameters:
    X (1D array-like): Coordinates along the axis labelled 'Age'.
    Y (1D array-like): Coordinates along the axis labelled 'Année'.
    Z (2D array-like): Surface heights; np.meshgrid(X, Y) produces grids of
        shape (len(Y), len(X)), which Z is expected to match.
    title (str): Title of the figure.
    save (str or None): When not None, the figure is also written to this path.
    """
    grid_x, grid_y = np.meshgrid(X, Y)

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')

    # Surface plus the colour bar mapping heights to colours.
    surf = ax.plot_surface(grid_x, grid_y, Z, cmap='plasma', edgecolor='none')
    fig.colorbar(surf, shrink=0.5, aspect=10, pad=0.1)

    ax.set_title(title)
    axis_labels = (
        (ax.set_xlabel, 'Age'),
        (ax.set_ylabel, 'Année'),
        (ax.set_zlabel, 'Taux'),
    )
    for set_label, text in axis_labels:
        set_label(text, labelpad=15)

    ax.tick_params(axis='z', labelsize=8, pad=5)
    ax.view_init(elev=40, azim=120)

    # Save before showing so the file is written from the populated figure.
    if save is not None:
        plt.savefig(save, bbox_inches='tight')

    plt.show()

In [3]:
def whittaker_henderson_2d_smoothing(Z, lambda_x, lambda_y):
    """
    Apply 2D Whittaker-Henderson smoothing to a matrix.

    The result S solves (I + P) vec(S) = vec(Z), i.e. it minimises
        sum((S - Z)**2)
        + lambda_x * sum(second differences of S along axis 0, squared)
        + lambda_y * sum(second differences of S along axis 1, squared).

    Parameters:
    Z (2D array-like): The matrix to be smoothed.
    lambda_x (float): Smoothing parameter along axis 0 (from one row to the next).
    lambda_y (float): Smoothing parameter along axis 1 (from one column to the next).

    Returns:
    2D array: Smoothed matrix, same shape as Z.

    Raises:
    ValueError: If Z is not two-dimensional.
    """
    Z = np.asarray(Z, dtype=float)
    if Z.ndim != 2:
        raise ValueError("Z must be a 2D array.")
    m, n = Z.shape  # m: number of rows, n: number of columns
    if Z.size == 0:
        return Z.copy()

    def second_difference_penalty(size, lam):
        # Fewer than 3 points have no second difference: nothing to penalise.
        if size < 3:
            return sparse.csr_matrix((size, size))
        D = sparse.diags([1, -2, 1], [0, 1, 2], shape=(size - 2, size))
        return lam * (D.T @ D)

    # Z is flattened row by row, so the first Kronecker factor acts on the
    # row index (axis 0) and the second one on the column index (axis 1).
    P = (
        sparse.kron(second_difference_penalty(m, lambda_x), sparse.eye(n))
        + sparse.kron(sparse.eye(m), second_difference_penalty(n, lambda_y))
    )

    # spsolve works on CSC/CSR input; convert explicitly.
    system = (sparse.eye(m * n) + P).tocsc()
    smooth_Z_flat = spsolve(system, Z.ravel())

    return smooth_Z_flat.reshape((m, n))

def whittaker_henderson_2d_smoothing_with_weights(Z, lambda_x, lambda_y, weights=None):
    """
    Apply 2D Whittaker-Henderson smoothing to a matrix with optional weights.

    The result S solves (W + P) vec(S) = W vec(Z), i.e. it minimises
        sum(weights * (S - Z)**2)
        + lambda_x * sum(second differences of S along axis 0, squared)
        + lambda_y * sum(second differences of S along axis 1, squared).
    Because the weights are not rescaled here, multiplying them by a constant
    is equivalent to dividing both lambdas by that constant.

    Parameters:
    Z (2D array-like): The matrix to be smoothed.
    lambda_x (float): Smoothing parameter along axis 0 (from one row to the next).
    lambda_y (float): Smoothing parameter along axis 1 (from one column to the next).
    weights (2D array-like or None): Weights matrix, same shape as Z. Default is None (equal weighting).

    Returns:
    2D array: Smoothed matrix, same shape as Z.

    Raises:
    ValueError: If Z is not two-dimensional or weights does not have the shape of Z.
    """
    Z = np.asarray(Z, dtype=float)
    if Z.ndim != 2:
        raise ValueError("Z must be a 2D array.")
    m, n = Z.shape  # m: number of rows, n: number of columns

    if weights is None:
        weights = np.ones_like(Z)  # Equal weighting if none provided
    else:
        weights = np.asarray(weights, dtype=float)
        # A same-size but differently shaped array (e.g. transposed) would be
        # flattened silently and pair each weight with the wrong cell.
        if weights.shape != Z.shape:
            raise ValueError(
                f"weights must have the same shape as Z; got {weights.shape} and {Z.shape}."
            )

    if Z.size == 0:
        return Z.copy()

    def second_difference_penalty(size, lam):
        # Fewer than 3 points have no second difference: nothing to penalise.
        if size < 3:
            return sparse.csr_matrix((size, size))
        D = sparse.diags([1, -2, 1], [0, 1, 2], shape=(size - 2, size))
        return lam * (D.T @ D)

    # Z is flattened row by row, so the first Kronecker factor acts on the
    # row index (axis 0) and the second one on the column index (axis 1).
    P = (
        sparse.kron(second_difference_penalty(m, lambda_x), sparse.eye(n))
        + sparse.kron(sparse.eye(m), second_difference_penalty(n, lambda_y))
    )

    # Diagonal weight matrix W, in the same row-major order as the flattened Z.
    w_flat = weights.ravel()
    system = (sparse.diags(w_flat) + P).tocsc()

    # Solve (W + P) * smooth_Z_flat = W * Z_flat. The system can be singular
    # when too many weights are zero.
    smooth_Z_flat = spsolve(system, w_flat * Z.ravel())

    return smooth_Z_flat.reshape((m, n))


def are_all_columns_increasing(matrix):
    """
    Check that, in every row, values strictly increase from one column to the next.

    Differences are taken along axis 1, i.e. between horizontally adjacent
    cells, so the check is on the left-to-right progression inside each row,
    not on the top-to-bottom progression inside each column.

    Parameters:
    matrix (2D array-like): The matrix to check.

    Returns:
    bool: True if every row is strictly increasing from left to right,
    False otherwise. A matrix with a single column yields True.
    """
    # No copy is needed: the input is only read.
    matrix = np.asarray(matrix)

    # Difference between each cell and its left-hand neighbour.
    step = np.diff(matrix, axis=1)

    # Convert numpy's bool scalar to a builtin bool, as documented.
    return bool(np.all(step > 0))

def dichotomy_minimize(f, a, b, tol=1e-5, max_iter=100):
    """
    Find the minimum of a unimodal function using the dichotomy method.

    At each step the function is evaluated at the first and third quartile
    of the current interval, and the outer quarter on the side of the larger
    value is discarded, so the interval shrinks to 3/4 of its length.

    Parameters:
    f (function): The unimodal function to minimize.
    a (float): The start of the interval.
    b (float): The end of the interval.
    tol (float): The tolerance for convergence (default is 1e-5).
    max_iter (int): The maximum number of iterations (default is 100).

    Returns:
    float: The midpoint of the final interval, taken as the minimizer.

    Raises:
    ValueError: If a >= b.
    """
    # Ensure that the interval is valid
    if a >= b:
        raise ValueError("The start of the interval must be less than the end.")

    for _ in range(max_iter):
        # Test convergence before evaluating f, so no evaluation is wasted
        # on an interval that is already narrow enough.
        if b - a < tol:
            break

        quarter = (b - a) / 4
        x1 = a + quarter
        x2 = b - quarter

        # For a unimodal f the minimum cannot lie beyond the worse probe.
        if f(x1) < f(x2):
            b = x2
        else:
            a = x1

    # Return the midpoint of the final interval
    return (a + b) / 2

def whittaker_henderson_2d_smoothing_with_weights_and_optilambda(Z, weights=None):
    """
    Placeholder for weighted 2D Whittaker-Henderson smoothing with an
    automatically chosen smoothing parameter.

    NOTE(review): this function is unfinished. As written it performs no
    smoothing: it defines an objective that is never called and then falls
    through, so it always returns None.

    Parameters:
    Z (2D array): The matrix to be smoothed (currently unused).
    weights (2D array or None): Weights matrix, same shape as Z (currently unused).

    Returns:
    None: nothing is computed yet.
    """
    # Starting value for the smoothing parameter; not read by any later statement.
    lambda_value = 0

    # NOTE(review): the argument `l` is ignored and the smoother is invoked
    # without its required Z, lambda_x and lambda_y arguments, so calling this
    # objective would raise TypeError. Presumably it is meant to be minimised
    # with a 1-D search such as dichotomy_minimize - TODO confirm the intended
    # selection criterion before implementing.
    function_to_optimize = lambda l: whittaker_henderson_2d_smoothing_with_weights()
    pass


In [None]:
# Age and calendar-year ranges shared by every matrix handled below.
ages = np.arange(35, 65+1)
years = np.arange(2015, 2022+1)

names = ["IRL_F", "IRL_M", "UK_F", "UK_M"]
lambda_x, lambda_y = 2, 2
alpha = 0.05
# Upper standard-normal quantile of the two-sided (1 - alpha) band.
z_quantile = stats.norm.ppf(1 - alpha / 2)

for name in names:
    QHmd, QHmd_ages, QHmd_years = load_matrix2(f"./HMD_inputs/matrices/HMD_{name}.csv", ages=ages, years=years, transpose=True)
    mat = load_matrix(f"./matrices/BRUT_{name}.csv", ages=ages, years=years)
    # presumably exposure counts, given the file name - TODO confirm
    expo = load_matrix(f"./matrices/BRUT_EXPO_{name}.csv", ages=ages, years=years)

    # The loaders silently drop years/ages missing from a file, while the
    # code below addresses rows by their position in `years`: fail early
    # with a clear message instead of plotting misaligned data.
    expected_shape = (len(years), len(ages))
    if mat.shape != expected_shape or expo.shape != expected_shape:
        raise ValueError(
            f"{name}: expected matrices of shape {expected_shape}, "
            f"got {mat.shape} (rates) and {expo.shape} (exposure)."
        )

    weights = expo/np.sum(expo)
    plot_matrix(ages, years, mat, title=f"Taux bruts {name}")

    smoothed_mat = whittaker_henderson_2d_smoothing_with_weights(mat, lambda_x=lambda_x, lambda_y=lambda_y, weights=weights)
    plot_matrix(ages, years, smoothed_mat, title=f"Taux bruts lissés {name}", save=f"./images/BRUT_{name}_lisses.png")

    # Normal-approximation band p +/- z * sqrt(p * (1 - p) / expo), truncated
    # to [0, 1]. The variance term is clipped at 0 because smoothing can push
    # a rate slightly outside [0, 1], which would otherwise produce NaN.
    variance = np.clip(smoothed_mat * (1 - smoothed_mat), 0, None)
    half_width = z_quantile * np.sqrt(variance / expo)
    smoothed_ic_inf = np.maximum(smoothed_mat - half_width, 0)
    smoothed_ic_sup = np.minimum(smoothed_mat + half_width, 1)

    n_years = mat.shape[0]

    # The HMD matrix only holds the years found in its file, so its rows are
    # looked up by year rather than by position in `years`.
    hmd_row_of_year = {int(year): row for row, year in enumerate(QHmd_years)}

    # One panel per year, laid out on as many rows as needed.
    ncols = 3
    nrows = max(1, -(-n_years // ncols))  # ceiling division
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*4, nrows*3), sharey=True, squeeze=False)
    axes = axes.flatten()

    # Plotting
    for year_idx, ax in enumerate(axes[:n_years]):
        hmd_row = hmd_row_of_year.get(int(years[year_idx]))
        if hmd_row is not None:
            ax.plot(QHmd_ages, QHmd[hmd_row], color='fuchsia', label='Taux HMD')
        ax.scatter(ages, mat[year_idx], color='blue', label="Taux bruts")
        ax.plot(ages, smoothed_mat[year_idx], label="Taux bruts lissés", color="orange")
        ax.fill_between(ages, smoothed_ic_inf[year_idx], smoothed_ic_sup[year_idx], alpha=0.2, color="orange", label="Intervalle de confiance")
        ax.set_xlabel('Age')
        ax.set_title(f"Année {years[year_idx]}")
        if year_idx == 0:
            ax.set_ylabel('Taux')
            ax.legend(loc='upper left')
    # Hide any unused subplots
    for ax in axes[n_years:]:
        ax.axis('off')

    fig.suptitle(f"Lissage des taux bruts {name} (Whittaker-Henderson)")
    # Adjust layout
    plt.tight_layout()

    plt.savefig(f"images/BRUT_{name}_lissage.png", bbox_inches='tight')
    plt.show()
    # Release the figure: several are created per loop iteration.
    plt.close(fig)

    save_matrix(smoothed_mat, f"./matrices/BRUT_LISSES_{name}.csv", ages=ages, years=years)

    # The helper checks that rates increase from one column to the next inside each row.
    print("All increasing ?", are_all_columns_increasing(smoothed_mat))