# Question 5. Illustrer que la factorisation QR creuse telle qu’implémentée dans SuiteSparseQR ne fournit pas la solution de norme minimale d’un problème aux moindres carrés

In [36]:
using Pkg 
Pkg.activate("projet-MTH8211")
Pkg.add(["LinearAlgebra","SparseArrays","Test","HarwellRutherfordBoeing","LDLFactorizations","BenchmarkTools","PrettyTables","Krylov"])


[32m[1m  Activating[22m[39m project at `~/projet-mth8211/projet MTH8211`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m    Updating[22m[39m `~/projet-mth8211/projet MTH8211/Project.toml`
  [90m[ba0b0d4f] [39m[92m+ Krylov v0.9.6[39m
[32m[1m    Updating[22m[39m `~/projet-mth8211/projet MTH8211/Manifest.toml`
  [90m[ba0b0d4f] [39m[92m+ Krylov v0.9.6[39m


In [37]:
using LinearAlgebra
using SparseArrays

In [38]:
# 5×4 matrix whose columns 3 and 4 are identical, so the least-squares problem
# with this matrix does not have a unique solution.
A = sparse([1. 0. 0. 0.;
0. 1. 0. 0.;
0. 0. 1. 1.;
0. 0. 1. 1.;
0. 0. 1. 1.])
# Right-hand side built from the known solution [1, 1, 1, 0].
b = A * [1; 1; 1; 0]
# Solve with backslash on the sparse matrix (the sparse QR path discussed in
# the heading of this question).
x = A \ b

4-element Vector{Float64}:
 1.0
 1.0
 1.0
 0.0

In [11]:
# Candidate solution that splits the weight evenly between the two identical
# columns of A.
x_min = [1.; 1.; 0.5; 0.5]
# Check that x_min reproduces b as well.
A * x_min ≈ b

true

In [12]:
# Compare the norm of the backslash solution with the norm of x_min.
norm(x) ≈ norm(x_min)

false

Nous avons trouvé une solution de norme plus petite (‖x_min‖ = √2.5 < ‖x‖ = √3) que celle trouvée à l’aide de la factorisation
QR creuse de A.

# Question 7. Première implémentation

In [13]:
using Test

In [15]:
"""
    test_least_square(f::Function, minimum_norm::Bool)

Check the least-squares solver `f` on a fixed dense 4×3 problem whose right-hand
side is a vector of ones.

`f` is called as `f(sparse(A), b)` and its first returned value is taken as the
solution (two further returned values are discarded). The residual of that
solution must match the residual of the dense solution `A \\ b` up to an absolute
tolerance of `1e-6`. When `minimum_norm` is `true`, the solution itself must also
match the dense solution with the same tolerance.
"""
function test_least_square(f::Function, minimum_norm::Bool)
    dense_A = [1. 2. 3.; 4. 5. 6.; 7. 8. 9.; 5. 7. 9.]
    rhs = ones(Float64, 4)

    # Solver under test first, dense reference second.
    x, _, _ = f(sparse(dense_A), rhs)
    x_ref = dense_A \ rhs

    residual = dense_A * x - rhs
    residual_ref = dense_A * x_ref - rhs
    @test isapprox(norm(residual - residual_ref), 0.0; atol=1e-6)

    if minimum_norm
        @test isapprox(norm(x - x_ref), 0.0; atol=1e-6)
    end
end

test_least_square (generic function with 1 method)

In [16]:
"""
    golub_riley(A, b, λ=0.1, ϵ=1e-4)

Solve the least-squares problem `A x ≈ b` by iterative refinement on the
regularized problem, using a sparse QR factorization of the stacked matrix
`[A; λI]`.

Each step solves the stacked least-squares problem with right-hand side
`[b - A * x_k; 0]`, adds the correction to `x_k`, and stops as soon as
`norm(Δx_k) ≤ ϵ * norm(x_k)`.

# Arguments
- `A`: sparse `m × n` matrix.
- `b`: right-hand side of length `m`.
- `λ`: regularization parameter placed on the diagonal of the appended block.
- `ϵ`: relative tolerance on the size of the last correction.

# Returns
The tuple `(x, nnz(A), nnz(R))`, where `R` is the triangular factor of the QR
factorization of `[A; λI]`.
"""
function golub_riley(A::SparseMatrixCSC{Float64}, b::Vector{Float64}, λ::Float64=0.1, ϵ::Float64=1e-4)
    m, n = size(A)
    x_k = zeros(Float64, n)
    # Stacked right-hand side: current residual on top, n zeros below. Only the
    # top m entries are ever rewritten, so the tail stays zero.
    b_k = [b; zeros(n)]
    residual_k = view(b_k, 1:m)
    A_augmented = sparse_vcat(A, sparse(λ * I, n, n))
    # Factorize once; every refinement step reuses this factorization.
    QR_A_augmented = qr(A_augmented)

    while true
        # `\` is used because ldiv!(Y, A, b) does not seem to be implemented
        # for the sparse QR factorization.
        Δx_k = QR_A_augmented \ b_k
        x_k .+= Δx_k

        # residual_k = b - A * x_k, computed in place.
        residual_k .= b
        mul!(residual_k, A, x_k, -1.0, 1.0)

        # Multiplicative form of norm(Δx_k) / norm(x_k) ≤ ϵ: it avoids the
        # 0/0 = NaN that made the loop run forever when x_k is zero (b == 0).
        if norm(Δx_k) ≤ ϵ * norm(x_k)
            break
        end
    end

    return x_k, nnz(A), nnz(QR_A_augmented.R)
end


golub_riley (generic function with 3 methods)

In [17]:
# Check the residual of golub_riley against the dense baseline; the comparison
# of the solutions themselves is disabled (second argument is false).
test_least_square(golub_riley, false)

# Question 8. Aller chercher les données de la collection animal 

In [44]:
# Clone the animal collection only when the "animal" folder is not there yet, so
# that re-running this cell does not fail on an already existing destination.
isdir("animal") || run(`git clone https://github.com/optimizers/animal.git`)

Cloning into 'animal'...


Process(`[4mgit[24m [4mclone[24m [4mhttps://github.com/optimizers/animal.git[24m`, ProcessExited(0))

In [18]:
using HarwellRutherfordBoeing

In [42]:
"""
    get_problem_from_animal(path_to_animal_folder, problem_name)

Read the file `hb/<problem_name>.hb` located under `path_to_animal_folder` with
`HarwellBoeingMatrix` and return its `matrix` field together with its `rhs`
field flattened by `vec`.
"""
function get_problem_from_animal(path_to_animal_folder::String, problem_name::String)
    hb_file = joinpath(path_to_animal_folder, "hb", string(problem_name, ".hb"))
    problem = HarwellBoeingMatrix(hb_file)
    return problem.matrix, vec(problem.rhs)
end

get_problem_from_animal (generic function with 1 method)

In [43]:
"""
    get_solution_from_animal(path_to_animal_folder, problem_name)

Read the reference solution stored in
`mls/txt/<problem_name>_scaled_mls.txt` under `path_to_animal_folder`.

The file is read line by line and every non-blank line is parsed as a
`Float64`. Blank lines (for instance a trailing empty line) are skipped instead
of raising a parse error.

# Returns
A `Vector{Float64}` with one entry per non-blank line of the file.
"""
function get_solution_from_animal(path_to_animal_folder::String, problem_name::String)
    path = joinpath(path_to_animal_folder, "mls", "txt", problem_name * "_scaled_mls.txt")
    # eachline streams the file; the explicit element type keeps the result a
    # Vector{Float64} even when the file holds no values.
    return Float64[parse(Float64, line) for line in eachline(path) if !isempty(strip(line))]
end

get_solution_from_animal (generic function with 1 method)

# Question 9. Normaliser les colonnes d'une matrice creuse

We can efficiently normalize the columns of a (sparse) matrix `A` in place with:
`foreach(normalize!, eachcol(A))`

In [21]:
# Tests
for n = 100:10:500
    A = sprandn(Float64, n, n, 0.5)
    foreach(normalize!, eachcol(A))
    for j = 1:n
        @test norm(A[:, j]) ≈ 1.0
    end
end

# Question 10. Statistique de la première implémentation

Le tableau est produit pour les deux méthodes à la fois, à la question 13.

# Question 13. Deuxième implémentation

In [22]:
using LDLFactorizations

In [23]:
"""
    golub_riley_2(A, b, λ=0.1, ϵ=1e-4)

Solve the least-squares problem `A x ≈ b` by iterative refinement, using an LDLᵀ
factorization of the augmented matrix `K = [I A; Aᵀ -λ²I]` instead of a QR
factorization.

Each step solves `K * Δsol = [b - A * x_k; 0]`, adds the last `n` entries of
`Δsol` to `x_k`, and stops as soon as `norm(Δx_k) ≤ ϵ * norm(x_k)`.

# Arguments
- `A`: sparse `m × n` matrix.
- `b`: right-hand side of length `m`.
- `λ`: regularization parameter; `-λ^2` is placed on the diagonal of the lower-right block.
- `ϵ`: relative tolerance on the size of the last correction.

# Returns
The tuple `(x, nnz(A), nnz(L))`, where `L` is the `L` property of the
factorization returned by `ldl(K)`.
"""
function golub_riley_2(A::SparseMatrixCSC{Float64}, b::Vector{Float64}, λ::Float64=0.1, ϵ::Float64=1e-4)
    m, n = size(A)
    x_k = zeros(Float64, n)
    # Augmented right-hand side: current residual on top, n zeros below.
    b_augmented_k = [b; zeros(n)]

    # We could pass an upper triangular matrix to ldl for more memory efficiency
    K = [sparse(I, m, m) A; adjoint(A) sparse(-(λ^2)I, n, n)]
    # Factorize once; every refinement step reuses this factorization.
    LDLT = ldl(K)
    Δsol_k = Vector{Float64}(undef, m + n)

    # Views created once: the correction for x lives in the last n entries of
    # the solution, the residual in the first m entries of the right-hand side.
    Δx_k = view(Δsol_k, m+1:m+n)
    residual_k = view(b_augmented_k, 1:m)

    while true
        ldiv!(Δsol_k, LDLT, b_augmented_k)
        x_k .+= Δx_k

        # residual_k = b - A * x_k, computed in place.
        residual_k .= b
        mul!(residual_k, A, x_k, -1.0, 1.0)

        # Multiplicative form of norm(Δx_k) / norm(x_k) ≤ ϵ: it avoids the
        # 0/0 = NaN that made the loop run forever when x_k is zero (b == 0).
        if norm(Δx_k) ≤ ϵ * norm(x_k)
            break
        end
    end

    return x_k, nnz(A), nnz(LDLT.L)
end

golub_riley_2 (generic function with 3 methods)

In [24]:
# Check the residual of golub_riley_2 against the dense baseline; the comparison
# of the solutions themselves is disabled (second argument is false).
test_least_square(golub_riley_2, false)

## Tableau comparatif des deux méthodes

In [27]:
using BenchmarkTools
using PrettyTables

In [30]:
# Column titles of the comparison table (method 1 = golub_riley, method 2 = golub_riley_2).
header = ["Problème", "nnz A", "ϵ", "λ", "nnz R", "Err rel x méth 1", "Temps (s) méth 1",
          "nnz L", "Err rel x méth 2", "Temps (s) méth 2"]
# Parameter grid: every problem is solved for each (λ, ϵ) pair.
λ_values = [1e-2, 1e-4, 1e-6]
ϵ_values = [1e-3, 1e-4]

# We do not have the solution for very2, so it is left out
animal_problem = ["small", "small2", "medium", "medium2", "large", "large2", "very"]
# One row per (problem, λ, ϵ) combination, one column per header entry.
table_data = Array{Any, 2}(undef, length(animal_problem) * length(λ_values) * length(ϵ_values), length(header));

La prochaine cellule prend longtemps à exécuter

In [49]:
# Row index into table_data; incremented once per (problem, λ, ϵ) combination.
i = 1
for problem in animal_problem
    
    # Replace "animal" with the path to the "animal" folder on your machine
    A, b = get_problem_from_animal("animal", problem)
    # Normalize every column of A in place before solving.
    foreach(normalize!, eachcol(A))
    x_reference = get_solution_from_animal("animal", problem)
    r_reference = b - A * x_reference    
    
    for λ in λ_values
        for ϵ in ϵ_values                  
            # Method 1: one call for the solution, a separate benchmarked call for the timing.
            x_1, nnz_A, nnz_R = golub_riley(A, b, λ, ϵ)
            r_1 = b - A * x_1
            
            # The time is in ns; divide by 1e9 to convert it to seconds
            median_time_seconds_1 = median(@benchmark golub_riley($A, $b, $λ, $ϵ) samples = 1).time / 1e9   
        
            # Errors relative to the reference solution and to its residual.
            x_relative_error_1 = norm(x_1 - x_reference) / norm(x_reference)           
            r_relative_error_1 = norm(r_1 - r_reference) / norm(r_reference)
            @test r_relative_error_1 ≤ 1e-2

            # Method 2: same measurements with golub_riley_2.
            x_2, nnz_A, nnz_L = golub_riley_2(A, b, λ, ϵ)
            r_2 = b - A * x_2
            
            median_time_seconds_2 = median(@benchmark golub_riley_2($A, $b, $λ, $ϵ) samples = 1).time / 1e9   
        
            x_relative_error_2 = norm(x_2 - x_reference) / norm(x_reference)           
            r_relative_error_2 = norm(r_2 - r_reference) / norm(r_reference)
            @test r_relative_error_2 ≤ 1e-2

            # Fill row i in the same column order as `header`.
            table_data[i, 1] = problem  
            table_data[i, 2] = nnz_A
            table_data[i, 3] = ϵ
            table_data[i, 4] = λ
            table_data[i, 5] = nnz_R
            table_data[i, 6] = x_relative_error_1        
            table_data[i, 7] = median_time_seconds_1
            table_data[i, 8] = nnz_L
            table_data[i, 9] = x_relative_error_2        
            table_data[i, 10] = median_time_seconds_2
            # Progress trace: this cell is slow.
            @show i
            i += 1
        end
    end
end

i = 1
i = 2
i = 3
i = 4
i = 5
i = 6
i = 7
i = 8
i = 9
i = 10
i = 11
i = 12
i = 13
i = 14
i = 15
i = 16


In [46]:
# Print the comparison table; columns 3, 4, 6, 7, 9 and 10 (ϵ, λ, relative
# errors and times) are formatted with "%.2e".
pretty_table(table_data; formatters = ft_printf("%.2e", [3,4,6,7,9,10]), header)

┌──────────┬────────┬──────────┬──────────┬────────┬──────────────────┬──────────────────┬────────┬──────────────────┬──────────────────┐
│[1m Problème [0m│[1m  nnz A [0m│[1m        ϵ [0m│[1m        λ [0m│[1m  nnz R [0m│[1m Err rel x méth 1 [0m│[1m Temps (s) méth 1 [0m│[1m  nnz L [0m│[1m Err rel x méth 2 [0m│[1m Temps (s) méth 2 [0m│
├──────────┼────────┼──────────┼──────────┼────────┼──────────────────┼──────────────────┼────────┼──────────────────┼──────────────────┤
│    small │   8510 │ 1.00e-03 │ 1.00e-02 │  14895 │         1.24e-06 │         4.89e-03 │  18812 │         1.24e-06 │         2.55e-03 │
│    small │   8510 │ 1.00e-04 │ 1.00e-02 │  14895 │         1.24e-06 │         5.02e-03 │  18812 │         1.24e-06 │         2.25e-03 │
│    small │   8510 │ 1.00e-03 │ 1.00e-04 │  14942 │         3.46e-12 │         5.00e-03 │  18812 │         8.58e-10 │         2.11e-03 │
│    small │   8510 │ 1.00e-04 │ 1.00e-04 │  14942 │         3.46e-12 │         4.28e-03 │  

# Question 14. Comparaison des méthodes de Krylov sur les problèmes de la collection animal

In [39]:
using Krylov

In [40]:
# Column titles of the Krylov comparison table.
header_3 = ["Problème", "Méthode", "Erreur relative x", "Temps d'exécution (s)", "Nb itérations"]
# Number of Krylov methods compared per problem; it sizes the table below.
nb_of_methods = 5
# One row per (problem, method) pair, one column per header entry.
table_data_3 = Array{Any, 2}(undef, length(animal_problem) * nb_of_methods, length(header_3));

In [47]:
# Row index into table_data_3; incremented once per (problem, method) pair.
i = 1
for problem in animal_problem

    # Replace "animal" with the path to the "animal" folder on your machine
    A, b = get_problem_from_animal("animal", problem)
    # Normalize every column of A in place before solving.
    foreach(normalize!, eachcol(A))
    x_reference = get_solution_from_animal("animal", problem)

    ϵ = 1e-4
    # The five solvers are called with the same arguments, so a single loop
    # fills the nb_of_methods rows of this problem, in the same order as before.
    # USYMQR is left out: it takes an extra vector argument (A' * b) and would
    # need one more row per problem (nb_of_methods would have to be increased).
    for (method_name, solver) in (("CGLS", cgls), ("CRLS", crls), ("LSLQ", lslq),
                                  ("LSQR", lsqr), ("LSMR", lsmr))
        x_krylov, stats_krylov = solver(A, b, atol=ϵ, history=true)
        table_data_3[i, 1] = problem
        table_data_3[i, 2] = method_name
        table_data_3[i, 3] = norm(x_krylov - x_reference) / norm(x_reference)
        table_data_3[i, 4] = stats_krylov.timer
        table_data_3[i, 5] = stats_krylov.niter
        i += 1
    end
end

In [48]:
# Shade every other problem: the rows of the 1st, 3rd, 5th, ... group of
# nb_of_methods consecutive rows get a blue background.
hl = Highlighter(
    f = (data, i, j) -> isodd(cld(i, nb_of_methods)),
    crayon = Crayon(background = :blue),
)
# Print the Krylov table; columns 3 and 4 (relative error and time) use "%.2e".
pretty_table(
    table_data_3;
    header = header_3,
    formatters = ft_printf("%.2e", [3, 4]),
    highlighters = hl,
)

┌──────────┬─────────┬───────────────────┬───────────────────────┬───────────────┐
│[1m Problème [0m│[1m Méthode [0m│[1m Erreur relative x [0m│[1m Temps d'exécution (s) [0m│[1m Nb itérations [0m│
├──────────┼─────────┼───────────────────┼───────────────────────┼───────────────┤
│[44m    small [0m│[44m    CGLS [0m│[44m          3.53e-07 [0m│[44m              4.82e-03 [0m│[44m           157 [0m│
│[44m    small [0m│[44m    CRLS [0m│[44m          3.35e-06 [0m│[44m              3.74e-03 [0m│[44m           150 [0m│
│[44m    small [0m│[44m    LSLQ [0m│[44m          1.87e-04 [0m│[44m              2.47e-03 [0m│[44m           130 [0m│
│[44m    small [0m│[44m    LSQR [0m│[44m          1.79e-07 [0m│[44m              3.07e-03 [0m│[44m           161 [0m│
│[44m    small [0m│[44m    LSMR [0m│[44m          1.64e-06 [0m│[44m              3.72e-03 [0m│[44m           155 [0m│
│   small2 │    CGLS │          4.36e-07 │              1.52e-02 │     