# Federal University of Ceará
# Teleinformatics Department
# Graduate Program in Teleinformatics Engineering
## TIP8419 - Tensor Algebra
## Homework 2 - Khatri-Rao Product
### Simulation results

- Ezequias Márcio - 497779

To run this notebook properly, Python 3 must be installed along with the packages listed below:

- `numpy 1.17.2`
- `tensorly 0.4.5`
- `tqdm 4.36.1`
- `bokeh 1.3.4`

Make sure that the file `tensoralg.py` is in the same directory as this notebook.

In [2]:
import numpy as np
import tensoralg # Module created for this hw
from bokeh.plotting import figure, output_notebook, show
from bokeh.layouts import row
from tensorly.tenalg import khatri_rao
from time import perf_counter_ns
from tqdm.notebook import tqdm, tnrange
# Notebook-wide display settings: print arrays with 3 digits of precision on
# wide lines, and render Bokeh figures inline in the notebook.
np.set_printoptions(precision=3, linewidth=175)
output_notebook()

In [3]:
# Number of rows I of each factor matrix (Problem 1):
I = np.array([2, 4, 8, 16, 32, 64, 128, 256])
# Number of columns R of each factor matrix (Problem 1):
R = np.array([2, 4])
# Number of matrices in the nested Khatri-Rao product (Problem 2):
n = np.array([2, 4, 6, 8, 10])
# Monte Carlo realizations:
realiz = 100
# Run times (in nanoseconds) for Problem 1, one array per method, indexed as
# [column-count index, realization, row-count index]:
timing_shape = (R.size, realiz, I.size)
method1 = np.zeros(timing_shape)
method2 = np.zeros(timing_shape)
method3 = np.zeros(timing_shape)
# Run times (in nanoseconds) for Problem 2, indexed as
# [realization, matrix-count index]:
problem2 = np.zeros((realiz, n.size))

### Testing functions

In [4]:
def n_kr(mt_list):
    """Left-to-right nested Khatri-Rao product of a collection of matrices.

    The result is built as ``kr(kr(kr(A1, A2), A3), ...)`` using
    ``tensoralg.kr`` for each pairwise product.

    Parameters
    ----------
    mt_list : iterable
        Matrices to combine, in order. Any iterable is accepted (list, tuple,
        generator, ...). Each matrix must be accepted by ``tensoralg.kr``.

    Returns
    -------
    The accumulated product. When ``mt_list`` holds a single matrix, that
    same object is returned unchanged.

    Raises
    ------
    ValueError
        If ``mt_list`` is empty.
    """
    factors = iter(mt_list)
    try:
        n_kr_prod = next(factors)
    except StopIteration:
        # Fail with an explicit message rather than an opaque indexing error.
        raise ValueError('n_kr() requires at least one matrix') from None

    for matrix in factors:
        n_kr_prod = tensoralg.kr(n_kr_prod, matrix)

    return n_kr_prod

In [5]:
# Matrices for testing:
M, N = 4, 2
# Reinterpret an M x 2N real array as an M x N complex one. np.complex128 is
# used instead of the np.complex_ alias, which was removed in NumPy 2.0.
A = np.random.rand(M, 2*N).view(np.complex128)

# Nested Khatri-Rao product, computed once and reused below:
triple_kr = n_kr([A]*3)

# The backslashes are escaped so "\d" is not parsed as an (invalid) escape
# sequence; the printed text is unchanged.
print(f'''Matrix A:
{A}

A \\diamond A \\diamond A - shape: {triple_kr.shape}

{np.allclose(triple_kr, tensoralg.kr(tensoralg.kr(A, A), A))}
''')

Matrix A:
[[0.48 +0.727j 0.817+0.933j]
 [0.987+0.713j 0.771+0.549j]
 [0.072+0.802j 0.821+0.012j]
 [0.491+0.77j  0.973+0.273j]]

A \diamond A \diamond A - shape: (64, 2)

True



### Performance simulations - Problem 01 (a, b and c)

Restricting to $N \in \{2,4,8,16,32\}$ avoids memory issues (only 6 GB available on my machine).

Considering randomly chosen matrices $\mathbf{A}$ and $\mathbf{B} \in \mathbb{R}^{I\times R}$

- For the numbers of columns $R = 2$ and $R = 4$

In [6]:
# Time the three pseudo-inverse strategies for every Monte Carlo realization,
# row count and column count. Run times are stored in nanoseconds.
for trial in tnrange(realiz):
    for row_idx, n_rows in enumerate(I):
        for col_idx, n_cols in enumerate(R):
            # Random factor matrices, each of size n_rows x n_cols:
            A = np.random.rand(n_rows, n_cols)
            B = np.random.rand(n_rows, n_cols)

            # (a) Method 1: numpy pinv applied to the Khatri-Rao product.
            start = perf_counter_ns()
            np.linalg.pinv(tensoralg.kr(A, B))
            stop = perf_counter_ns()
            method1[col_idx, trial, row_idx] = stop - start

            # (b) Method 2: left pseudo-inverse from the Gram matrix of the product.
            start = perf_counter_ns()
            kr_ab = tensoralg.kr(A, B)
            np.linalg.inv(kr_ab.T @ kr_ab) @ kr_ab.T
            stop = perf_counter_ns()
            method2[col_idx, trial, row_idx] = stop - start

            # (c) Method 3: left pseudo-inverse with the Gram matrix obtained
            # as the Hadamard product (A^T A) * (B^T B).
            start = perf_counter_ns()
            kr_ab = tensoralg.kr(A, B)
            np.linalg.inv((A.T @ A) * (B.T @ B)) @ kr_ab.T
            stop = perf_counter_ns()
            method3[col_idx, trial, row_idx] = stop - start

HBox(children=(IntProgress(value=0), HTML(value='')))




In [11]:
# Plotting results:
def _runtime_figure(col_idx):
    """Build the mean run-time figure for the column count ``R[col_idx]``.

    Plots, against the number of rows ``I``, the mean over realizations of the
    run times stored in ``method1``, ``method2`` and ``method3``, converted
    from nanoseconds to seconds, on a logarithmic y axis.

    The ``legend=`` and ``plot_width``/``plot_height`` keywords follow the
    Bokeh 1.3.4 API listed in this notebook's requirements.
    """
    fig = figure(tools="hover,pan,wheel_zoom,box_zoom,reset,save",
                 plot_width=550, plot_height=400,
                 background_fill_color="#fafafa",
                 x_axis_label='Number of rows I',
                 y_axis_label='Run time [s] - log scale',
                 y_axis_type="log",
                 title=f'Execution time for R = {R[col_idx]} columns')

    # (marker glyph, timings, legend label, marker style, line style)
    series = [
        (fig.circle, method1, 'Numpy pinv',
         dict(fill_color=None), dict()),
        (fig.triangle, method2, 'Method 2',
         dict(color='orange'), dict(color='orange')),
        (fig.square, method3, 'Method 3 - Hadamard prod.',
         dict(fill_color=None, color='green'), dict(color='green')),
    ]
    for marker, timings, label, marker_style, line_style in series:
        # Mean over realizations, converted from nanoseconds to seconds.
        mean_seconds = timings[col_idx].mean(axis=0)*1e-9
        marker(I, mean_seconds, size=8, legend=label, **marker_style)
        fig.line(I, mean_seconds, line_width=2, legend=label, **line_style)

    fig.legend.location = "top_left"
    return fig


# One panel per column count, shown side by side:
plot_R1 = _runtime_figure(0)
plot_R2 = _runtime_figure(1)
show(row(plot_R1, plot_R2))

### Performance simulations - Problem 02

Dealing with $N$ matrices $\mathbf{A}_{(n)} \in \mathbb{R}^{4\times 2}$, for $N \in \{2,4,6,8,10\}$, to calculate the nested
Khatri-Rao product:

$$
\mathbf{X} = \diamond^{N}_{n=1} \mathbf{A}_{(n)} = \mathbf{A}_{(1)} \diamond \dots \diamond \mathbf{A}_{(N)}
$$


In [8]:
# Time the nested Khatri-Rao product for every Monte Carlo realization and
# every number of matrices. Run times are stored in nanoseconds.
for r in tnrange(realiz):
    for idx, val in enumerate(n):
        # Random 4 x 2 matrix, reused as every factor of the product:
        A = np.random.rand(4, 2)
        # Build the factor list before starting the clock so that only the
        # product itself is timed; int() makes the list repetition count an
        # explicit Python integer rather than a NumPy scalar.
        factors = [A]*int(val)
        ti = perf_counter_ns() # measuring the execution time
        n_kr(factors)
        tf = perf_counter_ns()
        problem2[r, idx] = tf - ti

HBox(children=(IntProgress(value=0), HTML(value='')))




In [12]:
# Plot the mean run time of the nested Khatri-Rao product against the number
# of matrices, on a logarithmic y axis.
mean_runtime_s = problem2.mean(axis=0)*1e-9  # nanoseconds -> seconds
series_label = 'kr(A(1),...,A(n))'

plot_p2 = figure(
    title='Execution time for N kr products',
    x_axis_label='Number of matrices N',
    y_axis_label='Run time [s] - log scale',
    y_axis_type="log",
    plot_width=600,
    plot_height=400,
    background_fill_color="#fafafa",
    tools="hover,pan,wheel_zoom,box_zoom,reset, save",
)

# Markers and connecting line share one legend entry.
plot_p2.square(n, mean_runtime_s, size=8, color='red', legend=series_label)
plot_p2.line(n, mean_runtime_s, line_width=2, color='red', legend=series_label)

plot_p2.legend.location = "top_left"
show(plot_p2)