In [10]:
import numpy as np
from matricesFW import FW_objective_function, FW_inface, FrankWolfe
from scipy import sparse
import matplotlib.pyplot as plt
import scipy.sparse.linalg
from scipy import stats

# Artificial Data Generation

In [4]:
def data_generation(n, m, rho, random_state=None):
    """Generate a random sparse matrix to use as artificial test data.

    Parameters
    ----------
    n : int
        Number of columns of the generated matrix.
    m : int
        Number of rows of the generated matrix.
    rho : float
        Density of the matrix, forwarded to ``scipy.sparse.random``.
    random_state : optional
        Seed / random state forwarded to ``scipy.sparse.random``. The default
        ``None`` keeps the original, unseeded behaviour; pass an integer to
        obtain the same matrix on every call (e.g. after a kernel restart).

    Returns
    -------
    scipy sparse matrix in CSR format with shape ``(m, n)``.
    """
    # Note the argument order: the result is m x n (rows x columns), not n x m.
    X_test = sparse.random(
        m,
        n,
        density=rho,
        format='csr',
        data_rvs=None,
        random_state=random_state,
    )
    return X_test

In [None]:
n = 400
m = 200
rho = 0.1

# Generated once, outside the loops, so that every (delta, gamma) run
# works on the same data.
X_test = data_generation(n, m, rho)


# Sweep over delta (outer loop) and over the gamma1/gamma2 pair (inner loop),
# comparing plain Frank-Wolfe against the in-face variant for each setting.
for delta in range(1, 60, 4):
    # The plain Frank-Wolfe call takes no gamma arguments, so it is run once
    # per delta instead of once per (delta, gamma) pair.
    # NOTE(review): assumes FrankWolfe is deterministic for fixed inputs and
    # does not modify X_test -- confirm against matricesFW.
    pred_ratings_reg, loss_reg, loss_track_reg, ranks_reg = FrankWolfe(X_test, FW_objective_function, delta = delta, max_iter=250, patience=1e-7, printing = False)

    # A separate loop variable is used here; the original reused `i` for both loops.
    for step in range(1, 20):
        gamma1 = step*0.1
        gamma2 = (step*0.1)+0.1
        pred_ratings_inface, loss_inface, loss_track_inface, ranks_inface = FW_inface(X_test, FW_objective_function, gamma1 = gamma1, gamma2 = gamma2 , delta = delta, max_iter = 250, patience = 1e-7, printing=False)

        fig = plt.figure(figsize = (20,10))
        fig.suptitle(t = 'n = %i, m = %i, δ =%i, γ1 = %.2f, γ2= %.2f, ρ = %.2f'%(n, m, delta, gamma1, gamma2, rho), fontsize=20)

        # Left panel: base-10 log of the tracked loss per iteration
        ax1 = fig.add_subplot(121)
        ax1.set_title(label = 'error vs iterations' , fontsize = 18)
        ax1.set_xlabel('iterations', size = 16)
        ax1.set_ylabel('log(f)',size = 16)
        ax1.plot(np.log10(loss_track_reg), label = 'FW', color = 'orange')
        ax1.plot(np.log10(loss_track_inface), label = 'FW_IF', color = 'blue')
        ax1.legend(loc = 'best')

        # Right panel: tracked rank per iteration
        ax2 = fig.add_subplot(122)
        ax2.set_title(label = 'rank vs iterations', fontsize = 18)
        ax2.set_xlabel('iterations', size = 16)
        ax2.set_ylabel('rank',size = 16)
        ax2.plot(ranks_reg, label = 'FW', color = 'orange')
        ax2.plot(ranks_inface, label = 'FW_IF', color = 'blue')
        ax2.legend(loc = 'best')
        #plt.savefig('/home/kabo/Documents/Optimization/deltatry/n=%i_m=%i_δ =%i_γ1=%.2f_γ2= %.2f_ρ=%.2f.png'%(n, m, delta, gamma1, gamma2, rho))

        # Render the figure now and release it: the sweep creates hundreds of
        # figures, which would otherwise all stay open in memory.
        plt.show()
        plt.close(fig)

## Data Generation #2

In [None]:
# Parameters
n = 400
m = 200
r = 10
rho = 0.10
SNR = 5
delta = 3.75


# Random sparse factors U (m x r) and V (r x n), plus a sparse noise term E (m x n)
U = sparse.random(m, r, density=0.1, format='csr', data_rvs=None)
V = sparse.random(r, n, density=0.1, format='csr', data_rvs=None)
E = sparse.random(m, n, density=0.1, format='csr', data_rvs=None)

VT = V.transpose(copy=True)

# V is already r x n, so the signal term is the matrix product U V.
# `@` is always a matrix product, whereas `*` is only one for sparse *matrix*
# objects (it is element-wise for sparse arrays).
UVT = U @ V
#print(UVT. shape)

# Scale the signal to unit Frobenius norm ...
w1 = 1/(sparse.linalg.norm(UVT, ord='fro'))

# ... and the noise to Frobenius norm 1/SNR
w2 = 1/(SNR*sparse.linalg.norm(E, ord='fro'))

#Finally observed data matrix is:
X_test = w1*UVT + w2*E

# Non zero values: (row, col) index pairs of the non-zero entries
idx_rows, idx_cols = X_test.nonzero()
idx_ratings = np.column_stack((idx_rows, idx_cols))

# Rank and nuclear norm of the test set.
# np.linalg.matrix_rank does not understand scipy sparse matrices, and
# svds(k=rank) fails when rank == min(m, n) (its default solver needs
# k < min(m, n)), so both quantities are computed from a dense copy.
X_dense = X_test.toarray()
rank = np.linalg.matrix_rank(X_dense)
U_thin, s_thin, Vh_thin = np.linalg.svd(X_dense, full_matrices=False)
# Keep the `rank` leading singular triplets (singular values in descending order)
U_thin, s_thin, Vh_thin = U_thin[:, :rank], s_thin[:rank], Vh_thin[:rank, :]
nuc_norm = s_thin.sum()

# Print some info about the generated data
print('Nuclear norm:', nuc_norm)
print('Data shape:', np.shape(X_test))
print('Number of observed values:', len(idx_rows))
print('Rank of the matrix:', rank)
# Values are printed in the order the label announces: minimum first, then maximum
print('Minimum and maximum values:', X_test.min(), X_test.max())

# Data Generation #3

In [None]:
# Build a random sparse test matrix whose stored entries are integers
# drawn from stats.randint(1, 6)
rvs = stats.randint(1,6).rvs
ratings_sparse = sparse.random(
    1500,                 # number of rows
    2000,                 # number of columns
    density=0.05,         # density of the sparse matrix
    dtype=np.int32,       # data type of the stored values
    data_rvs=rvs,         # sampler used for the stored values
)

# Work with a dense array from here on
X_test = ratings_sparse.toarray()

# Normalize the values by dividing by 5
X_test_norm = X_test / 5