In [None]:
import numpy as np
from matricesFW import FW_objective_function, FW_inface, FrankWolfe
from scipy import sparse
import matplotlib.pyplot as plt
import scipy.sparse.linalg
from scipy import stats

# Artificial Data Generation

In [None]:
def data_generation(n, m, r, rho, SNR, random_state=None):
    """Build a synthetic sparse matrix: a normalized factor product plus scaled noise.

    Two random sparse factors ``U`` (m x r) and ``V`` (r x n) and a random
    sparse noise matrix ``E`` (m x n) are drawn with ``scipy.sparse.random``
    (default value sampler). The returned matrix is::

        X = (U @ V) / ||U @ V||_F  +  E / (SNR * ||E||_F)

    so the factor-product term has Frobenius norm 1 and the noise term has
    Frobenius norm ``1 / SNR``.

    Parameters
    ----------
    n : int
        Number of columns of the returned matrix.
    m : int
        Number of rows of the returned matrix.
    r : int
        Inner dimension shared by the two factors.
    rho : float
        Density passed to every ``scipy.sparse.random`` call (U, V and E).
    SNR : float
        Divides the weight of the noise term; larger values mean less noise.
    random_state : None, int, numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. An integer seeds a single generator that is
        shared by the three draws (so U, V and E are not identical streams).
        ``None`` keeps the previous behavior of leaving the choice to SciPy.

    Returns
    -------
    scipy sparse matrix of shape (m, n)
    """
    # One generator for all three draws: handing the same integer seed to each
    # sparse.random call would restart the same random stream three times.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.default_rng(random_state)
    else:
        rng = random_state

    U = sparse.random(m, r, density=rho, format='csr', data_rvs=None, random_state=rng)
    V = sparse.random(r, n, density=rho, format='csr', data_rvs=None, random_state=rng)
    E = sparse.random(m, n, density=rho, format='csr', data_rvs=None, random_state=rng)

    # `@` is always a matrix product, whereas `*` only means matmul for the
    # legacy spmatrix classes.
    UV = U @ V

    w1 = 1/(sparse.linalg.norm(UV, ord='fro'))
    w2 = 1/(SNR*sparse.linalg.norm(E, ord='fro'))
    X_test = w1*UV + w2*E
    return X_test

### Frank-Wolfe Implementation

In [None]:
# Settings for the synthetic experiment.
n = 400    # columns of the generated matrix
m = 200    # rows of the generated matrix
r = 10     # inner dimension of the two random factors
SNR = 5    # noise weight is 1 / (SNR * ||E||_F)
rho = 0.1  # density of every random sparse matrix

# scipy.sparse.random falls back to NumPy's global RandomState when it is not
# given an explicit random state, so seeding that global state makes X_test
# reproducible across kernel restarts.
SEED = 0
np.random.seed(SEED)

X_test = data_generation(n, m, r, rho, SNR)


# Sweep delta over 1..19; for each delta compare plain Frank-Wolfe against the
# in-face variant for nine (gamma1, gamma2) pairs and save one figure per pair.
for delta in range(1, 20):
    # The plain Frank-Wolfe call takes no gamma argument, so it is run once per
    # delta instead of once per gamma pair.
    # NOTE(review): assumes FrankWolfe is deterministic for fixed inputs and
    # that neither solver modifies X_test - confirm in matricesFW.
    pred_ratings_reg, loss_reg, loss_track_reg, ranks_reg = FrankWolfe(
        X_test, FW_objective_function,
        delta=delta, max_iter=100, patience=1e-7, printing_res=False,
    )

    # A separate name for the inner index keeps it from shadowing the delta loop.
    for gamma_step in range(1, 10):
        gamma1 = 0.1*gamma_step
        gamma2 = (0.1*gamma_step)+0.1
        pred_ratings_inface, loss_inface, loss_track_inface, ranks_inface = FW_inface(
            X_test, FW_objective_function,
            gamma1=gamma1, gamma2=gamma2, delta=delta, THRES=10,
            max_iter=100, patience=1e-7, printing=False,
        )

        fig = plt.figure(figsize=(20, 10))
        fig.suptitle(t='n = %i, m = %i, delta =%i, γ1 = %.2f, γ2 = %.2f' % (n, m, delta, gamma1, gamma2), fontsize=20)

        # Left panel: log10 of the tracked loss per iteration.
        ax1 = fig.add_subplot(121)
        ax1.set_title(label='error vs iterations', fontsize=18)
        ax1.set_xlabel('iterations', size=16)
        ax1.set_ylabel('log(f)', size=16)
        ax1.plot(np.log10(loss_track_reg), label='FW', color='orange')
        ax1.plot(np.log10(loss_track_inface), label='FW_IF', color='blue')
        ax1.legend(loc='best')

        # Right panel: tracked rank per iteration.
        ax2 = fig.add_subplot(122)
        ax2.set_title(label='rank vs iterations', fontsize=18)
        ax2.set_xlabel('iterations', size=16)
        ax2.set_ylabel('rank', size=16)
        ax2.plot(ranks_reg, label='FW', color='orange')
        ax2.plot(ranks_inface, label='FW_IF', color='blue')
        ax2.legend(loc='best')

        fig.savefig('n = %i_m = %i_delta =%i_γ1 = %.2f_γ2 = %.2f.png' % (n, m, delta, gamma1, gamma2))

        # Save first, then display and release the figure: the sweep creates
        # 171 figures and keeping them all open wastes memory.
        plt.show()
        plt.close(fig)

# Goodreads Book Reviews Dataset

In [None]:
import pandas as pd

# Let pandas print up to 100 rows and 100 columns before truncating output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 100)

In [None]:
# Load the cleaned Goodreads file (semicolon-separated).
path = 'DATA/goodreads_cleaned.csv'
df = pd.read_csv(path, sep=";")

# Attach to every row how many rows share its book_id and its user_id.
df['book_id_count'] = df.groupby('book_id')['book_id'].transform('count')
df['user_id_count'] = df.groupby('user_id')['user_id'].transform('count')

book_quantile = 0.95
user_quantile = 0.9

# Cut-offs are quantiles of the per-book and per-user row counts.
min_book_count = df['book_id'].value_counts().quantile(book_quantile)
min_user_count = df['user_id'].value_counts().quantile(user_quantile)

# Keep only rows whose book AND user both reach their cut-off.
keep_rows = (df['book_id_count'] >= min_book_count) & (df['user_id_count'] >= min_user_count)
df = df.loc[keep_rows, :]

### Data exploration

In [None]:
# Summary statistics of the number of rows per user_id.
df['user_id'].value_counts().describe()

In [None]:
# Summary statistics of the number of rows per book_id.
df['book_id'].value_counts().describe()

### Matrix from data

In [None]:
# Reshape to one row per user_id and one column per book_id holding the rating.
# pivot_table's default aggregation averages duplicate (user, book) pairs;
# pairs with no rating become NaN.
df = df.pivot_table(index="user_id", columns="book_id", values="rating")
df.head(10)

In [None]:
# reset_index returns a new frame rather than modifying df, so the result must
# be assigned for the user_id index to be replaced by a positional 0..N-1 index.
df = df.reset_index(drop=True)
# Show a preview only, instead of rendering the whole frame.
df.head(10)

In [None]:
# Rescale every entry with (x + 1) / 6, then drop rows and columns that contain
# no value at all.
# NOTE(review): presumably ratings lie in 0..5 so the result lies in (0, 1] - confirm.
df = ((df + 1) / 6).dropna(axis=0, how='all').dropna(axis=1, how='all')

# Dense array view of the table; missing entries stay NaN.
data_matrix = df.to_numpy(na_value=np.nan)
print(data_matrix)

In [None]:
# [row, column] positions of the entries of data_matrix that are not NaN.
observed_mask = np.logical_not(np.isnan(data_matrix))
idx = np.argwhere(observed_mask)
print(idx)

In [None]:
# Replace missing entries with 0 in a NEW array.
# The second positional parameter of np.nan_to_num is `copy`, not the fill
# value: passing 0 there means copy=False, which overwrites data_matrix in
# place and makes new_data the same array. The fill value must be given via
# the `nan` keyword.
new_data = np.nan_to_num(data_matrix, nan=0.0)

print(new_data)

# Rank of the zero-filled matrix.
print(np.linalg.matrix_rank(new_data))

### Frank-Wolfe Implementations

In [None]:
# The in-face parameters are the same for every delta in this sweep.
gamma1 = 0
gamma2 = 1

# Compare plain Frank-Wolfe with the in-face variant on the Goodreads matrix
# for delta = 1000, 1200, ..., 2000 and save one two-panel figure per delta.
for delta in range(1000, 2001, 200):
    pred_ratings_reg, loss_reg, loss_track_reg, ranks_reg = FrankWolfe(
        new_data, FW_objective_function,
        delta=delta, max_iter=200, patience=1e-7, printing_res=False,
    )
    pred_ratings_inface, loss_inface, loss_track_inface, ranks_inface = FW_inface(
        new_data, FW_objective_function,
        gamma1=gamma1, gamma2=gamma2, delta=delta, THRES=10,
        max_iter=200, patience=1e-7, printing=False,
    )

    fig = plt.figure(figsize=(20, 10))
    fig.suptitle(t='GoodReads δ =%i, γ1 = %.2f, γ2= %.2f' % (delta, gamma1, gamma2), fontsize=20)

    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    # (axes, title, y label, plain-FW curve, in-face curve) for each panel:
    # left shows log10 of the tracked loss, right shows the tracked rank.
    panels = (
        (ax1, 'error vs iterations', 'log(f)', np.log10(loss_track_reg), np.log10(loss_track_inface)),
        (ax2, 'rank vs iterations', 'rank', ranks_reg, ranks_inface),
    )
    for ax, panel_title, y_label, fw_curve, fw_if_curve in panels:
        ax.set_title(label=panel_title, fontsize=18)
        ax.set_xlabel('iterations', size=16)
        ax.set_ylabel(y_label, size=16)
        ax.plot(fw_curve, label='FW', color='orange')
        ax.plot(fw_if_curve, label='FW_IF', color='blue')
        ax.legend(loc='best')

    plt.savefig('goodreads_δ =%i_γ1 = %.2f_γ2= %.2f.png' % (delta, gamma1, gamma2))