In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch

In [5]:
# Load the raw table from the local parquet file; the cells below read its
# `id` and `references` columns.
df = pd.read_parquet("./data/data.parquet")

In [6]:
# Ids that occur in the table; references to anything else are discarded.
unique_ids = set(df.id.unique())

# Array of (row id, referenced id) pairs, one row per reference that points
# back into the table.
observations = (
    df
    # keep only the references whose target is itself a row of `df`
    .assign(
        refs=lambda frame: frame.references.map(
            lambda refs: [ref for ref in refs if ref in unique_ids]
        )
    )
    # per row: a list of (id, ref) tuples, or NaN when nothing usable is left
    .apply(
        lambda row: (
            [(int(row['id']), ref) for ref in row['refs']]
            if not np.isnan(row['refs']).any() and len(row['refs']) > 0
            else float('nan')
        ),
        axis=1,
    )
    # drop the NaN placeholders, then stack the per-row lists into one array
    .pipe(lambda pair_lists: pair_lists[pair_lists.notna()])
    .pipe(lambda pair_lists: np.concatenate(pair_lists.values))
)

In [11]:
# observations = torch.tensor(observations)

In [15]:
# torch.save(observations, 'data/articles.pt')

In [39]:
# Replace the in-memory `observations` with the copy cached at data/articles.pt.
# NOTE(review): presumably this is the tensor produced by the commented-out
# torch.tensor / torch.save cells above — confirm the file is current.
# torch.load unpickles the file, so only load artifacts you trust.
observations = torch.load('data/articles.pt')

In [17]:
# Split the pair table column-wise into its first and second elements.
# NOTE(review): presumably column 0 is the row id and column 1 the referenced
# id, matching how the pairs are built earlier — confirm for the cached file.
t0, t1 = observations[:, 0], observations[:, 1]

In [48]:
def transform_unique(t):
    """Relabel the entries of ``t`` by their rank among its distinct values.

    Args:
        t: tensor whose entries are treated as opaque ids. It is not modified.

    Returns:
        A ``(relabelled, unique)`` tuple. ``relabelled`` is a new tensor with
        the shape and dtype of ``t`` in which every entry is replaced by its
        0-based position in the ascending list of distinct values of ``t``.
        ``unique`` is the ``{original value: position}`` dict (keys are NumPy
        scalars) that records the relabelling.
    """
    # return_inverse gives the position of every entry directly, so no
    # per-element Python callback (Tensor.apply_, CPU-only and slow) is needed.
    values, positions = torch.unique(t, sorted=True, return_inverse=True)
    unique = {e: i for i, e in enumerate(values.cpu().numpy())}
    # positions is int64; cast so the result keeps the dtype of the input.
    return positions.to(t.dtype), unique

In [49]:
# X, transform = transform_unique(observations)

In [51]:
# torch.save((X, transform), 'data/articles_prepared.pt')

In [None]:
# Load the cached (relabelled observations, id -> index mapping) pair.
# NOTE(review): presumably written by the commented-out transform_unique /
# torch.save cells above — confirm the file is current.
# torch.load unpickles the file, so only load artifacts you trust.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# may reject the non-tensor parts of this pickle — verify on the target version.
X, transform = torch.load('data/articles_prepared.pt')

In [8]:
class SparseMatrix(torch.utils.data.Dataset):
    """Dataset over three parallel sequences; item ``i`` is ``(ls[i], rs[i], v[i])``.

    NOTE(review): presumably ``ls`` / ``rs`` are the row / column indices of
    the stored entries and ``v`` their values — confirm against the caller.
    """

    def __init__(self, ls, rs, v):
        # The length is taken from `v` alone; `ls` and `rs` are not checked.
        self.ls, self.rs, self.v = ls, rs, v
        self.len = v.shape[0]

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        left = self.ls[idx]
        right = self.rs[idx]
        value = self.v[idx]
        return left, right, value

In [136]:
class EmbeddingWithBias(torch.nn.Module):
    """Lookup table that pairs every index with a vector and a scalar bias.

    Args:
        n: number of rows (distinct indices) in the table.
        embedding_dim: length of each embedding vector.
    """

    def __init__(self, n, embedding_dim):
        super().__init__()

        self.embedding_dim = embedding_dim
        self.shape = (n, embedding_dim)
        self.W = torch.nn.Embedding(n, embedding_dim)
        self.b = torch.nn.Embedding(n, 1)

    def forward(self, idx):
        """Look up the vectors and biases for ``idx``.

        Args:
            idx: integer tensor of row indices in ``[0, n)``.

        Returns:
            ``(vectors, biases)`` with shapes ``(*idx.shape, embedding_dim)``
            and ``(*idx.shape, 1)``.
        """
        # nn.Embedding is a module, not a container: it has to be called.
        # Subscripting it (`self.W[idx]`) raises TypeError.
        return self.W(idx), self.b(idx)

class MatrixFactorization(torch.nn.Module):
    """Biased factorization of an ``n x m`` matrix.

    The entry at ``(l, r)`` is modelled as the dot product of the left
    embedding of ``l`` and the right embedding of ``r`` plus one bias per side.

    Args:
        n: number of left-side indices.
        m: number of right-side indices.
        embedding_dim: length of the embedding vectors on both sides.
    """

    def __init__(self, n, m, embedding_dim):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.shape = (n, m, embedding_dim)
        self.L = EmbeddingWithBias(n, embedding_dim)
        self.R = EmbeddingWithBias(m, embedding_dim)

    def forward(self, ls, rs):
        """Predict the entries at the index pairs ``(ls[i], rs[i])``.

        Args:
            ls: integer tensor of left indices.
            rs: integer tensor of right indices, same shape as ``ls``.

        Returns:
            Tensor with the shape of ``ls`` holding one prediction per pair.
        """
        LW, Lb = self.L(ls)
        RW, Rb = self.R(rs)

        # The biases come from Embedding(., 1) tables, so they carry a trailing
        # dimension of size 1. Drop it, otherwise adding them to the per-pair
        # dot product would broadcast into a (batch, batch) matrix.
        return (LW * RW).sum(-1) + Lb.squeeze(-1) + Rb.squeeze(-1)
    
class SymmetricMatrixFactorization(torch.nn.Module):
    """Biased factorization of an ``n x n`` matrix with one shared table.

    Both sides of a pair are looked up in the same embedding table, so the
    prediction for ``(l, r)`` equals the prediction for ``(r, l)``.

    Args:
        n: number of indices.
        embedding_dim: length of each embedding vector.
    """

    def __init__(self, n, embedding_dim):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.shape = (n, embedding_dim)
        self.embeddings = EmbeddingWithBias(n, embedding_dim)

    def forward(self, ls, rs):
        """Predict the entries at the index pairs ``(ls[i], rs[i])``.

        Args:
            ls: integer tensor of indices for one side of each pair.
            rs: integer tensor of indices for the other side, same shape.

        Returns:
            Tensor with the shape of ``ls`` holding one prediction per pair.
        """
        # The only table this class creates is `self.embeddings`; both sides
        # are looked up in it (there is no separate L / R table here).
        LW, Lb = self.embeddings(ls)
        RW, Rb = self.embeddings(rs)

        # The biases come from an Embedding(., 1) table, so they carry a
        # trailing dimension of size 1. Drop it, otherwise the sum would
        # broadcast into a (batch, batch) matrix.
        return (LW * RW).sum(-1) + Lb.squeeze(-1) + Rb.squeeze(-1)