### setup

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import scipy


# The star imports stay ahead of the torch imports so that the explicit
# torch names bound below take precedence over anything they re-export.
from utils import *
from pytorch_sparse_utils import *


import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Using {} device'.format(device))

Using cpu device


### loading data

In [3]:
# Load the digital-expression table; the CSV's leading 'Unnamed: 0' column
# is used as the row index.
expression_csv_path = '/data/bioprotean/macosko_et_al/Puck_200115_08_digital_expression_sorted.csv'
bead_reads = pd.read_csv(expression_csv_path, index_col='Unnamed: 0')

In [4]:
# Preview the first rows of the loaded table as a sanity check.
bead_reads.head()

Unnamed: 0,0610005C13Rik,0610007P14Rik,0610009B22Rik,0610009E02Rik,0610009L18Rik,0610009O20Rik,0610010F05Rik,0610010K14Rik,0610011F06Rik,0610012D04Rik,...,mt-Tq,mt-Tr,mt-Ts2,mt-Tt,mt-Tv,n-R5-8s1,n-R5s173,n-R5s33,n-R5s40,n-R5s95
AAAAAAAGGTAGTA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AAAAAAAGTCCCAA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AAAAAAATCTTAGT,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AAAAAACATCTTTC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AAAAAACGAAATAG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Re-express the table's values as a SciPy COO sparse matrix. The dense
# array is passed inline so no extra reference to it is kept.
data_scipy_sparse = scipy.sparse.coo_matrix(
    bead_reads.to_numpy()
)

In [6]:
# Convert the SciPy matrix with the project helper, then place the result
# on the selected device.
data_sparse_tensor = (
    scipy_sparse_to_pytorch_sparse(data_scipy_sparse)
    .to(device=device)
)

In [7]:
# Show the sparse tensor's repr (indices, values, size, nnz, layout).
data_sparse_tensor

tensor(indices=tensor([[    0,     0,     0,  ..., 53207, 53207, 53207],
                       [ 2667,  2717,  6164,  ..., 23252, 23253, 23255]]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]),
       size=(53208, 23264), nnz=22396657, layout=torch.sparse_coo)

In [8]:
# Dense version of the data tensor, kept on the selected device.
data_dense_tensor = (
    data_sparse_tensor
    .to_dense()
    .to(device=device)
)

In [23]:
# Mask of observed entries: flag the non-zero entries of the data matrix,
# cast the flags to int64, and return to COO layout.
mask_scipy_sparse = (
    (data_scipy_sparse != 0)
    .astype(np.int64)
    .tocoo()
)

In [24]:
# Torch counterpart of the SciPy mask, placed on the selected device.
mask_sparse_tensor = (
    scipy_sparse_to_pytorch_sparse(mask_scipy_sparse)
    .to(device=device)
)

In [25]:
# Dense boolean mask on the selected device; the training loop uses it to
# index both the model output and the data tensor.
mask_dense_tensor = (
    mask_sparse_tensor
    .to_dense()
    .bool()
    .to(device=device)
)

In [12]:
# Load the saved distance matrix from a .npy file (path is relative to the
# notebook's working directory).
distance_matrix_path = '../data/macosko_distance_matrix.npy'
distances = np.load(distance_matrix_path)

In [13]:
# Adjacency: 1.0 wherever the entry of `distances` is at most 20, stored as
# a float32 COO matrix. The comparison is done inline so the dense boolean
# array is not kept alive under its own name.
adjacency = scipy.sparse.coo_matrix(
    distances <= 20,
    dtype=np.float32,
)

# Degree matrix: the column sums of the adjacency, laid out on the diagonal.
column_degrees = adjacency.sum(axis=0).A1
degree = scipy.sparse.diags(column_degrees)

# Laplacian as degree minus adjacency, converted back to COO layout.
laplacian = (degree - adjacency).tocoo()

In [14]:
# Torch version of the adjacency matrix, built with the project helper.
# NOTE(review): unlike the other tensors built in this notebook, this one is
# not moved to `device` — confirm that is intentional.
adjacency_sparse_tensor = scipy_sparse_to_pytorch_sparse(adjacency)

In [15]:
# Sparse Laplacian on the selected device; this is the tensor passed to
# laplacian_loss in the training loop.
laplacian_sparse_tensor = scipy_sparse_to_pytorch_sparse(laplacian).to(device=device)

# Dense Laplacian. `toarray()` yields a plain ndarray (not an np.matrix), and
# `torch.from_numpy` wraps that buffer instead of copying it, so the dense
# matrix is materialised once on the host rather than twice.
laplacian_tensor = torch.from_numpy(laplacian.toarray()).to(device=device)

In [16]:
class MatrixFactorization(torch.nn.Module):
    """Product of two learnable factor matrices.

    Holds a (rows x n_factors) matrix and an (n_factors x cols) matrix, both
    initialised from ``torch.rand`` and registered as trainable parameters.
    """

    def __init__(self, rows, cols, n_factors=10):
        super().__init__()
        # Draw the row factor before the column factor so the random-number
        # stream is consumed in a fixed order.
        row_init = torch.rand(rows, n_factors)
        col_init = torch.rand(n_factors, cols)
        self.row_embedding = torch.nn.Parameter(row_init)
        self.col_embedding = torch.nn.Parameter(col_init)

    def forward(self):
        """Return the (rows x cols) matrix product of the two factors."""
        return self.row_embedding @ self.col_embedding

In [26]:
def laplacian_loss(x, x_lap):
    """Sum ``bilinear_mult(column, x_lap)`` over every column of ``x``.

    Args:
        x: 2-D object supporting ``x.shape[1]`` and ``x[:, j]`` column slicing.
        x_lap: passed unchanged to ``bilinear_mult`` for each column.

    Returns:
        The accumulated total, starting from the integer 0 (so the result is
        the plain int 0 when ``x`` has no columns).
    """
    column_count = x.shape[1]
    # sum() starts at 0 and adds left to right, matching a manual accumulator.
    return sum(bilinear_mult(x[:, col], x_lap) for col in range(column_count))

In [27]:
# Size the model to match the mask tensor's two dimensions and use 10 factors.
n_rows, n_cols = mask_sparse_tensor.shape
mf = MatrixFactorization(n_rows, n_cols, 10).to(device=device)

In [28]:
# `optim` is imported with the other dependencies in the notebook's first cell.

# Mean-squared error between predicted and target values.
criterion = nn.MSELoss()
# Plain SGD over the model's parameters with a fixed learning rate.
optimizer = optim.SGD(mf.parameters(), lr=0.001)

In [None]:
# Loss value recorded after every epoch.
losses = []

# The target values at the masked positions do not change between epochs, so
# gather them once instead of repeating the boolean-mask indexing each time.
observed_targets = data_dense_tensor[mask_dense_tensor]

for epoch in range(1000):

    optimizer.zero_grad()

    # Call the module instance rather than .forward() directly so that any
    # registered hooks are run.
    output = mf()

    # Fit term on the masked entries plus the weighted laplacian_loss term.
    reconstruction_loss = criterion(output[mask_dense_tensor], observed_targets)
    laplacian_penalty = 0.001*laplacian_loss(output, laplacian_sparse_tensor)
    loss = reconstruction_loss + laplacian_penalty
    loss.backward()
    optimizer.step()

    # Report progress every tenth epoch.
    if epoch % 10 == 0:
        print(epoch)
        print(loss)

    losses.append(loss.item())

In [1]:
# Display the loss history; `losses` is created by the training-loop cell,
# which must have been run in the current kernel first.
losses

NameError: name 'losses' is not defined

In [None]:
# Show the most recent loss as a plain Python number.
loss.item()

In [14]:
# Size of the tensor in units of 10**9 bytes (bytes per element times element count).
# NOTE(review): `adjacency_tensor` is not defined in any cell visible here —
# confirm where it comes from, otherwise this cell fails on a fresh kernel.
(adjacency_tensor.element_size() * adjacency_tensor.nelement())/(10**9)

11.324365056