In [2]:
import sys
sys.path.insert(0, '/gpfs/software/Anaconda3/lib/python3.6/site-packages')

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

from distance import SquaredL2, L2
from neighborhood import neighbor_graph, laplacian
from correspondence import Correspondence
from stiefel import *
from kmedoids import *

import torch
import torch.nn as nn
import torch.nn.functional as F
torch.set_default_tensor_type('torch.DoubleTensor')

In [3]:
df = pd.read_csv("data/DER-22_Single_cell_expression_raw_UMI.tsv", sep='\t')
xl = pd.read_excel("data/DER-21_Single_cell_markergenes_UMI.xlsx")

Ex1_cols = [col for col in df.columns if 'Ex1' in col]
In1_cols = [col for col in df.columns if 'In1' in col]

Ex1 = df[Ex1_cols].loc[df.index.isin(xl['All Clusters'])]
In1 = df[In1_cols].loc[df.index.isin(xl['All Clusters'])]

# Ex1.isnull().values.any()

Ex1_log = np.log2(Ex1+1)
In1_log = np.log2(In1+1)

In [4]:
"""Defines the neural network"""

class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H1)
        self.linear2 = torch.nn.Linear(H1, H2)
        self.linear3 = torch.nn.Linear(H2, D_out)

    def forward(self, x):
        h1_sigmoid = self.linear1(x).sigmoid()
        h2_sigmoid = self.linear2(h1_sigmoid).sigmoid()
        y_pred = self.linear3(h2_sigmoid)
        return y_pred

In [5]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in1, D_in2, H1, H2, D_out = Ex1_log.shape[0], Ex1_log.shape[1], In1_log.shape[1], 1024, 512, 3

model1 = Net(D_in1, H1, H2, D_out)
model2 = Net(D_in2, H1, H2, D_out)

In [6]:
x1_np = Ex1_log.values
x2_np = In1_log.values

x1 = torch.from_numpy(x1_np)
x2 = torch.from_numpy(x2_np)

%store x1
%store x2

Stored 'x1' (Tensor)
Stored 'x2' (Tensor)


In [7]:
# Compute Laplacian of the join datasets
adj1 = neighbor_graph(x1_np, k=5)
adj2 = neighbor_graph(x2_np, k=5)

corr = Correspondence(matrix=np.eye(N))

In [8]:
w = np.block([[corr.matrix(),adj1],
              [adj2, corr.matrix()]])

L_np, D_np = laplacian(w, normed=True, return_diag=True)
L = torch.from_numpy(L_np)

In [12]:
# Construct an Optimizer
params = list(model1.parameters()) + list(model2.parameters())
optimizer = torch.optim.Adam(params, lr = 0.00001)

for t in range(200):
    # Forward pass: Compute predicted y by passing x to the model
    y1_pred = model1(x1)
    y2_pred = model2(x2)
    
    outputs = torch.cat((y1_pred, y2_pred), 0)
    
    # Project the output onto Stiefel Manifold
    u, s, v = torch.svd(outputs, some=True)
    proj_outputs = u@v.t()
    
    # Compute and print loss
    loss = torch.trace(proj_outputs.t()@L@proj_outputs)
    print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    proj_outputs.retain_grad()
    
    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    
    # Project the (Euclidean) gradient onto the tangent space of Stiefel Manifold (to get Rimannian gradient)
    rgrad = proj_stiefel(proj_outputs, proj_outputs.grad)
    grd = torch.norm(rgrad)
    print(torch.norm(rgrad))
    
    optimizer.zero_grad()
    # Backpropogate the Rimannian gradient w.r.t proj_outputs
    proj_outputs.backward(rgrad)
    
    optimizer.step()

0 0.04980313057362592
tensor(1.0658, grad_fn=<NormBackward0>)
1 0.05192166575489997
tensor(1.0695, grad_fn=<NormBackward0>)
2 0.2413292139829069
tensor(1.6175, grad_fn=<NormBackward0>)
3 0.061042807878783314
tensor(1.0865, grad_fn=<NormBackward0>)
4 0.14181981780428515
tensor(1.3185, grad_fn=<NormBackward0>)
5 0.1508774625749779
tensor(1.3609, grad_fn=<NormBackward0>)
6 0.07971493970734181
tensor(1.1836, grad_fn=<NormBackward0>)
7 0.0615825695855951
tensor(1.1515, grad_fn=<NormBackward0>)
8 0.10044740737180527
tensor(1.2788, grad_fn=<NormBackward0>)
9 0.110169884322798
tensor(1.2951, grad_fn=<NormBackward0>)
10 0.0786719779693502
tensor(1.1818, grad_fn=<NormBackward0>)
11 0.05191717240788632
tensor(1.0744, grad_fn=<NormBackward0>)
12 0.06369347724115507
tensor(1.1100, grad_fn=<NormBackward0>)
13 0.08680025977183697
tensor(1.1881, grad_fn=<NormBackward0>)
14 0.08191589209029948
tensor(1.1755, grad_fn=<NormBackward0>)
15 0.058539638414866965
tensor(1.1013, grad_fn=<NormBackward0>)
16 0.0

130 0.021462442429175235
tensor(1.0003, grad_fn=<NormBackward0>)
131 0.02120813953651994
tensor(0.9997, grad_fn=<NormBackward0>)
132 0.020953487273255786
tensor(0.9992, grad_fn=<NormBackward0>)
133 0.020698896506350845
tensor(0.9987, grad_fn=<NormBackward0>)
134 0.0204439636358337
tensor(0.9980, grad_fn=<NormBackward0>)
135 0.020188773350823808
tensor(0.9974, grad_fn=<NormBackward0>)
136 0.019933562787611922
tensor(0.9968, grad_fn=<NormBackward0>)
137 0.019678053939597026
tensor(0.9963, grad_fn=<NormBackward0>)
138 0.019422364825962837
tensor(0.9957, grad_fn=<NormBackward0>)
139 0.019166596742926145
tensor(0.9952, grad_fn=<NormBackward0>)
140 0.01891057747034204
tensor(0.9946, grad_fn=<NormBackward0>)
141 0.018654429076097614
tensor(0.9939, grad_fn=<NormBackward0>)
142 0.018398167739275204
tensor(0.9934, grad_fn=<NormBackward0>)
143 0.018141697365560983
tensor(0.9928, grad_fn=<NormBackward0>)
144 0.017885132739422813
tensor(0.9923, grad_fn=<NormBackward0>)
145 0.017628431188113997
tens

In [13]:
proj_outputs_np = proj_outputs.detach().numpy()

In [14]:
%store proj_outputs_np

Stored 'proj_outputs_np' (ndarray)


In [15]:
np.save('./data/proj_outputs_np.npy', proj_outputs_np)

In [61]:
D = pairwise_distances(proj_outputs_np, metric='euclidean')

In [22]:
proj_outputs_np[1768:3536]

array([[-0.02997048,  0.00974398, -0.01351445],
       [-0.00858181, -0.0060294 ,  0.03141839],
       [-0.01719308,  0.00673225,  0.00134394],
       ...,
       [-0.02241915,  0.00891299, -0.0099177 ],
       [-0.01241028,  0.00113306,  0.00084057],
       [-0.01583905,  0.00273086, -0.00224224]])