In [10]:
import torch 
from torch.linalg import vector_norm
from sklearn.neighbors import kneighbors_graph
from scipy.sparse.csgraph import dijkstra

from distances import *
from methods import * 
from optimizers import * 


# --- Hyperparameters ---------------------------------------------------------
latent_dim = 2
lr = 0.01
num_epochs = 1000
normalize = True

geodesic = True
min_dist = 1.  # only referenced by the commented-out threshold-graph variant below

# --- Toy data: 50 random points in 50 dimensions ------------------------------
data = torch.randn(50, 50)
data_dist_matrix = dist_matrix(data, Euclidean)

# The 3-nearest-neighbour graph is built unconditionally: the contrastive
# targets below need it even when `geodesic` is False. (Previously it was only
# created inside the `if geodesic:` branch, so disabling `geodesic` raised a
# NameError when `data_binary_matrix` was computed.)
data_nn_matrix = kneighbors_graph(data, 3, mode='distance', include_self=False)
data_nn_matrix = data_nn_matrix.toarray()

#IsoMap-style geodesic distance for data
if geodesic:
    # Alternative: threshold the dense distance matrix instead of using kNN.
#     truncated_matrix = torch.where(data_dist_matrix < min_dist, data_dist_matrix, torch.inf)
#     data_dist_matrix = dijkstra(truncated_matrix.detach().cpu().numpy())
#     data_dist_matrix = torch.FloatTensor(data_dist_matrix)
#     data_dist_matrix = torch.where(data_dist_matrix == torch.inf, 1000 * torch.ones_like(data_dist_matrix), data_dist_matrix)

    # Shortest-path lengths through the kNN graph replace the raw distances.
    data_dist_matrix = dijkstra(data_nn_matrix)
    data_dist_matrix = torch.FloatTensor(data_dist_matrix)
    # Pairs with no connecting path come back as inf; cap them at a large finite value.
    data_dist_matrix = torch.where(data_dist_matrix == torch.inf, 1000 * torch.ones_like(data_dist_matrix), data_dist_matrix)

# torch.manual_seed(42)
# torch.cuda.empty_cache()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print('Using device: ', device)

#model = MDS(data.shape[0], latent_dim, Poincare)
#model = Isomap(data.shape[0], latent_dim, Poincare)

# Binary adjacency of the kNN graph (1 where an edge exists), passed to the
# contrastive loss in the training loop.
data_binary_matrix = (data_nn_matrix > 0.).astype(int)
model = Contrastive(data.shape[0], latent_dim, Poincare)

#optimizer = StandardOptim(model, lr=lr)
optimizer = PoincareOptim(model, lr=lr)

if __name__ == "__main__":
    # Full-batch training: every epoch performs a single update computed from
    # the complete binary neighbour matrix.
    for i in range(num_epochs):
        # Optional call to the model's own normalisation before each update.
        if normalize:
            model.normalize()

        # print('norms', vector_norm(model.embeddings, dim=-1).mean().item(), vector_norm(model.embeddings, dim=-1).max().item())

        optimizer.zero_grad()
        #loss = model.loss_fun(data_dist_matrix)
        loss = model.loss_fun(data_binary_matrix, temperature=1)
        loss.backward()
        # print('grads', vector_norm(model.embeddings.grad, dim=-1).mean().item())
        optimizer.step()

        # Only every tenth epoch is reported.
        if i % 10:
            continue
        print(f'Epoch {i}, loss: {loss:.3f}')



Epoch 0, loss: 2610.674
Epoch 10, loss: 2463.013
Epoch 20, loss: 2324.259
Epoch 30, loss: 2193.678
Epoch 40, loss: 2068.025
Epoch 50, loss: 1945.178
Epoch 60, loss: 1824.658
Epoch 70, loss: 1708.096
Epoch 80, loss: 1602.005
Epoch 90, loss: 1515.582
Epoch 100, loss: 1440.092
Epoch 110, loss: 1373.528
Epoch 120, loss: 1313.496
Epoch 130, loss: 1259.835
Epoch 140, loss: 1214.785
Epoch 150, loss: 1174.679
Epoch 160, loss: 1137.427
Epoch 170, loss: 1103.070
Epoch 180, loss: 1072.207
Epoch 190, loss: 1045.363
Epoch 200, loss: 1020.172
Epoch 210, loss: 996.919
Epoch 220, loss: 975.463
Epoch 230, loss: 955.838
Epoch 240, loss: 937.442
Epoch 250, loss: 920.432
Epoch 260, loss: 904.694
Epoch 270, loss: 891.453
Epoch 280, loss: 880.922
Epoch 290, loss: 871.202
Epoch 300, loss: 862.494
Epoch 310, loss: 854.123
Epoch 320, loss: 845.112
Epoch 330, loss: 838.060
Epoch 340, loss: 830.849
Epoch 350, loss: 824.230
Epoch 360, loss: 817.352
Epoch 370, loss: 811.208
Epoch 380, loss: 804.420
Epoch 390, loss

In [11]:
# Number of rows in `data`; the same value is passed as the first argument when the models are constructed.
data.shape[0]

50

In [None]:
from sklearn.neighbors import kneighbors_graph
from scipy.sparse.csgraph import dijkstra
from torch.utils.data import DataLoader
import random
from constants import *
from methods import *
from optimizers import * 
from utils.visulization import *
from OdorDataset import OdorMonoDataset
from utils.helpers import *
# --- Optimisation settings ---------------------------------------------------
latent_dim = 2
lr = 0.1
num_epochs = 100_000

# --- Behaviour switches ------------------------------------------------------
normalize = False   # when True, model.normalize() is called during training
geodesic = False    # when True, geo_distance() replaces the plain distance matrix
min_dist = 1.0      # only referenced by commented-out code in this cell

# --- Data selection ----------------------------------------------------------
dataset_name = 'gslf'
model_name = 'molformer'
batch_size = 10

def set_seeds(seed):
    """Seed PyTorch, NumPy's global RNG and Python's ``random`` with ``seed``.

    Args:
        seed: value forwarded unchanged to each library's seeding function.
    """
    # torch.cuda.manual_seed(seed)
    # torch.cuda.manual_seed_all(seed)
    seeders = (torch.manual_seed, np.random.seed, random.seed)
    for seed_fn in seeders:
        seed_fn(seed)
def select_descriptors(dataset_name):
    """Pick the descriptor list that belongs to ``dataset_name``.

    Args:
        dataset_name: dataset identifier string.

    Returns:
        ``sagar_descriptors`` for ``'sagar'``, ``keller_descriptors`` for
        ``'keller'``, and ``None`` for every other name.
    """
    # Guard clauses: each constant is only looked up when its dataset is requested.
    if dataset_name == 'sagar':
        return sagar_descriptors
    if dataset_name == 'keller':
        return keller_descriptors
    return None


# set_seeds(2025)
# Location of the curated datasets and of the pre-computed embeddings file.
# NOTE(review): input_embeddings is presumably resolved relative to base_dir
# inside OdorMonoDataset -- confirm against the dataset class.
base_dir = '../../../T5 EVO/alignment_olfaction_datasets/curated_datasets/'
input_embeddings = f'embeddings/{model_name}/{dataset_name}_{model_name}_embeddings_13_Apr17.csv'

dataset = OdorMonoDataset(
    base_dir,
    input_embeddings,
    transform=None,
    grand_avg=False,
    descriptors=select_descriptors(dataset_name),
)
# Shuffled mini-batches; a trailing incomplete batch is dropped.
data_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)


    # del dataset


def geo_distance(data, n_neighbors=3, unreachable_dist=1000.0, directed=True):
    """Isomap-style geodesic distances between the samples in ``data``.

    A k-nearest-neighbour graph with distance-weighted edges is built from
    ``data`` and all-pairs shortest paths through that graph are computed with
    Dijkstra's algorithm.

    Args:
        data: samples in any form accepted by ``kneighbors_graph``
            (presumably a ``(n_samples, n_features)`` batch -- confirm
            against the caller).
        n_neighbors: number of neighbours per sample in the graph
            (default 3, the previously hard-coded value).
        unreachable_dist: finite value substituted for pairs that have no
            connecting path (default 1000, the previously hard-coded value).
        directed: forwarded to ``dijkstra``. The kNN relation is not
            symmetric, so with the default ``True`` the result may be
            asymmetric; pass ``False`` to traverse edges in both directions.

    Returns:
        A float32 ``torch.Tensor`` with one row and one column per sample,
        holding shortest-path distances.
    """
    #     truncated_matrix = torch.where(data_dist_matrix < min_dist, data_dist_matrix, torch.inf)
    #     data_dist_matrix = dijkstra(truncated_matrix.detach().cpu().numpy())
    #     data_dist_matrix = torch.FloatTensor(data_dist_matrix)
    #     data_dist_matrix = torch.where(data_dist_matrix == torch.inf, 1000 * torch.ones_like(data_dist_matrix), data_dist_matrix)

    data_nn_matrix = kneighbors_graph(
        data, n_neighbors, mode='distance', include_self=False
    ).toarray()
    # In the dense form, zero entries are treated by dijkstra as "no edge".
    shortest_paths = dijkstra(data_nn_matrix, directed=directed)
    data_dist_matrix = torch.as_tensor(shortest_paths, dtype=torch.float32)
    # Unreachable pairs come back as inf; replace them with a finite sentinel.
    unreachable = torch.isinf(data_dist_matrix)
    data_dist_matrix = torch.where(
        unreachable,
        torch.full_like(data_dist_matrix, unreachable_dist),
        data_dist_matrix,
    )
    return data_dist_matrix


#IsoMap-style geodesic distance for data


# torch.manual_seed(42)
# torch.cuda.empty_cache()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print('Using device: ', device)

    


#model = MDS(data.shape[0], latent_dim, Poincare)
# print(len(dataset))
# The model is sized by the dataset length; batches address it through `idx`
# (presumably the dataset indices of the batch items -- confirm against OdorMonoDataset).
model = Isomap(len(dataset), latent_dim, Euclidean)

optimizer = StandardOptim(model, lr=lr)

if __name__ == "__main__":
    for i in range(num_epochs):
        total_loss = 0
        if normalize:
            model.normalize()
        for idx, batch in data_loader:
            # Target distances are recomputed per mini-batch.
            if geodesic:
                data_dist_matrix = geo_distance(batch)
            else:
                data_dist_matrix = dist_matrix(batch, Euclidean)

            optimizer.zero_grad()
            loss = model.loss_fun(data_dist_matrix, idx)
            loss.backward()
            optimizer.step(idx)
            total_loss += loss.item()

            # print('norms', vector_norm(model.embeddings, dim=-1).mean().item(), vector_norm(model.embeddings, dim=-1).max().item())

        # Report the mean per-batch loss every tenth epoch.
        if i % 10 == 0:
            print(f'Epoch {i}, loss: {total_loss/len(data_loader):.3f}')

    # Detach once and reuse the NumPy view for both plots.
    embeddings_np = model.embeddings.detach().cpu().numpy()

    # The model above is a Euclidean Isomap, so the title says so
    # (it previously read 'Poincare Embeddings').
    scatterplot_2d(embeddings_np, labels=None, title='Euclidean Isomap Embeddings')

    size1 = (0.3, 0.28)
    size2 = (0.6, 1)
    size3 = (1, 0.35)

    plt.rcParams["font.size"] = 35
    df_gslf_mols = prepare_goodscentleffignwell_mols(base_dir)
    # `.numpy()` already yields an ndarray; the former `.values` access on it
    # raised AttributeError (ndarray has no `.values`), so the array is passed directly.
    # NOTE(review): "/kaggle/working/" is a hard-coded absolute output path.
    pom_frame(embeddings_np,
              np.asarray(df_gslf_mols.y.values.tolist()), "/kaggle/working/", gs_lf_tasks, "molformer", size1, size2,
              size3)




In [1]:
from sklearn.neighbors import kneighbors_graph
from scipy.sparse.csgraph import dijkstra
from torch.utils.data import DataLoader
import random
from constants import *
from methods import *
from optimizers import * 
from utils.visulization import *
from OdorDataset import OdorMonoDataset
from utils.helpers import *
# --- Optimisation settings ---------------------------------------------------
latent_dim = 2
lr = 0.1
num_epochs = 100_000

# --- Behaviour switches ------------------------------------------------------
normalize = False   # when True, model.normalize() is called before every batch update
geodesic = False    # only referenced by commented-out code in this cell
min_dist = 1.0      # only referenced by commented-out code in this cell

# --- Data selection ----------------------------------------------------------
dataset_name = 'gslf'
model_name = 'molformer'
batch_size = 10

def set_seeds(seed):
    """Apply ``seed`` to PyTorch, to NumPy's global RNG and to Python's ``random``.

    Args:
        seed: value handed as-is to each of the three seeding functions.
    """
    # torch.cuda.manual_seed(seed)
    # torch.cuda.manual_seed_all(seed)
    for apply_seed in (torch.manual_seed, np.random.seed, random.seed):
        apply_seed(seed)
def select_descriptors(dataset_name):
    """Map a dataset name to its descriptor list.

    Args:
        dataset_name: dataset identifier string.

    Returns:
        ``sagar_descriptors`` when the name is ``'sagar'``,
        ``keller_descriptors`` when it is ``'keller'``, otherwise ``None``.
    """
    # Early returns keep the lookup of each constant lazy.
    if dataset_name == 'sagar':
        return sagar_descriptors
    if dataset_name == 'keller':
        return keller_descriptors
    return None


# set_seeds(2025)
base_dir = '../../../T5 EVO/alignment_olfaction_datasets/curated_datasets/'
# NOTE(review): this path is built but not used below -- the dataset receives
# None as its second argument. Confirm that this is intentional.
input_embeddings = f'embeddings/{model_name}/{dataset_name}_{model_name}_embeddings_13_Apr17.csv'

dataset = OdorMonoDataset(
    base_dir,
    None,
    transform=None,
    grand_avg=False,
    descriptors=select_descriptors(dataset_name),
)
# Shuffled mini-batches; a trailing incomplete batch is dropped.
data_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)


    # del dataset


def geo_distance(data, n_neighbors=3, unreachable_dist=1000.0, directed=True):
    """Isomap-style geodesic distances between the samples in ``data``.

    A k-nearest-neighbour graph with distance-weighted edges is built from
    ``data`` and all-pairs shortest paths through that graph are computed with
    Dijkstra's algorithm.

    Args:
        data: samples in any form accepted by ``kneighbors_graph``
            (presumably a ``(n_samples, n_features)`` batch -- confirm
            against the caller).
        n_neighbors: number of neighbours per sample in the graph
            (default 3, the previously hard-coded value).
        unreachable_dist: finite value substituted for pairs that have no
            connecting path (default 1000, the previously hard-coded value).
        directed: forwarded to ``dijkstra``. The kNN relation is not
            symmetric, so with the default ``True`` the result may be
            asymmetric; pass ``False`` to traverse edges in both directions.

    Returns:
        A float32 ``torch.Tensor`` with one row and one column per sample,
        holding shortest-path distances.
    """
    #     truncated_matrix = torch.where(data_dist_matrix < min_dist, data_dist_matrix, torch.inf)
    #     data_dist_matrix = dijkstra(truncated_matrix.detach().cpu().numpy())
    #     data_dist_matrix = torch.FloatTensor(data_dist_matrix)
    #     data_dist_matrix = torch.where(data_dist_matrix == torch.inf, 1000 * torch.ones_like(data_dist_matrix), data_dist_matrix)

    data_nn_matrix = kneighbors_graph(
        data, n_neighbors, mode='distance', include_self=False
    ).toarray()
    # In the dense form, zero entries are treated by dijkstra as "no edge".
    shortest_paths = dijkstra(data_nn_matrix, directed=directed)
    data_dist_matrix = torch.as_tensor(shortest_paths, dtype=torch.float32)
    # Unreachable pairs come back as inf; replace them with a finite sentinel.
    unreachable = torch.isinf(data_dist_matrix)
    data_dist_matrix = torch.where(
        unreachable,
        torch.full_like(data_dist_matrix, unreachable_dist),
        data_dist_matrix,
    )
    return data_dist_matrix

def nn_g(data):
    """Return the dense 3-nearest-neighbour distance graph of ``data``.

    Args:
        data: samples in any form accepted by ``kneighbors_graph``.

    Returns:
        The dense array form of the sparse graph returned by
        ``kneighbors_graph`` (``mode='distance'``, self-edges excluded).
    """
    knn_sparse = kneighbors_graph(data, 3, mode='distance', include_self=False)
    return knn_sparse.toarray()

#IsoMap-style geodesic distance for data


# torch.manual_seed(42)
# torch.cuda.empty_cache()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print('Using device: ', device)

    


#model = MDS(data.shape[0], latent_dim, Poincare)
# print(len(dataset))
# model = Isomap(len(dataset), latent_dim, Euclidean)
# model = Contrastive(len(dataset), latent_dim, Euclidean)

# The model is sized by the dataset length; batches address it through `idx`
# (presumably the dataset indices of the batch items -- confirm against OdorMonoDataset).
model = Contrastive(len(dataset), latent_dim, Poincare)

#optimizer = StandardOptim(model, lr=lr)
optimizer = PoincareOptim(model, lr=lr)

if __name__ == "__main__":
    for i in range(num_epochs):
        total_loss = 0

        for idx, batch in data_loader:
            if normalize:
                model.normalize()
            # if geodesic:
            #     data_dist_matrix = geo_distance(batch)
            # else:
            #     data_dist_matrix = dist_matrix(batch, Euclidean)
            # kNN graph is rebuilt inside every mini-batch.
            data_nn_matrix = nn_g(batch)

            #binary matrix: 1 where the kNN graph has an edge, 0 elsewhere
            data_dist_matrix = (data_nn_matrix > 0).astype(int)

            optimizer.zero_grad()
            loss = model.loss_fun(data_dist_matrix, idx)
            loss.backward()
            optimizer.step(idx)
            total_loss += loss.item()

            # print('norms', vector_norm(model.embeddings, dim=-1).mean().item(), vector_norm(model.embeddings, dim=-1).max().item())

        # Report the mean per-batch loss every tenth epoch.
        if i % 10 == 0:
            print(f'Epoch {i}, loss: {total_loss/len(data_loader):.3f}')

    # Detach once and reuse the NumPy view for both plots.
    embeddings_np = model.embeddings.detach().cpu().numpy()

    scatterplot_2d(embeddings_np, labels=None, title='Poincare Embeddings')

    size1 = (0.3, 0.28)
    size2 = (0.6, 1)
    size3 = (1, 0.35)

    plt.rcParams["font.size"] = 35
    df_gslf_mols = prepare_goodscentleffignwell_mols(base_dir)
    # `.numpy()` already yields an ndarray; the former `.values` access on it
    # raised AttributeError (ndarray has no `.values`), so the array is passed directly.
    # NOTE(review): "/kaggle/working/" is a hard-coded absolute output path.
    pom_frame(embeddings_np,
              np.asarray(df_gslf_mols.y.values.tolist()), "/kaggle/working/", gs_lf_tasks, "molformer", size1, size2,
              size3)




Epoch 0, loss: 289.714
Epoch 10, loss: 83.460
Epoch 20, loss: 48.773
Epoch 30, loss: 49.475
Epoch 40, loss: 137.450
Epoch 50, loss: 40.684
Epoch 60, loss: 95.005
Epoch 70, loss: 72.771
Epoch 80, loss: 106.791
Epoch 90, loss: 103.854
Epoch 100, loss: 107.954
Epoch 110, loss: 118.721
Epoch 120, loss: 76.412
Epoch 130, loss: 49.903
Epoch 140, loss: 126.539
Epoch 150, loss: 70.090
Epoch 160, loss: 37.535
Epoch 170, loss: 119.115
Epoch 180, loss: 70.411
Epoch 190, loss: 41.788
Epoch 200, loss: 68.861
Epoch 210, loss: 67.636
Epoch 220, loss: 32.539
Epoch 230, loss: 65.456
Epoch 240, loss: 106.144
Epoch 250, loss: 30.989
Epoch 260, loss: 37.801
Epoch 270, loss: 61.228
Epoch 280, loss: 60.713
Epoch 290, loss: 29.083
Epoch 300, loss: 73.192
Epoch 310, loss: 94.391
Epoch 320, loss: 28.307
Epoch 330, loss: 27.407
Epoch 340, loss: 55.901
Epoch 350, loss: 33.678
Epoch 360, loss: 53.649
Epoch 370, loss: 26.198
Epoch 380, loss: 84.631
Epoch 390, loss: 50.512
Epoch 400, loss: 80.305
Epoch 410, loss: 3

Epoch 3410, loss: 9.398
Epoch 3420, loss: 8.018
Epoch 3430, loss: 10.669
Epoch 3440, loss: 9.356
Epoch 3450, loss: 8.981
Epoch 3460, loss: 9.070
Epoch 3470, loss: 11.662
Epoch 3480, loss: 7.539
Epoch 3490, loss: 10.275
Epoch 3500, loss: 11.554
Epoch 3510, loss: 9.858
Epoch 3520, loss: 11.554
Epoch 3530, loss: 9.180
Epoch 3540, loss: 12.363
Epoch 3550, loss: 10.033
Epoch 3560, loss: 8.447
Epoch 3570, loss: 8.404
Epoch 3580, loss: 7.932
Epoch 3590, loss: 9.223
Epoch 3600, loss: 12.363
Epoch 3610, loss: 9.815
Epoch 3620, loss: 7.756
Epoch 3630, loss: 8.107
Epoch 3640, loss: 10.961
Epoch 3650, loss: 12.013
Epoch 3660, loss: 10.778
Epoch 3670, loss: 8.677
Epoch 3680, loss: 8.853
Epoch 3690, loss: 9.247
Epoch 3700, loss: 10.603
Epoch 3710, loss: 9.223
Epoch 3720, loss: 8.041
Epoch 3730, loss: 10.736
Epoch 3740, loss: 7.363
Epoch 3750, loss: 8.084
Epoch 3760, loss: 8.622
Epoch 3770, loss: 8.018
Epoch 3780, loss: 9.659
Epoch 3790, loss: 7.932
Epoch 3800, loss: 9.924
Epoch 3810, loss: 10.603
Ep

Epoch 6940, loss: 11.445
Epoch 6950, loss: 7.932
Epoch 6960, loss: 8.875
Epoch 6970, loss: 8.864
Epoch 6980, loss: 10.494
Epoch 6990, loss: 7.297
Epoch 7000, loss: 9.640
Epoch 7010, loss: 8.349
Epoch 7020, loss: 8.151
Epoch 7030, loss: 10.208
Epoch 7040, loss: 9.070
Epoch 7050, loss: 9.247
Epoch 7060, loss: 9.968
Epoch 7070, loss: 8.938
Epoch 7080, loss: 8.174
Epoch 7090, loss: 8.151
Epoch 7100, loss: 8.127
Epoch 7110, loss: 9.398
Epoch 7120, loss: 7.998
Epoch 7130, loss: 7.998
Epoch 7140, loss: 8.349
Epoch 7150, loss: 11.904
Epoch 7160, loss: 8.240
Epoch 7170, loss: 12.255
Epoch 7180, loss: 9.815
Epoch 7190, loss: 8.349
Epoch 7200, loss: 10.185
Epoch 7210, loss: 9.659
Epoch 7220, loss: 8.127
Epoch 7230, loss: 8.696
Epoch 7240, loss: 8.502
Epoch 7250, loss: 7.823
Epoch 7260, loss: 7.890
Epoch 7270, loss: 7.823
Epoch 7280, loss: 7.823
Epoch 7290, loss: 7.055
Epoch 7300, loss: 9.815
Epoch 7310, loss: 9.398
Epoch 7320, loss: 10.294
Epoch 7330, loss: 8.689
Epoch 7340, loss: 10.736
Epoch 73

Epoch 10320, loss: 8.107
Epoch 10330, loss: 9.223
Epoch 10340, loss: 8.633
Epoch 10350, loss: 8.731
Epoch 10360, loss: 9.356
Epoch 10370, loss: 7.890
Epoch 10380, loss: 12.013
Epoch 10390, loss: 8.349
Epoch 10400, loss: 11.554
Epoch 10410, loss: 7.012
Epoch 10420, loss: 8.458
Epoch 10430, loss: 9.114
Epoch 10440, loss: 8.853
Epoch 10450, loss: 8.513
Epoch 10460, loss: 7.055
Epoch 10470, loss: 7.998
Epoch 10480, loss: 11.270
Epoch 10490, loss: 7.363
Epoch 10500, loss: 9.247
Epoch 10510, loss: 8.938
Epoch 10520, loss: 9.440
Epoch 10530, loss: 8.271
Epoch 10540, loss: 9.815
Epoch 10550, loss: 11.312
Epoch 10560, loss: 9.598
Epoch 10570, loss: 8.041
Epoch 10580, loss: 7.648
Epoch 10590, loss: 10.887
Epoch 10600, loss: 7.998
Epoch 10610, loss: 7.678
Epoch 10620, loss: 10.294
Epoch 10630, loss: 7.823
Epoch 10640, loss: 7.998
Epoch 10650, loss: 9.398
Epoch 10660, loss: 7.823
Epoch 10670, loss: 11.554
Epoch 10680, loss: 8.349
Epoch 10690, loss: 8.513
Epoch 10700, loss: 10.252
Epoch 10710, loss

Epoch 13630, loss: 7.998
Epoch 13640, loss: 8.525
Epoch 13650, loss: 8.174
Epoch 13660, loss: 9.682
Epoch 13670, loss: 7.812
Epoch 13680, loss: 8.283
Epoch 13690, loss: 7.363
Epoch 13700, loss: 8.458
Epoch 13710, loss: 9.398
Epoch 13720, loss: 9.331
Epoch 13730, loss: 10.210
Epoch 13740, loss: 9.070
Epoch 13750, loss: 7.823
Epoch 13760, loss: 11.554
Epoch 13770, loss: 7.998
Epoch 13780, loss: 8.447
Epoch 13790, loss: 9.598
Epoch 13800, loss: 9.616
Epoch 13810, loss: 9.247
Epoch 13820, loss: 8.786
Epoch 13830, loss: 8.696
Epoch 13840, loss: 8.786
Epoch 13850, loss: 12.363
Epoch 13860, loss: 7.920
Epoch 13870, loss: 8.696
Epoch 13880, loss: 10.057
Epoch 13890, loss: 8.544
Epoch 13900, loss: 10.185
Epoch 13910, loss: 7.920
Epoch 13920, loss: 12.363
Epoch 13930, loss: 7.998
Epoch 13940, loss: 11.312
Epoch 13950, loss: 7.932
Epoch 13960, loss: 8.283
Epoch 13970, loss: 9.968
Epoch 13980, loss: 9.598
Epoch 13990, loss: 9.815
Epoch 14000, loss: 9.356
Epoch 14010, loss: 11.203
Epoch 14020, loss

KeyboardInterrupt: 

In [3]:
import numpy as np

def hasone(node_index, dim_index):
    """Return 1 if ``dim_index``'s binary string is a prefix of ``node_index``'s, else 0.

    Both integers are rendered with ``np.binary_repr`` before the comparison.
    """
    node_bits = np.binary_repr(node_index)
    dim_bits = np.binary_repr(dim_index)
    return int(node_bits.startswith(dim_bits))

def get_data(depth, dtype=np.float32):
    """Build the ancestor-indicator matrix of a complete binary tree.

    Nodes are numbered 1 .. 2**depth - 1 in heap order (root = 1, children of
    k are 2k and 2k + 1). Entry ``[i, j]`` is 1 when the binary string of
    ``j + 1`` is a prefix of the binary string of ``i + 1`` -- i.e. node
    ``j + 1`` is node ``i + 1`` itself or one of its ancestors -- and 0
    otherwise. This is the same value ``hasone(i + 1, j + 1)`` yields.

    The matrix is filled with one vectorized pass per tree level instead of
    one interpreted call per entry, and ``depth=0`` returns an empty
    ``(0, 0)`` array instead of raising.

    Args:
        depth: number of levels in the tree.
        dtype: dtype of the returned array (default ``np.float32``).

    Returns:
        ``(n, n)`` array with ``n = 2**depth - 1`` containing zeros and ones.
    """
    n = 2**depth - 1
    x = np.zeros((n, n), dtype=dtype)
    rows = np.arange(n)
    # Start at each node itself; every right shift moves one level up the
    # tree (drops the last binary digit), reaching 0 once past the root.
    ancestor = rows + 1
    while ancestor.any():
        alive = ancestor > 0
        x[rows[alive], ancestor[alive] - 1] = 1
        ancestor >>= 1
    return x

# Load the data: build the matrix returned by get_data for a tree of depth 11,
# i.e. a (2**11 - 1) x (2**11 - 1) = 2047 x 2047 array.
depth = 11
binary_tree = get_data(depth)