In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader


# Load the Atchley factor table. AminoAcidDataset (defined below) indexes it by the
# 'amino.acid' column and treats every column after the first as a feature.
df = pd.read_csv("../files/atchley.csv")

# PyTorch Dataset
class AminoAcidDataset(Dataset):
    """Atchley-factor table usable both as a lookup and as a map-style dataset.

    Two access modes are supported through ``__getitem__``:

    * ``dataset["A"]`` -- lookup by amino-acid code; returns the row's feature
      values as a plain list.
    * ``dataset[0]`` -- positional access (what ``DataLoader`` uses); returns
      ``(code, features)`` where ``features`` is a 1-D float32 tensor, so a
      batch unpacks as ``for codes, features in dataloader``.

    Args:
        dataframe: Table with an ``'amino.acid'`` column; every column after
            the first is treated as a numeric feature.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.features = dataframe.iloc[:, 1:].values  # Atchley features
        self.aa_to_features_dict = self.dataframe.set_index('amino.acid').T.to_dict('list')
        # Amino-acid codes in row order, so a position can be mapped back to its code.
        self.amino_acids = dataframe['amino.acid'].tolist()

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.dataframe)

    def __getitem__(self, aa):
        """Fetch one entry by amino-acid code or by row position.

        Args:
            aa: An amino-acid code present in the table, or an integer row
                position (negative values count from the end).

        Returns:
            For a code: the list of feature values for that amino acid.
            For a position: ``(code, features)`` with ``features`` a float32
            tensor.

        Raises:
            KeyError: ``aa`` is neither a known code nor an integer.
            IndexError: ``aa`` is an integer outside the table; this is also
                what terminates plain ``for item in dataset`` iteration.
        """
        # Code lookup is checked first so existing ``dataset["A"]`` callers are unaffected.
        if aa in self.aa_to_features_dict:
            return self.aa_to_features_dict[aa]
        if isinstance(aa, int):
            code = self.amino_acids[aa]
            row = self.features[aa].astype("float32")
            return code, torch.from_numpy(row)
        raise KeyError(f"Amino acid '{aa}' not found in dataset.")


# Wrap the Atchley table in a Dataset, then serve it in shuffled mini-batches of two
dataset = AminoAcidDataset(dataframe=df)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=2)


In [50]:
# Look up a code that the table does not contain: __getitem__ raises KeyError for it.
dataset["B"]

KeyError: 'B'

In [4]:
import geoopt
import torch.nn as nn
# import geoopt.manifolds.poincare.math as pmath

class HyperbolicNN(nn.Module):
    """Single linear layer whose output is mapped onto the Poincaré ball.

    Args:
        input_dim: Number of input features per sample.
        embedding_dim: Dimension of the resulting hyperbolic embedding.
    """

    def __init__(self, input_dim, embedding_dim):
        super().__init__()
        # Euclidean projection from feature space to the embedding dimension.
        self.fc = nn.Linear(input_dim, embedding_dim)
        # Poincaré ball model of hyperbolic space; also exposed for distance queries.
        self.manifold = geoopt.PoincareBall()

    def forward(self, x):
        """Project ``x`` linearly, then apply the manifold's ``expmap0``."""
        projected = self.fc(x)
        return self.manifold.expmap0(projected)

# Build the model from the two size hyper-parameters
input_dim, embedding_dim = 5, 2  # Atchley feature count, hyperbolic embedding dimension
model = HyperbolicNN(input_dim=input_dim, embedding_dim=embedding_dim)


In [5]:
# Training setup: Adam over all model parameters, mean-squared error as the loss.
# The loss compares embedding coordinates directly, not distances on the manifold.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

# Dummy target embeddings (you can modify this according to your task)
# Four fixed 2-D points; they are matched to a batch by position only (see the slice below).
targets = torch.tensor([[0.1, 0.2], [-0.1, -0.2], [0.3, 0.4], [-0.3, -0.4]], dtype=torch.float32)

for epoch in range(10):
    # Assumes each batch unpacks into a pair whose second item is a float feature
    # tensor -- TODO confirm against the dataset's __getitem__.
    for _, features in dataloader:
        optimizer.zero_grad()
        
        # Forward pass: features to hyperbolic space
        embeddings = model(features)
        
        # Compute loss (using dummy targets here)
        # The first `batch size` target rows are reused for every batch, whatever it contains.
        loss = loss_fn(embeddings, targets[:embeddings.size(0)])  # Adjust target size
        
        # Backward pass
        loss.backward()
        optimizer.step()

    # `loss` still holds the value from the last batch of the epoch, so this is a
    # single-batch figure rather than an epoch average.
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")


Epoch 1, Loss: 0.28650596737861633
Epoch 2, Loss: 0.11376485973596573
Epoch 3, Loss: 0.06419456750154495
Epoch 4, Loss: 0.04249171167612076
Epoch 5, Loss: 0.042479388415813446
Epoch 6, Loss: 0.040920890867710114
Epoch 7, Loss: 0.030948229134082794
Epoch 8, Loss: 0.023223459720611572
Epoch 9, Loss: 0.042069703340530396
Epoch 10, Loss: 0.015929531306028366


In [None]:

# Example of calculating distances between amino acid embeddings
# Gradients are disabled: the model is only evaluated here, not trained.
with torch.no_grad():
    # Assumes each batch unpacks into a pair whose second item is the feature
    # tensor -- TODO confirm against the dataset's __getitem__.
    for _, features in dataloader:
        embeddings = model(features)
        print("Hyperbolic Embeddings:", embeddings)

        # Calculate distances between embeddings
        # Only pairs inside the same batch are compared (i < j, so each pair once).
        # NOTE(review): i and j are positions within the current batch, and the
        # dataloader is created with shuffle=True, so they do not identify a fixed amino acid.
        for i in range(embeddings.size(0)):
            for j in range(i + 1, embeddings.size(0)):
                # Calculate the distance directly with the manifold object (no need for 'pmath')
                dist = model.manifold.dist(embeddings[i], embeddings[j])
                print(f"Distance between amino acid {i} and {j}: {dist.item()}")


In [23]:
# Look up the entry stored for amino acid "A".
# NOTE(review): the saved traceback mentions `features_dict`, an attribute that the
# AminoAcidDataset shown in this notebook never reads (it uses `aa_to_features_dict`),
# so the recorded output looks stale -- re-run from a fresh kernel to confirm.
dataset["A"]

AttributeError: 'AminoAcidDataset' object has no attribute 'features_dict'

In [None]:
import torch

# Amino acid sequences
sequence1 = "AAA"
sequence2 = "AAC"

# Residue code -> list of Atchley factors, taken from the dataset built earlier
# in the notebook.
atchley_factors = dataset.aa_to_features_dict


def sequence_to_features(sequence, factors):
    """Encode a residue string as a float32 tensor with one row per residue.

    Args:
        sequence: Iterable of amino-acid codes, e.g. ``"AAC"``.
        factors: Mapping from amino-acid code to its list of feature values.

    Returns:
        A float32 tensor whose i-th row holds the features of ``sequence[i]``.

    Raises:
        KeyError: A residue in ``sequence`` is missing from ``factors``.
    """
    return torch.tensor([factors[aa] for aa in sequence], dtype=torch.float32)


# Convert sequences to Atchley factors
sequence1_features = sequence_to_features(sequence1, atchley_factors)
sequence2_features = sequence_to_features(sequence2, atchley_factors)


In [4]:
from collections import defaultdict

def make_symmetric_neighbors(neighbors):
    """Return a neighbor map in which every edge also appears reversed.

    Args:
        neighbors: Mapping ``key -> iterable of (seq, value)`` pairs, read as
            edges ``key -> seq`` carrying ``value``.

    Returns:
        A ``defaultdict(set)`` holding every original edge plus, for each
        ``key -> (seq, value)``, the reverse edge ``seq -> (key, value)``.
        Sequences that only occur as neighbors become keys of the result.
        ``neighbors`` and the sets inside it are left unmodified.
    """
    # Copy every neighbor set: sharing them with the input would write the
    # reverse edges into the caller's own data.
    combined = defaultdict(set, {key: set(values) for key, values in neighbors.items()})
    for key, values in neighbors.items():
        for seq, value in values:
            combined[seq].add((key, value))  # Add the inverted edge
    return combined

In [5]:
# Toy neighbor map: sequence -> set of (neighbor sequence, value) pairs
d = dict(
    AAA={("BBB", 3), ("CAC", 2)},
    BBB={("CAC", 3), ("DDD", 3)},
    EEE={("FFF", 3)},
)
make_symmetric_neighbors(d)

defaultdict(set,
            {'AAA': {('BBB', 3), ('CAC', 2)},
             'BBB': {('AAA', 3), ('CAC', 3), ('DDD', 3)},
             'EEE': {('FFF', 3)},
             'CAC': {('AAA', 2), ('BBB', 3)},
             'DDD': {('BBB', 3)},
             'FFF': {('EEE', 3)}})