In [1]:
pip install torch
pip install torchviz
pip install torchinfo

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary
import math
import numpy as np
import pandas as pd

In [5]:
def load_csv_data(file_path, **kwargs):
    """
    Load data from a CSV file into a pandas DataFrame.
    
    Parameters:
    file_path (str): Path to the CSV file
    **kwargs: Additional arguments to pass to pd.read_csv()
    
    Returns:
    pandas.DataFrame: The loaded data
    """
    try:
        df = pd.read_csv(file_path, **kwargs)
        print(f"Successfully loaded {len(df)} rows and {len(df.columns)} columns")
        return df
    except Exception as e:
        print(f"Error loading file: {str(e)}")
        return None

class CSVDataset(Dataset):
    """
    Custom Dataset for loading CSV data into PyTorch.
    """
    def __init__(self, file_path, target_column, feature_columns=None, 
                 transform=None, target_transform=None):
        """
        Args:
            file_path (str): Path to the CSV file
            target_column (str): Name of the target column
            feature_columns (list): List of feature column names. If None, uses all columns except target
            transform (callable, optional): Transform to apply to features
            target_transform (callable, optional): Transform to apply to target
        """
        self.data = load_csv_data(file_path)
        if self.data is None:
            raise ValueError("Failed to load data")
            
        # Handle feature columns
        if feature_columns is None:
            self.feature_columns = [col for col in self.data.columns 
                                  if col != target_column]
        else:
            self.feature_columns = feature_columns
            
        self.target_column = target_column
        self.transform = transform
        self.target_transform = target_transform
        
        # Convert data to float32 for PyTorch compatibility
        self.X = self.data[self.feature_columns].astype('float32').values
        self.y = self.data[self.target_column].astype('float32').values
        
        # Convert to tensors
        self.X = torch.FloatTensor(self.X)
        self.y = torch.FloatTensor(self.y)
        
        print(f"Features shape: {self.X.shape}")
        print(f"Target shape: {self.y.shape}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        
        if self.transform:
            features = self.transform(features)
        if self.target_transform:
            target = self.target_transform(target)
            
        return features, target

def create_data_loaders(dataset, batch_size=32, train_split=0.8, 
                       shuffle=True, num_workers=4):
    """
    Create train and validation DataLoaders from a dataset.
    
    Args:
        dataset (Dataset): The full dataset
        batch_size (int): Size of each batch
        train_split (float): Proportion of data to use for training
        shuffle (bool): Whether to shuffle the data
        num_workers (int): Number of worker processes for data loading
        
    Returns:
        tuple: (train_loader, val_loader)
    """
    # Calculate split indices
    train_size = int(train_split * len(dataset))
    val_size = len(dataset) - train_size
    
    # Split the dataset
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size])
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers
    )
    
    return train_loader, val_loader

In [8]:
# Create a dataset
dataset = CSVDataset(
    file_path='all_compounds.csv',
    target_column='smiles',
)

# Create train and validation data loaders
train_loader, val_loader = create_data_loaders(
    dataset,
    batch_size=32,
    train_split=0.8
)

# Use in training loop
for epoch in range(num_epochs):
    for batch_features, batch_targets in train_loader:
        # Your training code here
        pass
print(dataset)

Error loading file: Error tokenizing data. C error: Expected 4 fields in line 199553, saw 63



ValueError: Failed to load data

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = 
model = model.to(device)

In [5]:
# Model parameters
d_model = 1024   # Embedding dimension
nhead = 8       # Number of attention heads
n_enc = 4 
n_dec = 8
batch_size = 32
seq_length = 512

# Create transformer
transformer = nn.Transformer(
    d_model=d_model,
    nhead=nhead,
    num_encoder_layers=n_enc,
    num_decoder_layers=n_dec,
    batch_first=True,  # Use (batch, seq, feature) format
    norm_first=True
).to(torch.bfloat16)

# Create random input tensors
src = torch.rand(batch_size, seq_length, d_model).to(torch.bfloat16)
tgt = torch.rand(batch_size, seq_length, d_model).to(torch.bfloat16)

# Create attention masks
tgt_mask = torch.triu(torch.ones(seq_length, seq_length) * float('-inf'), diagonal=1)

"""
# Forward pass
output = transformer(
    src,
    tgt,
    tgt_mask=tgt_mask
)

print(f"Input shape: {src.shape}")
print(f"Output shape: {output.shape}")

# Check a sample of the output
print("\nSample output values:")
print(output[0, 0, :10])  # First 10 values of first sequence in batch
"""
#print(transformer)
summary(transformer,
        input_size=[(batch_size, seq_length, d_model), (batch_size, seq_length, d_model)],
        dtypes=[torch.bfloat16, torch.bfloat16])

  validate_user_params(
  validate_user_params(


KeyboardInterrupt: 

In [20]:
class swish(nn.Module):
    def __init__(self, init_beta=1.0, trainable=False):
        super().__init__()
        b = torch.tensor(data=init_beta, dtype=torch.float32)
        self.beta = torch.nn.Parameter(b, requires_grad=trainable)
        
    def forward(self, x):
        return x * torch.sigmoid(self.beta * x)

class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super().__init__()
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = d_model // num_heads
        
        self.q_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.out = nn.Linear(d_model, d_model)
        
        self.activation = nn.Softmax()
        
    def forward(self, q, k, v, mask=None):
        batch_size = q.size(0)
        
        q = self.activation(self.q_linear(q)).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        k = self.activation(self.k_linear(k)).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        v = self.activation(self.v_linear(v)).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)
        
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))
        
        attention = torch.softmax(scores, dim=-1)
        out = torch.matmul(attention, v)
        out = out.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        return self.activation(self.out(out))

class PositionwiseFeedforward(nn.Module):
    def __init__(self, d_model, d_ff):
        super().__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.activation = swish()
        
    def forward(self, x):
        return self.linear2(self.activation(self.linear1(x)))

class EncoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionwiseFeedforward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, mask=None):
        attn_output = self.self_attn(x, x, x, mask)
        x = self.norm1(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm2(x + self.dropout(ff_output))
        return x

class DecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.cross_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionwiseFeedforward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, enc_output, src_mask=None, tgt_mask=None):
        attn_output = self.self_attn(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout(attn_output))
        
        attn_output = self.cross_attn(x, enc_output, enc_output, src_mask)
        x = self.norm2(x + self.dropout(attn_output))
        
        ff_output = self.feed_forward(x)
        x = self.norm3(x + self.dropout(ff_output))
        return x

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_seq_length=5000):
        super().__init__()
        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        return x + self.pe[:, :x.size(1)]

class transformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, d_model=512, num_heads=8, num_layers=6, d_ff=2048, max_seq_length=5000, dropout=0.1):
        super().__init__()
        self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
        self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_seq_length)
        
        self.encoder_layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.decoder_layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        
        self.output_linear = nn.Linear(d_model, tgt_vocab_size)
        self.dropout = nn.Dropout(dropout)
        self.activation = swish()
        
    def encode(self, src, src_mask=None):
        src = self.dropout(self.positional_encoding(self.encoder_embedding(src)))
        for layer in self.encoder_layers:
            src = layer(src, src_mask)
        return src
    
    def decode(self, tgt, memory, src_mask=None, tgt_mask=None):
        tgt = self.dropout(self.positional_encoding(self.decoder_embedding(tgt)))
        for layer in self.decoder_layers:
            tgt = layer(tgt, memory, src_mask, tgt_mask)
        return tgt
    
    def forward(self, src, tgt, src_mask=None, tgt_mask=None):
        encoder_output = self.encode(src, src_mask)
        decoder_output = self.decode(tgt, encoder_output, src_mask, tgt_mask)
        output = self.activation(self.output_linear(decoder_output))
        return output

def create_mask(seq_len):
    mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()
    return mask.unsqueeze(0)

In [28]:
def create_mask(seq_len):
    mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()
    return mask.unsqueeze(0)
create_mask(5)

tensor([[[False,  True,  True,  True,  True],
         [False, False,  True,  True,  True],
         [False, False, False,  True,  True],
         [False, False, False, False,  True],
         [False, False, False, False, False]]])

In [21]:
model = SwishTransformer(
    src_vocab_size=10000,
    tgt_vocab_size=10000,
    d_model=512,
    num_heads=8
)

src = torch.randint(1, 10000, (32, 20))
tgt = torch.randint(1, 10000, (32, 20))
tgt_mask = create_mask(tgt.size(1))

output = model(src, tgt, tgt_mask=tgt_mask)

  return self._call_impl(*args, **kwargs)


In [9]:
from torchviz import make_dot
import numpy as np

# Assuming you have a PyTorch model (e.g., 'model') and some input data (e.g., 'x')
y = transformer_model(np.random(512))

# Generate a model architecture visualization
make_dot(y.mean(), params=dict(model.named_parameters())).render("model_arch", format="png")

TypeError: 'module' object is not callable