### **ResNet50 Demo**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import logging
import matplotlib.pyplot as plt
from torchvision.transforms.functional import to_pil_image, to_tensor
from  torchvision import transforms
from pkg import c, m, f

In [2]:
"""Instances"""
paths = c.PathManager()
dataset = c.PeakImageDataset(paths=paths, transform=None, augment=True)
prep = c.DataPreparation(paths=paths, batch_size=5)
p = c.PeakThresholdProcessor(threshold_value=100)

"""Clean sim/ directory"""
paths.clean_sim() # moves all .err, .out, .sh files sim_specs

"""Checks"""
peak_paths = paths.__get_peak_images_paths__()
water_paths = paths.__get_water_images_paths__()
print('Number of Peak Images: ', len(peak_paths), 'Number of Water Images', len(water_paths))

print("Peak images path:", paths.peak_images_dir)
print("Water images path:", paths.water_images_dir)

clean_sim did not move any files


Number of Peak Images:  54 Number of Water Images 54
Peak images path: /Users/adamkurth/Documents/vscode/CXFEL/cxls_hitfinder/images/peaks
Water images path: /Users/adamkurth/Documents/vscode/CXFEL/cxls_hitfinder/images/data


In [3]:
"""Train/Test Data Loaders"""
train_loader, test_loader = prep.prep_data()

"""Protein Mapping"""
protein_to_idx = {
    '1IC6': 0,
    # To be developed
}

"""Models"""
model_res50 = m.CustomResNet50(num_proteins=3, num_camlengths=3, heatmap_size=(2163,2069))

"""Loss/Optimizer"""
criterion_protein = criterion_camlength = torch.nn.CrossEntropyLoss()
criterion_peak = torch.nn.MSELoss() # for heatmap prediction, MSE is more appropriate
optimizer = torch.optim.Adam(model_res50.parameters(), lr=0.001)

print("Criterion: ", criterion_protein, criterion_camlength)
print("Optimizer: \n", optimizer)
print("Learning rate: ", optimizer.param_groups[0]['lr'])

"""Initial Setup"""
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
train_losses = []
test_losses = []

Data prepared.
Train size: 43
Test size: 11
Batch size: 5
Number of batches: 9 


Criterion:  CrossEntropyLoss() CrossEntropyLoss()
Optimizer: 
 Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)
Learning rate:  0.001


In [22]:
import numpy as np
def generate_heatmaps(batch_images, processor):
    batch_heatmaps = []
    for image_tensor in batch_images:
        peak_coords = processor._process_image(image_tensor)  # Process each image
        heatmap = np.zeros(image_tensor.squeeze().shape)
        for y, x in peak_coords:
            heatmap[y, x] = 1  # Set peak positions to 1
        heatmap_tensor = torch.tensor(heatmap).unsqueeze(0)  # Convert to tensor and adjust shape
        batch_heatmaps.append(heatmap_tensor)
    return torch.stack(batch_heatmaps) # return batch of heatmaps

In [5]:
"""Training"""
logging.info('Staring training...')
num_epochs = 1

for epoch in range(num_epochs):
    model_res50.train()
    running_loss = 0.0
    batch_counter = 0
    
    for batch_index, ((peak_images, water_images), labels) in enumerate(train_loader, start=1):
        if batch_index == 1:
            logging.info(f"Epoch {epoch+1}/{num_epochs} - First batch label structure: {labels[0]} with type {type(labels[0])}")
        
        for label in labels:
            print(f"Label structure: {label} {type(label)}\n\n")
            print(labels) # gives tuple of 3 tensors
        
        # Extract the protein identifiers assuming they are always the first element in the label tuple
        protein_identifiers = labels[0] # gives tuple ('1IC6', '1IC6', '1IC6', '1IC6', '1IC6')

        try:            
            # generate heatmaps for the batch
            # label_heatmaps = prep.generate_heatmaps(batch_images=peak_images, processor=p)
            label_heatmaps = prep.generate_heatmaps(batch_images=peak_images, processor=p)
            label_heatmaps = label_heatmaps.to(peak_images.device) # ensure the heatmap tensor is on the correct device
            
            # protein/camlen 
            label_protein = torch.tensor([protein_to_idx[label] for label in labels[0]], dtype=torch.long).to(peak_images.device)
            label_cam_len = labels[2].to(dtype=torch.long)
            
        except KeyError as e:
            logging.error(f"KeyError with label: {e}")
            print(f"KeyError with label: {e}")
            print(labels[:5])
            continue

        optimizer.zero_grad()

        # multi-task learning: predicting protein and camlength
        protein_pred, camlen_pred, peak_heatmap_pred = model_res50((peak_images, water_images))
        
        # losses
        protein_loss = criterion_protein(protein_pred, label_protein)
        camlength_loss = criterion_camlength(camlen_pred, label_cam_len)
        peak_heatmap_loss = criterion_peak(peak_heatmap_pred, label_heatmaps)
        
        # compute total loss  
        total_loss = protein_loss + camlength_loss + peak_heatmap_loss
        total_loss.backward()
        optimizer.step()
        running_loss += total_loss.item()
        batch_counter += 1

        if (batch_index + 1) % 10 == 0:  # Log every 10 batches
            logging.info(f'Epoch {epoch+1}, Batch {batch_index + 1}: Loss = {running_loss/(batch_index+1)}')

    avg_loss_train = running_loss / len(train_loader)
    train_losses.append(avg_loss_train)
    logging.info(f'Epoch {epoch+1} Training Completed. Avg Loss: {avg_loss_train:.4f}')

2024-03-11 17:40:20,567 - INFO - Staring training...


2024-03-11 17:40:21,151 - INFO - Epoch 1/1 - First batch label structure: ('1IC6', '1IC6', '1IC6', '1IC6', '1IC6') with type <class 'tuple'>


Label structure: ('1IC6', '1IC6', '1IC6', '1IC6', '1IC6') <class 'tuple'>


[('1IC6', '1IC6', '1IC6', '1IC6', '1IC6'), tensor([0.0100, 0.0100, 0.0100, 0.0100, 0.0100], dtype=torch.float64), tensor([0, 0, 0, 0, 0])]
Label structure: tensor([0.0100, 0.0100, 0.0100, 0.0100, 0.0100], dtype=torch.float64) <class 'torch.Tensor'>


[('1IC6', '1IC6', '1IC6', '1IC6', '1IC6'), tensor([0.0100, 0.0100, 0.0100, 0.0100, 0.0100], dtype=torch.float64), tensor([0, 0, 0, 0, 0])]
Label structure: tensor([0, 0, 0, 0, 0]) <class 'torch.Tensor'>


[('1IC6', '1IC6', '1IC6', '1IC6', '1IC6'), tensor([0.0100, 0.0100, 0.0100, 0.0100, 0.0100], dtype=torch.float64), tensor([0, 0, 0, 0, 0])]


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: Found dtype Double but expected Float

In [None]:
"""Testing"""
logging.info('Starting testing...')
model_res50.eval()
test_loss = 0 
correct_protein = 0
correct_camlen = 0
total = 0

with torch.no_grad():
    for (peak_images, water_images), labels in test_loader:
        # labels
        labels_protein = torch.tensor([protein_to_idx[protein] for protein in labels[0]], dtype=torch.long).to(peak_images.device)
        labels_camlen = labels[2].to(dtype=torch.long).to(peak_images.device)
        label_heatmaps = prep.generate_heatmaps(batch_images=peak_images, processor=p).to(peak_images.device)
        # model outputs
        protein_pred, camlength_pred, peak_heatmap_pred = model_res50((peak_images, water_images))
        # losses
        loss_protein = criterion_protein(protein_pred, labels_protein)
        loss_camlen = criterion_camlength(camlength_pred, labels_camlen)
        loss_peak = criterion_peak(peak_heatmap_pred, label_heatmaps)
        
        # adjust
        test_loss = loss_protein + loss_camlen # camlen and protein
        total_peak_loss = loss_peak.item() # peaks 
        
        # predictions
        _, predicted_protein = torch.max(protein_pred, 1)
        _, predicted_camlen = torch.max(camlength_pred, 1)
        # _, predicted_peak = torch.max(peak_heatmap_pred, 1)
        
        # calculate accuracy
        correct_protein += (predicted_protein == labels_protein).sum().item()
        correct_camlen += (predicted_camlen == labels_camlen).sum().item()
        total += labels_protein.size(0)
    
    avg_test_loss = test_loss / len(test_loader)
    test_losses.append(avg_test_loss)
    protein_accuracy = correct_protein / total
    camlength_accuracy = correct_camlen / total

    logging.info(f"Test Loss: {avg_test_loss:.4f}, Protein Accuracy: {protein_accuracy:.4f}, Camera Length Accuracy: {camlength_accuracy:.4f}")
    
logging.info('Testing completed.')


Peak Image Shape: torch.Size([1, 2163, 2069]), Water Image Shape: torch.Size([1, 2163, 2069])
Protein: 1IC6, Camera Length: 0.01, Label Camera Length: 0
Peak Image Shape: torch.Size([1, 2163, 2069]), Water Image Shape: torch.Size([1, 2163, 2069])
Protein: 1IC6, Camera Length: 0.01, Label Camera Length: 0
Peak Image Shape: torch.Size([1, 2163, 2069]), Water Image Shape: torch.Size([1, 2163, 2069])
Protein: 1IC6, Camera Length: 0.01, Label Camera Length: 0
Peak Image Shape: torch.Size([1, 2163, 2069]), Water Image Shape: torch.Size([1, 2163, 2069])
Protein: 1IC6, Camera Length: 0.01, Label Camera Length: 0
Peak Image Shape: torch.Size([1, 2163, 2069]), Water Image Shape: torch.Size([1, 2163, 2069])
Protein: 1IC6, Camera Length: 0.01, Label Camera Length: 0
Peak Image Shape: torch.Size([1, 2163, 2069]), Water Image Shape: torch.Size([1, 2163, 2069])
Protein: 1IC6, Camera Length: 0.01, Label Camera Length: 0


2024-03-05 11:49:12,816 - INFO - Test Loss: 0.0000, Protein Accuracy: 1.0000, Camera Length Accuracy: 1.0000
2024-03-05 11:49:12,817 - INFO - Training completed.


In [None]:
"""Plotting"""
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs+1), train_losses, label='Training Loss')
plt.plot(range(1, num_epochs+1), test_losses, label='Testing Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training vs Testing Loss')
plt.legend()
plt.show()

NameError: name 'train_losses' is not defined

<Figure size 1000x500 with 0 Axes>