In [None]:
import timm
import torch
from torch.utils.tensorboard import SummaryWriter
# from torchvision import datasets
# from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
import tensorflow_datasets as tfds
import pandas as pd
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import f1_score


In [29]:
training_solutions = pd.read_csv('/Users/ruxi/Desktop/School/MY1/S2/DLiP/A3/data/training_solutions_rev1.csv', dtype=str)
image_dir = '//Users/ruxi/Desktop/School/MY1/S2/DLiP/A3/data/images_training_rev1'

# we ant to create a link between the image filenames and the actual labels 
# convert data to string for ease of doing that by adding jpg at the end of GalaxyID
training_solutions['filename'] = training_solutions['GalaxyID'].apply(lambda x: x + '.jpg')
training_labels = training_solutions.columns[1:-1].tolist()
training_solutions[training_labels] = training_solutions[training_labels].astype(float)

In [39]:
training_solutions.head

<bound method NDFrame.head of       GalaxyID  Class1.1  Class1.2  Class1.3  Class2.1  Class2.2  Class3.1  \
0       100008  0.383147  0.616853  0.000000  0.000000  0.616853  0.038452   
1       100023  0.327001  0.663777  0.009222  0.031178  0.632599  0.467370   
2       100053  0.765717  0.177352  0.056931  0.000000  0.177352  0.000000   
3       100078  0.693377  0.238564  0.068059  0.000000  0.238564  0.109493   
4       100090  0.933839  0.000000  0.066161  0.000000  0.000000  0.000000   
...        ...       ...       ...       ...       ...       ...       ...   
61573   999948  0.510379  0.489621  0.000000  0.059207  0.430414  0.000000   
61574   999950  0.901216  0.098784  0.000000  0.000000  0.098784  0.000000   
61575   999958  0.202841  0.777376  0.019783  0.116962  0.660414  0.067245   
61576   999964  0.091000  0.909000  0.000000  0.045450  0.863550  0.022452   
61577   999967  0.767000  0.140000  0.093000  0.000000  0.140000  0.000000   

       Class3.2  Class4.1  Class4

In [41]:
batch_size = 32
pixels = 224
channels = 3
num_epochs = 2

In [31]:
datagen = ImageDataGenerator(rescale=1./255., validation_split=0.25)

train_generator = datagen.flow_from_dataframe(
    dataframe=training_solutions,
    directory=image_dir,
    x_col='filename',
    y_col=training_labels,  
    subset='training',
    batch_size=batch_size,
    seed=42,
    shuffle=True,
    class_mode='raw',  #data is not one-hot encoded! it's probabilities of having a feature or not
    target_size=(pixels, pixels)  # resizes the image to fit model input whihc is 224x224 for levit 128s
)

validation_generator = datagen.flow_from_dataframe(
    dataframe=training_solutions,
    directory=image_dir,
    x_col='filename',
    y_col=training_labels,
    subset='validation',
    batch_size=batch_size,
    seed=42,
    shuffle=True,
    class_mode='raw',
    target_size=(pixels, pixels)
)

# a batch from this will be:
# input images (x): np.array (batch_size, 224, 224, 3)
# labels (y): np.array (batch_size, 37)

Found 46184 validated image filenames.
Found 15394 validated image filenames.


In [32]:

# we ARE working with torch because this is what LeViT was built on, but we are NOOB so we change the data to match expectations:

def convert_channels(generator):
    """
    converts the images from channel last to channel first because this is what pytorch expects
    """

    while True:
        x, y = next(generator)
        x = np.transpose(x, (0, 3, 1, 2))
        yield x, y

In [33]:
train_generator_conv = convert_channels(train_generator)
validation_generator_conv = convert_channels(validation_generator)

In [34]:
pretrained_model = timm.create_model('levit_128s', pretrained=True, num_classes=37)

for param in pretrained_model.parameters():
    param.requires_grad = False

for param in pretrained_model.head.parameters():
    param.requires_grad = True    

for param in pretrained_model.head_dist.parameters():
    param.requires_grad = True  

# pretrained_model.eval()

In [35]:
optimizer = torch.optim.Adam([
    {'params': pretrained_model.head.linear.parameters()},
    {'params': pretrained_model.head_dist.linear.parameters()}
], lr=1e-3)

loss_fn = torch.nn.BCEWithLogitsLoss()

In [36]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
experiment_number = "1"
pretrained_model.eval() # this is supposed to ensure that the dropout and batch renormalization do not get altered
log_dir = f"runs/experiment{experiment_number}"
model_weights_path = f"weights/model_weights_experiment{experiment_number}.pth"
writer = SummaryWriter(log_dir)

for epoch in range(num_epochs):
    epoch_train_loss = 0.0
    correct_train = 0
    total_train = 0
    steps_per_epoch = len(train_generator)

    for step in range(steps_per_epoch):
        x_batch, y_batch = next(train_generator_conv)
        x_tensor = torch.tensor(x_batch).float().to(device)
        y_tensor = torch.tensor(y_batch).float().to(device)
    
        # Now use x_tensor and y_tensor in your training step
        optimizer.zero_grad()
        outputs = pretrained_model(x_tensor)
        loss = loss_fn(outputs, y_tensor)
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item()

        predicted = (torch.sigmoid(outputs) > 0.5).float()  # Threshold at 0.5

        print(f"Step {step}:")
        print(f"  x_batch shape: {x_batch.shape}")
        print(f"  y_batch shape: {y_batch.shape}")
        print(f"  Model output shape: {outputs.shape}")
        print(f"  Predicted shape: {predicted.shape}")
        print(f"  y_tensor shape: {y_tensor.shape}")
        correct_train += (predicted == y_tensor).sum().item()
        total_train +=y_tensor.numel()


    avg_train_loss = epoch_train_loss / steps_per_epoch
    train_accuracy = correct_train / total_train

    epoch_val_loss = 0.0
    correct_val = 0
    total_val = 0
    steps_val = len(validation_generator)

    with torch.no_grad():
        for step in range(steps_val):
            x_val, y_val = next(validation_generator_conv)
            x_val_tensor = torch.tensor(x_val).float().to(device)
            y_val_tensor = torch.tensor(y_val).float().to(device)
            
            outputs_val = pretrained_model(x_val_tensor)
            loss_val = loss_fn(outputs_val, y_val_tensor)
            epoch_val_loss += loss_val.item()
            
            predicted_val = (torch.sigmoid(outputs_val) > 0.5).float()
            correct_val += (predicted_val == y_val_tensor).sum().item()
            total_val += y_val_tensor.numel()
    
    avg_val_loss = epoch_val_loss / steps_val
    val_accuracy = correct_val / total_val


    writer.add_scalar('Loss/Training', avg_train_loss, epoch)
    writer.add_scalar('Loss/Validation', avg_val_loss, epoch)
    writer.add_scalar('Accuracy/Training', train_accuracy, epoch)
    writer.add_scalar('Accuracy/Validation', val_accuracy, epoch)

writer.close()
torch.save(pretrained_model.state_dict(), model_weights_path)

Step 0:
  x_batch shape: (32, 3, 224, 224)
  y_batch shape: (32, 37)
  Model output shape: torch.Size([32, 37])
  Predicted shape: torch.Size([32, 37])
  y_tensor shape: torch.Size([32, 37])
Step 1:
  x_batch shape: (32, 3, 224, 224)
  y_batch shape: (32, 37)
  Model output shape: torch.Size([32, 37])
  Predicted shape: torch.Size([32, 37])
  y_tensor shape: torch.Size([32, 37])
Step 2:
  x_batch shape: (32, 3, 224, 224)
  y_batch shape: (32, 37)
  Model output shape: torch.Size([32, 37])
  Predicted shape: torch.Size([32, 37])
  y_tensor shape: torch.Size([32, 37])
Step 3:
  x_batch shape: (32, 3, 224, 224)
  y_batch shape: (32, 37)
  Model output shape: torch.Size([32, 37])
  Predicted shape: torch.Size([32, 37])
  y_tensor shape: torch.Size([32, 37])
Step 4:
  x_batch shape: (32, 3, 224, 224)
  y_batch shape: (32, 37)
  Model output shape: torch.Size([32, 37])
  Predicted shape: torch.Size([32, 37])
  y_tensor shape: torch.Size([32, 37])
Step 5:
  x_batch shape: (32, 3, 224, 224)
  

RuntimeError: result type Float can't be cast to the desired output type Long

In [54]:
# print(pretrained_model)