In [7]:
import sys
sys.path.append('models')

In [8]:
from mobilevitv1 import MobileViT
from mobilevitv2 import MobileViTv2

In [9]:
image_size = (232,232)
num_classes = 10
batch_size = 16
num_epochs = 50
lr = 1e-3

In [10]:
from torchvision import transforms
# Additional augmentations
transform = transforms.Compose([
    transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.2, 0.2)),
    transforms.RandomGrayscale(p=0.1),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    # transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [11]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

# Define the path to your dataset
# train_path = '/home/hung/auc.distracted.driver.dataset_v2/v2/cam1/train/'
# test_path = '/home/hung/auc.distracted.driver.dataset_v2/v2/cam1/test/'
train_path = '/home/hung/auc.distracted.driver.dataset_v2/v1/train/'
test_path = '/home/hung/auc.distracted.driver.dataset_v2/v1/test/'

# Create the ImageFolder dataset
train_dataset = ImageFolder(root=train_path, transform=transform)
test_dataset = ImageFolder(root=test_path, transform=transform)

# Create data loaders
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
import torch
import torch.nn as nn 
from torchvision import models
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define your model architecture (make sure it matches the one used during training)

# model = models.resnet50(pretrained=True)
# model.fc = nn.Sequential(
#     nn.Dropout(p=0.2, inplace=False),
#     nn.Linear(in_features=model.fc.in_features, out_features=num_cls, bias=True)
# )

# model = models.mnasnet0_75(weights=True)
# model.classifier = nn.Sequential(
#     nn.Dropout(p=0.2, inplace=True),
#     nn.Linear(in_features=model.classifier[1].in_features, out_features=num_classes, bias=True)
# )
# print(model)

model = MobileViTv2(
    image_size = image_size, 
    width_multiplier = 0.5,             # support [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2] as shown in paper
    num_classes=1000, 
    patch_size=(2, 2)
)
pretrained = 'pretrained/mobilevitv2-0.5.pt'
state_dict = torch.load(pretrained, map_location=device)
model.load_state_dict(state_dict)
model.classifier = nn.Sequential(
    nn.Linear(in_features=model.classifier[0].in_features, out_features=num_classes,bias=True)
)

model = model.to(device)

In [13]:
import torch.nn as nn
import torch.optim as optim

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [14]:
import json
# Assuming you have a list to store your logs
training_logs = []

In [15]:
!nvidia-smi

Wed Nov  1 18:07:47 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.125.06   Driver Version: 525.125.06   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  N/A |
| N/A   41C    P0    16W /  50W |    926MiB /  4096MiB |     20%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [16]:
# Train the model
for epoch in range(num_epochs):
    model.train()

    total_loss = 0
    total_correct = 0
    total_samples = 0

    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        
#         print(f'Epoch {epoch+1}/{num_epochs}, Step {step+1}/{len(train_loader)}, '
#               f'Train Loss: {total_loss:.4f}, Train Acc: {total_correct}', end='\r')
        
    scheduler.step()
    train_loss = total_loss / len(train_loader)
    train_accuracy = total_correct / total_samples

    # Validation
    model.eval()
    total_correct = 0
    total_samples = 0
    val_loss = 0

    with torch.no_grad():
        for step, (inputs, labels) in enumerate(val_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

#             print(f'Epoch {epoch+1}/{num_epochs}, Step {step+1}/{len(val_loader)}, '
#               f'Val Loss: {val_loss:.4f}, Val Acc: {total_correct}', end='\r')

    val_loss /= len(val_loader)
    val_accuracy = total_correct / total_samples
    
    log_entry = {
        'epoch': epoch + 1,
        'train_loss': train_loss,
        'train_accuracy': train_accuracy,
        'val_loss': val_loss,
        'val_accuracy': val_accuracy,
        'learning_rate': optimizer.param_groups[0]["lr"]
    }

    training_logs.append(log_entry)
    
    print(f'Epoch {epoch+1}/{num_epochs}, '
          f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
          f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}',
          f'Learning Rate: {optimizer.param_groups[0]["lr"]:.6f}')

Epoch 1/50, Train Loss: 1.9580, Train Accuracy: 0.3014, Val Loss: 2.8168, Val Accuracy: 0.1011 Learning Rate: 0.000900
Epoch 2/50, Train Loss: 1.5404, Train Accuracy: 0.4737, Val Loss: 14.9249, Val Accuracy: 0.0704 Learning Rate: 0.000810
Epoch 3/50, Train Loss: 1.4292, Train Accuracy: 0.5221, Val Loss: 9.5386, Val Accuracy: 0.2669 Learning Rate: 0.000729
Epoch 4/50, Train Loss: 1.3218, Train Accuracy: 0.5635, Val Loss: 21.6727, Val Accuracy: 0.0750 Learning Rate: 0.000656
Epoch 5/50, Step 13/812, Train Loss: 16.2731, Train Acc: 120

In [None]:
# Save the logs to a JSON file
with open('training_logs.json', 'w') as json_file:
    json.dump(training_logs, json_file)
# Save the trained model
torch.save(model.state_dict(), f'mobilevit_{num_epochs}.pt')