In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import torch 
from data_loader import Loader
from torch.utils.data import DataLoader, Dataset
from hutils import *
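import os

# NOTE(review): stray fragment, not valid Python in this cell (nothing named `algorithms` is imported): "class": algorithms.Blowfish,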


In [2]:
# Dataset root folder, and the CSV that is read into `df` for the rest of the notebook.
root = 'SynthHands_Release'
df = pd.read_csv('synthetic_hand.csv')
# Preview the first five rows.
df.head(n = 5)

Unnamed: 0,images,imageID,abs_path,labels
0,SynthHands_Release/male_noobject/seq05/cam04/0...,167,SynthHands_Release/male_noobject/seq05/cam04/02,male_noobject
1,SynthHands_Release/female_object/seq06/cam03/0...,396,SynthHands_Release/female_object/seq06/cam03/02,female_object
2,SynthHands_Release/female_noobject/seq04/cam05...,106,SynthHands_Release/female_noobject/seq04/cam05/02,female_noobject
3,SynthHands_Release/male_object/seq02/cam03/03/...,125,SynthHands_Release/male_object/seq02/cam03/03,male_object
4,SynthHands_Release/male_noobject/seq07/cam02/0...,450,SynthHands_Release/male_noobject/seq07/cam02/01,male_noobject


In [3]:
# Number of rows in the dataframe.
len(df)

167070

In [4]:
# SEED = 123
# img_size = (299,299)
# train_loader = Loader(df, img_size, transform = crop_and_resize, seed = SEED, subset = 'train', val_split = .1)
# val_loader   = Loader(df, img_size, transform = crop_and_resize, seed = SEED, subset = 'val', val_split = .1)

# BATCH = 128
# train_data   = DataLoader(train_loader, shuffle = True, batch_size = BATCH)
# val_data     = DataLoader(val_loader, shuffle = False, batch_size = BATCH)

In [5]:
# len(train_loader), len(val_loader), df.shape[0] - 16707.0

In [4]:
from pytorch_lightning import Trainer  
from pytorch_lightning.callbacks import ModelCheckpoint
import torchvision.models as models
import pytorch_lightning as pl
from torch.nn import functional as F
import torch.nn as nn
import gc
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint

In [14]:
torch.set_float32_matmul_precision('medium')
class HNet(pl.LightningModule):
    """Inception-v3 backbone with a small fully connected regression head.

    The stock ``fc`` layer of torchvision's Inception-v3 is replaced by a
    three-layer MLP ending in ``k`` outputs, trained with mean-squared error.
    The module also builds its own train/validation dataloaders from ``df``.

    Args:
        df: dataframe handed to ``Loader`` for both the train and val subsets.
        k: width of the last linear layer (the verbose message calls these
            "keypoints"; presumably 21 joints x 2 coordinates - confirm
            against ``Loader``).
        img_size: image size forwarded to ``Loader``.
        test_size: forwarded to ``Loader`` as ``val_split``.
        verbose: when true, print the head dimensions and the full model.
        seed: forwarded to ``Loader`` as ``seed``.
        batch: batch size used by both dataloaders.
        num_workers: worker processes per ``DataLoader``; the default keeps
            the previously hard-coded value of 16.
    """

    def __init__(self, df = None, k = 21*2, img_size = (299,299), test_size = 0.2, verbose = False, seed = 0, batch = 64, num_workers = 16):
        super().__init__()
        torch.cuda.empty_cache()
        self.test_size = test_size
        self.verbose = verbose
        self.df = df
        self.SEED = seed
        self.BATCH = batch
        self.img_size = img_size
        self.num_workers = num_workers
        # No explicit .cuda(): the loss holds no tensors and Lightning places the module on the right device.
        self.criterion = nn.MSELoss()
        # Not read anywhere in this class; kept because external code may rely on the attribute.
        self.pt_path = os.path.join('models', 'inception_v3_google-0cc3c7bd.pth')
        self.model = models.inception_v3(weights= models.Inception_V3_Weights.DEFAULT, progress = True)
        num_features = self.model.fc.in_features
        if verbose:
            print(f'Number of features in the last layer : {num_features}')
            print(f'Number of keypoints to detect : {k}')
        # NOTE(review): the trailing ReLU clamps every prediction to >= 0 and passes no
        # gradient for outputs that go negative - confirm the targets are non-negative
        # and that this activation is intended on a regression output.
        self.model.fc = nn.Sequential(
             nn.Linear(num_features, num_features // 2, bias = False),
             nn.ReLU(inplace=True),
             nn.Dropout(p = .2),
             nn.Linear(num_features // 2, 256, bias = False),
             nn.ReLU(inplace=True),
             nn.Linear(256, k, bias = False),
             nn.ReLU(inplace=True),
            )
        if verbose:
            print(self.model)

    def forward(self, image):
        """Run the wrapped Inception-v3 model on ``image`` and return its raw output."""
        return self.model(image)

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss (logged under ``'loss'``)."""
        loss, _ = self.tr_process_(batch, batch_idx, 'train')
        self.log('loss', loss, sync_dist=True, prog_bar=True, logger=True, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss (logged under ``'val_loss'``)."""
        loss, _ = self.tr_process_(batch, batch_idx, 'val')
        self.log('val_loss', loss, sync_dist=True, prog_bar=True, logger=True, on_epoch=True)
        return loss

    def tr_process_(self, batch, batch_idx, subset):
        """Shared forward + loss computation for one batch.

        Args:
            batch: ``(images, keypoints)`` pair as produced by the dataloaders.
            batch_idx: unused; kept for signature compatibility.
            subset: ``'train'`` or ``'val'``; kept for signature compatibility,
                the output type is now detected from the prediction itself.

        Returns:
            ``(loss, preds)`` where ``preds`` is the main-head prediction tensor.
        """
        imgs, kpts = batch
        preds = self.forward(imgs)

        # In training mode torchvision's Inception-v3 returns a namedtuple with
        # `logits`/`aux_logits`; in eval mode it returns a plain tensor. Checking the
        # object instead of the `subset` string keeps both call paths correct whatever
        # mode the module is in.
        if hasattr(preds, 'logits'):
            preds = preds.logits

        loss = self.criterion(preds, kpts)

        return loss, preds

    def configure_optimizers(self):
        """RMSprop (lr 0.01) with a ReduceLROnPlateau scheduler monitoring ``val_loss``."""
        optimizer = torch.optim.RMSprop(self.parameters(), lr = 0.01)
        scheduler = {
        'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose = True),
        'monitor': 'val_loss',
        'interval': 'epoch',
        # NOTE(review): the scheduler is only stepped every 2nd epoch - confirm this is intended.
        'frequency': 2
        }
        return [optimizer], [scheduler]

    def _make_dataloader(self, subset, shuffle):
        """Build the ``DataLoader`` for ``subset`` ('train' or 'val') from ``self.df``."""
        dataset = Loader(self.df, self.img_size, transform = crop_and_resize, seed = self.SEED, subset = subset, val_split = self.test_size)
        return DataLoader(dataset, shuffle = shuffle, batch_size = self.BATCH, num_workers = self.num_workers)

    def train_dataloader(self):
        """Shuffled dataloader over the training split."""
        return self._make_dataloader('train', shuffle = True)

    def val_dataloader(self):
        """Unshuffled dataloader over the validation split."""
        return self._make_dataloader('val', shuffle = False)
    
    
def save_model(model, mname):
    """Write ``model.state_dict()`` to ``models/<mname>``.

    Args:
        model: object exposing ``state_dict()`` (e.g. an ``nn.Module``).
        mname: file name to create inside the ``models`` directory.
    """
    save_dir = 'models'
    # torch.save does not create missing parent directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok = True)
    save_path = os.path.join(save_dir, mname)
    torch.save(model.state_dict(), save_path)

In [15]:
### checkpoint callback 

# Output directory and file-name stem for saved checkpoints.
dirpath = 'models'
mname   = 'syn_hnet'  
pt_path = os.path.join(dirpath, mname)

# Keep the two checkpoints with the lowest logged `val_loss`.
checkpoint_callback = ModelCheckpoint(
    monitor = 'val_loss',
    mode = 'min',
    save_top_k = 2,
    dirpath = dirpath,
    filename = mname + '-{epoch:02d}-{val_loss:.2f}',
)

In [16]:
### configs 
kpts       = 21 
epochs     = 5
device     = torch.cuda.device_count()
sample_run = epochs < 2
img_size   = (299,299)
# NOTE(review): this rebinds `mname` (first set in the checkpoint-callback cell) to a
# checkpoint file name; later cells that read `mname` see this value.
mname     = 'syn_hnet.pt-epoch=04-val_loss=0.09.ckpt'
ckpt_path = os.path.join(dirpath, mname)
# Any positive GPU count selects the GPU; `device == 1` sent multi-GPU machines to the CPU.
accelerator = 'gpu' if device > 0 else 'cpu'

logger = TensorBoardLogger("lightning_logs", name="hnet_model_12_22")
# devices = 1 keeps the run single-process even when several GPUs are visible.
trainer    = Trainer(fast_dev_run = sample_run, log_every_n_steps = 1, accelerator = accelerator, devices = 1,
                     max_epochs = epochs, logger = logger, callbacks=[checkpoint_callback])

hnet_kwargs = dict(df = pd.read_csv('synthetic_hand.csv'), k = kpts * 2, verbose = True, img_size = img_size, test_size = 0.1, seed = 123, batch = 16)
# model.load_state_dict(torch.load(pt_path))
if os.path.isfile(ckpt_path):
    # load_from_checkpoint is a classmethod that RETURNS the restored module; calling it
    # on an existing instance and discarding the result leaves that instance untouched.
    model  = HNet.load_from_checkpoint(ckpt_path, map_location = 'cpu', **hnet_kwargs)
else:
    # Allow a fresh Restart & Run All when no checkpoint has been produced yet.
    print(f'Checkpoint not found, starting from pretrained backbone : {ckpt_path}')
    model  = HNet(**hnet_kwargs)
print(f'DEV run : {sample_run}')
print(f"Accelerator : {accelerator}")

gc.collect()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of features in the last layer : 2048
Number of keypoints to detect : 42
Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Co

DEV run : False
Accelerator : gpu


8821

In [6]:
# Query availability first: torch.cuda.get_device_name() raises on a machine without CUDA.
cuda_available = torch.cuda.is_available()
if cuda_available:
    print(torch.cuda.get_device_name())
cuda_available

NVIDIA GeForce RTX 3060 Laptop GPU


True

In [None]:
# Ask PyTorch to release its cached GPU memory, then start training.
# The dataloaders come from the model itself (HNet.train_dataloader / val_dataloader).
torch.cuda.empty_cache()
trainer.fit(model)

Missing logger folder: lightning_logs\hnet_model_12_22
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type       | Params
-----------------------------------------
0 | criterion | MSELoss    | 0     
1 | model     | Inception3 | 27.5 M
-----------------------------------------
27.5 M    Trainable params
0         Non-trainable params
27.5 M    Total params
109.929   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Length of training data : 150363
Length of testing data : 16707
Length of training data : 150363
Length of testing data : 16707


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [8]:
# Load the TensorBoard notebook extension and open it on the `lightning_logs` directory
# (the directory the TensorBoardLogger above writes to).
%load_ext tensorboard
%tensorboard --logdir=lightning_logs --host localhost

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 17984), started 0:01:25 ago. (Use '!kill 17984' to kill it.)

In [11]:
# Display the current value of `mname`; it is assigned in more than one cell above,
# so the value shown depends on which of those cells ran last.
mname

'syn_hnet.pt'

In [None]:
# Save only the weights, via the notebook's own helper, under an explicit file name.
# torch.save(model, mname) pickled the whole LightningModule (including the dataframe
# held in model.df) and wrote to whatever `mname` happened to hold at that point.
save_model(model, 'syn_hnet.pt')