In [1]:
import torch
import torch.nn as nn
from tqdm import tqdm
import random 
random_seed = 123  
random.seed(random_seed)
from sklearn.metrics import f1_score
from datasetSSL import VideoDatasetSSL
from utils import *
from models.pytorch_i3d import InceptionI3d
from opts import *
#from mmcv.runner import freeze_stages

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
torch.cuda.empty_cache()
train_batch_size = 6
num_workers = 8
#save_model = 'ssl_ohp__'
save_model = 'ssl_squat'

Using device: cuda


# SSL

In [3]:
input_path = "./FAQA/OHP/Unlabeled/"
dataloaders = {}
dataloaders['train'] = torch.utils.data.DataLoader(VideoDatasetSSL(input_path, 5970),
                                                    batch_size=train_batch_size,
                                                    num_workers=num_workers,
                                                    shuffle=True,
                                                    pin_memory=True,
                                                    worker_init_fn=worker_init_fn)

In [4]:
class MotionDisentangling(torch.nn.Module):
    def __init__(self, f=1024):
        super().__init__()
        self.backbone = InceptionI3d()
        self.backbone.load_state_dict(torch.load(i3d_pretrained_path))
        
        self.head = torch.nn.Sequential(
            torch.nn.Linear(f, 512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(512, 512)
        )

    def forward(self, x):
        features = self.backbone(x)
        features = features.squeeze() #[B,1024,1]=>[B,1024]
        x = self.head(features)

        return x

In [5]:

class DistanceRatioLoss(nn.Module):
    
    def __init__(self):
        super(DistanceRatioLoss, self).__init__()
    
    def forward(self, anchor, positive, negative):
        dist_pos_sq = torch.sum(torch.pow(anchor - positive, 2), dim=1)
        dist_neg_sq = torch.sum(torch.pow(anchor - negative, 2), dim=1)
        
        dist_pos = torch.exp(-torch.sqrt(dist_pos_sq))
        dist_neg = torch.exp(-torch.sqrt(dist_neg_sq))
        loss = -torch.log( dist_pos / (dist_pos + dist_neg))
        
        loss = torch.mean(loss)
        return loss

In [6]:
def train_ssl(model, num_epochs, optimizer, criterion, dataloaders):
    model.to(device)
    best = 100
    for epoch in range(num_epochs):        
        model.train()
        torch.set_grad_enabled(True)
        train_loss = 0.0

        with tqdm(total=len(dataloaders['train']), unit="batch", desc=f"Epoch {epoch}/{num_epochs}") as tepoch:
            for batch_idx, (anchor, positive, negative) in enumerate(dataloaders['train']):
                optimizer.zero_grad()
                anchor_emb = model(anchor.to(device))
                positive_emb = model(positive.to(device))
                negative_emb = model(negative.to(device))
                loss = criterion(anchor_emb, positive_emb, negative_emb)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                tepoch.set_postfix(loss=loss.item())
                tepoch.update(1)

        train_loss /= len(dataloaders['train'])
        print('Epoch [{}/{}], Train Loss: {:.4f}'.format(epoch, num_epochs, train_loss))
        if train_loss < best:
            best = train_loss
            ruta_guardado = '{0}.pt'.format(save_model)
            torch.save(model.state_dict(), ruta_guardado)

ssl_model = MotionDisentangling()
lr = 1e-4
criterion = DistanceRatioLoss()
optimizer = torch.optim.Adam(ssl_model.parameters(),lr=lr, weight_decay=1e-5)
num_epochs = 20
train_ssl(ssl_model, num_epochs,optimizer,criterion, dataloaders)

# SS

In [7]:
from dataset import VideoDataset
from config import get_parser
from logger import Logger

from utils import *
import torch.nn.init as init

model_name = 'squat_kf'
#model_name = 'squat_ki'
#model_name = 'ohp_e'
#model_name = 'ohp_k'
#data = 'error_knees_inward.json'
data = 'error_knees_forward.json'
#model_name = 'ohp_k'
#model_name = 'ohp_e'
#data = 'error_elbows.json'
#data = 'error_knees.json'
#dataset_path = './FAQA/OHP/Labeled/'
dataset_path = './FAQA/Squat/Labeled/'

In [8]:
class Args:
    def __init__(self, dataset_path):
        self.dataset_path = dataset_path

In [9]:
args = Args(dataset_path)
s_train = VideoDataset('train', args, data)
s_train_loader = torch.utils.data.DataLoader(s_train,
                                                       batch_size=4,
                                                       num_workers=8,
                                                       shuffle=True,
                                                       pin_memory=True,
                                                       worker_init_fn=worker_init_fn)
s_val = VideoDataset('val', args, data)
s_val_loader  = torch.utils.data.DataLoader(s_val,
                                                      batch_size=4,
                                                      num_workers=8,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      worker_init_fn=worker_init_fn)
print(s_train.__getitem__(0)['video'].shape)
print(s_train.__len__())
print(s_val.__len__())

labels = np.array(s_train.getlabels()) #sin encabezado
class_frequencies = torch.bincount(torch.IntTensor(labels))
class_weights = 1.0 / class_frequencies
print(class_weights)


num_positive = np.sum(labels == 1)
num_negative = np.sum(labels == 0)
weight_positive = num_negative / (num_positive + num_negative)
weight_negative = num_positive / (num_positive + num_negative)
#print(weight_positive, weight_negative)
weights = torch.FloatTensor ([num_negative / num_positive]).to(device)
print(weights)

torch.Size([64, 3, 224, 224])
1136
243
tensor([0.0028, 0.0013])
tensor([0.4527], device='cuda:0')


In [10]:
class W_BCEWithLogitsLoss(torch.nn.Module): 
    
    def __init__(self, w_p = None, w_n = None):
        super(W_BCEWithLogitsLoss, self).__init__()
        
        self.w_p = w_p
        self.w_n = w_n
        
    def forward(self, ps, labels, epsilon = 1e-7):
        
        loss_pos = -1 * torch.mean(self.w_p * labels * torch.log(ps + epsilon))
        loss_neg = -1 * torch.mean(self.w_n * (1-labels) * torch.log((1-ps) + epsilon))
        
        loss = loss_pos + loss_neg
        
        return loss

In [11]:
class FTModel(torch.nn.Module):
    def __init__(self, n_outputs=1):
        super().__init__()
        self.backbone = InceptionI3d()
        
        #state_dict = torch.load('./models/{0}.pt'.format(save_model))
        
        #mapped_state_dict = {}
        #for k, v in state_dict.items():
        #    if k.startswith('backbone.'):
        #        k = k[len('backbone.'):]  # Remove the 'backbone.' prefix
        #        mapped_state_dict[k] = v

        #self.backbone.load_state_dict(mapped_state_dict)

        #for i, param in enumerate(self.backbone.parameters()):
        #    param.requires_grad = False 
       
        #for name, param in self.backbone.named_parameters():
        #    if 'Conv3d_1a_7x7' in name or 'Conv3d_2b_1x1' in name or 'Conv3d_2c_3x3' in name or 'Mixed_3b' in name:
        #        param.requires_grad = False

        path = "./models/rgb_i3d_pretrained.pt"
        self.backbone.load_state_dict(torch.load(path))
            
        feature_dim = 1024 #1024
        self.head = torch.nn.Sequential(
            torch.nn.Linear(feature_dim, 256),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(128, n_outputs)
        )

        self.getprob = nn.Sigmoid() 

    def forward(self, x):
        x = self.backbone(x)
        x = torch.mean(x,-1)
        x = self.head(x)
        #x = self.getprob(x) #BCEwithlogits already has
        return x

In [12]:
def train_FTModel(model, num_epochs, optimizer, criterion, train_loader, val_loader, scheduler, model_name):
    hist = {'loss': [], 'acc': [], 'test_acc': []}
    best = 0
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        #true_scores, pred_scores, keys_list = [], [], []
        with tqdm(train_loader, unit="batch") as tepoch:
            for data in tepoch:
                videos = data['video'].to(device)
                videos.transpose_(1, 2)
                batch_size, C, frames, H, W = videos.shape
                labels = torch.tensor(data['final_score'].numpy().reshape((batch_size, -1))).to(device).float()
                tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")
                optimizer.zero_grad()
                outputs = model(videos)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                tepoch.set_postfix(loss=loss.item())

        train_loss /= len(train_loader)
        model.eval()
        val_loss = 0.0
        
        with torch.no_grad():
            y_true = []
            y_pred = []
            
            with tqdm(val_loader, unit="batch") as tepoch:
                for data in tepoch:
                    videos = data['video'].to(device)
                    videos.transpose_(1, 2)
                    batch_size, C, frames, H, W = videos.shape
                    tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")
                    #labels = torch.tensor(data['final_score'].numpy()).to(device)
                    labels = torch.tensor(data['final_score'].numpy().reshape((batch_size, -1))).to(device).float()
                    outputs = model(videos) #Logits
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
                    tepoch.set_postfix(loss=loss.item())
                    pred_cls = []
                    m = nn.Sigmoid() 
                    outputs = m(outputs)
                    for i in range(len(outputs)):
                        pred_cls.append(1 if outputs[i] > 0.5 else 0)
                    y_true.extend(data['final_score'].numpy().reshape((batch_size, -1)).flatten().tolist())
                    y_pred.extend(pred_cls)
                
        
        val_loss /= len(val_loader)
        #scheduler.step(val_loss)
        #scheduler.step()
        print('Epoch [{}/{}], Train Loss: {:.7f} ,Val Loss: {:.7f}'.format(epoch+1, num_epochs, train_loss, val_loss))
        f1 = f1_score(y_true, y_pred,average='macro')
        f1score_class_1 = f1_score(y_true, y_pred, pos_label=1)
        f1score_class_0 = f1_score(y_true, y_pred, pos_label=0)
        print('F1 score on the val: {:.7f}, F1 Class 1: {:.7f}, F1 Class 0: {:.7f}'.format(f1, f1score_class_1, f1score_class_0))

        if f1 > best:
            best = f1
            print('-----New best found!-----')
            checkpoint = {
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }

            torch.save(checkpoint, 'checkpoint_supervied_{0}.pt'.format(model_name))

In [13]:
def SSLeval():
    ft_model = FTModel()
    
    def init_weights(m):
        if isinstance(m, nn.Linear):
            init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)

    ft_model.apply(init_weights)
    ft_model.to(device)
    optimizer = torch.optim.Adam(ft_model.parameters(), lr=1e-4, weight_decay=1e-5)    
    criterion = nn.BCEWithLogitsLoss(pos_weight=weights)
    #criterion = nn.BCELoss()
    lambda1 = lambda epoch: 0.95
    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.1, patience=2, verbose = True)
    #scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lambda1, verbose = True)
    scheduler = None
    num_epochs = 20
    train_FTModel(ft_model, num_epochs, optimizer, criterion,s_train_loader, s_val_loader, scheduler, model_name)

In [14]:
SSLeval()

Epoch 1/20: 100%|██████████| 284/284 [05:14<00:00,  1.11s/batch, loss=0.46] 
Epoch 1/20: 100%|██████████| 61/61 [00:30<00:00,  1.97batch/s, loss=0.299]


Epoch [1/20], Train Loss: 0.4590028 ,Val Loss: 0.4417790
F1 score on the val: 0.5726011, F1 Class 1: 0.7731092, F1 Class 0: 0.3720930
-----New best found!-----


Epoch 2/20: 100%|██████████| 284/284 [05:14<00:00,  1.11s/batch, loss=0.516]
Epoch 2/20: 100%|██████████| 61/61 [00:21<00:00,  2.78batch/s, loss=0.299]


Epoch [2/20], Train Loss: 0.4349379 ,Val Loss: 0.4267547
F1 score on the val: 0.6578942, F1 Class 1: 0.7648903, F1 Class 0: 0.5508982
-----New best found!-----


Epoch 3/20: 100%|██████████| 284/284 [05:13<00:00,  1.10s/batch, loss=0.361]
Epoch 3/20: 100%|██████████| 61/61 [00:21<00:00,  2.80batch/s, loss=0.439]


Epoch [3/20], Train Loss: 0.4103401 ,Val Loss: 0.4196015
F1 score on the val: 0.4888410, F1 Class 1: 0.4383562, F1 Class 0: 0.5393258


Epoch 4/20: 100%|██████████| 284/284 [05:11<00:00,  1.10s/batch, loss=0.337]
Epoch 4/20: 100%|██████████| 61/61 [00:21<00:00,  2.79batch/s, loss=0.307]


Epoch [4/20], Train Loss: 0.3562000 ,Val Loss: 0.4323402
F1 score on the val: 0.5981653, F1 Class 1: 0.6643110, F1 Class 0: 0.5320197


Epoch 5/20: 100%|██████████| 284/284 [05:11<00:00,  1.10s/batch, loss=0.306] 
Epoch 5/20: 100%|██████████| 61/61 [00:22<00:00,  2.76batch/s, loss=0.436] 


Epoch [5/20], Train Loss: 0.2543691 ,Val Loss: 0.5161296
F1 score on the val: 0.6043108, F1 Class 1: 0.6759582, F1 Class 0: 0.5326633


Epoch 6/20: 100%|██████████| 284/284 [05:14<00:00,  1.11s/batch, loss=0.147]  
Epoch 6/20: 100%|██████████| 61/61 [00:21<00:00,  2.77batch/s, loss=0.387] 


Epoch [6/20], Train Loss: 0.1759184 ,Val Loss: 0.6002421
F1 score on the val: 0.5099893, F1 Class 1: 0.5220884, F1 Class 0: 0.4978903


Epoch 7/20: 100%|██████████| 284/284 [05:14<00:00,  1.11s/batch, loss=0.0981] 
Epoch 7/20: 100%|██████████| 61/61 [00:21<00:00,  2.78batch/s, loss=0.499] 


Epoch [7/20], Train Loss: 0.1207159 ,Val Loss: 0.5996946
F1 score on the val: 0.6366181, F1 Class 1: 0.7607362, F1 Class 0: 0.5125000


Epoch 8/20: 100%|██████████| 284/284 [05:13<00:00,  1.10s/batch, loss=0.0695]  
Epoch 8/20: 100%|██████████| 61/61 [00:21<00:00,  2.79batch/s, loss=0.771] 


Epoch [8/20], Train Loss: 0.0779527 ,Val Loss: 0.7385732
F1 score on the val: 0.6305868, F1 Class 1: 0.7157191, F1 Class 0: 0.5454545


Epoch 9/20: 100%|██████████| 284/284 [05:14<00:00,  1.11s/batch, loss=0.00153] 
Epoch 9/20: 100%|██████████| 61/61 [00:21<00:00,  2.81batch/s, loss=0.507]  


Epoch [9/20], Train Loss: 0.0455633 ,Val Loss: 1.1207543
F1 score on the val: 0.6047766, F1 Class 1: 0.7853107, F1 Class 0: 0.4242424


Epoch 10/20: 100%|██████████| 284/284 [05:12<00:00,  1.10s/batch, loss=0.000845]
Epoch 10/20: 100%|██████████| 61/61 [00:22<00:00,  2.77batch/s, loss=0.688]  


Epoch [10/20], Train Loss: 0.0312514 ,Val Loss: 0.9259942
F1 score on the val: 0.6482945, F1 Class 1: 0.7771084, F1 Class 0: 0.5194805


Epoch 11/20: 100%|██████████| 284/284 [05:12<00:00,  1.10s/batch, loss=0.0575]  
Epoch 11/20: 100%|██████████| 61/61 [00:21<00:00,  2.81batch/s, loss=1.01]   


Epoch [11/20], Train Loss: 0.0306729 ,Val Loss: 0.8933986
F1 score on the val: 0.6185504, F1 Class 1: 0.6515152, F1 Class 0: 0.5855856


Epoch 12/20: 100%|██████████| 284/284 [05:10<00:00,  1.09s/batch, loss=0.795]   
Epoch 12/20: 100%|██████████| 61/61 [00:21<00:00,  2.89batch/s, loss=1.39]   


Epoch [12/20], Train Loss: 0.0484325 ,Val Loss: 0.9564599
F1 score on the val: 0.5469492, F1 Class 1: 0.5600000, F1 Class 0: 0.5338983


Epoch 13/20:  21%|██        | 59/284 [01:05<04:09,  1.11s/batch, loss=0.00195] 


KeyboardInterrupt: 

In [15]:
s_test = VideoDataset('test', args, data)
s_test_loader  = torch.utils.data.DataLoader(s_test,
                                                      batch_size=4,
                                                      num_workers=8,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      worker_init_fn=worker_init_fn)

In [16]:
def eval_FTModel(model, test_loader):  
    with torch.no_grad():
        y_true = []
        y_pred = []
        
        with tqdm(test_loader, unit="batch") as tepoch:
            for data in tepoch:
                videos = data['video'].to(device)
                videos.transpose_(1, 2)
                batch_size, C, frames, H, W = videos.shape    
                outputs = model(videos)
                m = nn.Sigmoid() 
                outputs = m(outputs)
                pred_cls = []
                for i in range(len(outputs)):
                    pred_cls.append(1 if outputs[i] > 0.5 else 0)
                y_true.extend(data['final_score'].numpy().reshape((batch_size, -1)).flatten().tolist())
                y_pred.extend(pred_cls)
        
        f1 = f1_score(y_true, y_pred,average='macro')
        f1score_class_1 = f1_score(y_true, y_pred, pos_label=1)
        f1score_class_0 = f1_score(y_true, y_pred, pos_label=0)
        print('F1 score on the test: {:.7f} C0: {:.2f} C1: {:.2f}'.format(f1,f1score_class_0,f1score_class_1))

In [17]:
ft_model = FTModel()
path = "./checkpoint_supervied_{0}.pt".format(model_name)
checkpoint = torch.load(path)
ft_model.load_state_dict(checkpoint['model_state_dict'])
ft_model.to(device)
eval_FTModel(ft_model, s_test_loader)

100%|██████████| 61/61 [00:31<00:00,  1.91batch/s]

F1 score on the test: 0.5763145 C0: 0.42 C1: 0.73



