In [4]:
import torch
import torch.nn as nn
from tqdm import tqdm
import random 
random_seed = 123  
random.seed(random_seed)
from sklearn.metrics import f1_score
from datasetSSL import VideoDatasetSSL
from utils import *
#from torchvision.models.video import r2plus1d_18, R2Plus1D_18_Weights
from models.pytorch_i3d import InceptionI3d

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
torch.cuda.empty_cache()
train_batch_size = 6
num_workers = 8

Using device: cuda


# SSL

In [6]:
input_path = "./FAQA/OHP/Unlabeled/"
dataloaders = {}
dataloaders['train'] = torch.utils.data.DataLoader(VideoDatasetSSL(input_path, 5490),
                                                    batch_size=train_batch_size,
                                                    num_workers=num_workers,
                                                    shuffle=True,
                                                    pin_memory=True,
                                                    worker_init_fn=worker_init_fn)
save_model = 'ssl_ohp'

In [4]:
class MotionDisentangling(torch.nn.Module):
    def __init__(self, f=1024, path = './models/rgb_i3d_pretrained.pt'):
        super().__init__()
        # Load the pre-trained R(2+1)D 18 model on Kinetics400_V1
        #weights = R2Plus1D_18_Weights.DEFAULT
        #self.backbone = r2plus1d_18(weights=weights)
        #self.backbone = torchvision.models.video.r2plus1d_18(pretrained=True)
        self.backbone = InceptionI3d()
        self.backbone.load_state_dict(torch.load(path))
        
        self.head = torch.nn.Sequential(
            torch.nn.Linear(f, f),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(f, f)
        )

    def forward(self, x):
        features = self.backbone(x)
        features = features.squeeze() #[B,1024]
        x = self.head(features)

        return x

In [5]:

class DistanceRatioLoss(nn.Module):
    
    def __init__(self, margin=0.5, epsilon=1e-8):
        super(DistanceRatioLoss, self).__init__()
        self.margin = margin
        self.epsilon = epsilon
        
    
    def forward(self, anchor, positive, negative):
        dist_pos_sq = torch.sum(torch.pow(anchor - positive, 2), dim=1)
        dist_neg_sq = torch.sum(torch.pow(anchor - negative, 2), dim=1)
        
        dist_pos = torch.exp(-torch.sqrt(dist_pos_sq))
        dist_neg = torch.exp(-torch.sqrt(dist_neg_sq))
        loss = -torch.log( dist_pos / (dist_pos + dist_neg))
        
        loss = torch.mean(loss)
        return loss

In [6]:
def train_ssl(model, num_epochs, optimizer, criterion, dataloaders):
    model.to(device)
    best = 100
    for epoch in range(num_epochs):        
        model.train()
        torch.set_grad_enabled(True)
        train_loss = 0.0

        with tqdm(total=len(dataloaders['train']), unit="batch", desc=f"Epoch {epoch}/{num_epochs}") as tepoch:
            for batch_idx, (anchor, positive, negative) in enumerate(dataloaders['train']):
                optimizer.zero_grad()
                anchor_emb = model(anchor.to(device))
                positive_emb = model(positive.to(device))
                negative_emb = model(negative.to(device))
                loss = criterion(anchor_emb, positive_emb, negative_emb)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                tepoch.set_postfix(loss=loss.item())
                tepoch.update(1)

        train_loss /= len(dataloaders['train'])
        print('Epoch [{}/{}], Train Loss: {:.4f}'.format(epoch, num_epochs, train_loss))
        if train_loss < best:
            best = train_loss
            ruta_guardado = '{0}.pt'.format(save_model)
            torch.save(model.state_dict(), ruta_guardado)

In [None]:
ssl_model = MotionDisentangling()
lr = 1e-4
criterion = DistanceRatioLoss()
optimizer = torch.optim.Adam(ssl_model.parameters(),lr=lr, weight_decay=1e-5)
num_epochs = 20
train_ssl(ssl_model, num_epochs,optimizer,criterion, dataloaders)

# SS

In [None]:
from dataset import VideoDataset
from config import get_parser
from logger import Logger

from utils import *
import torch.nn.init as init

#model_name = 'squat_kf'
#model_name = 'squat_ki'
#model_name = 'ohp_e'
#model_name = 'ohp_k'
#data = 'error_knees_inward.json'
#data = 'error_knees_forward.json'
model_name = 'ohp_k'
#model_name = 'ohp_e'
#data = 'error_elbows.json'
data = 'error_knees.json'
dataset_path = './data/FAQA/Squat/'
dataset_path = './data/FAQA/OHP/'

In [None]:
class Args:
    def __init__(self, dataset_path):
        self.dataset_path = dataset_path

In [None]:
args = Args(dataset_path)
s_train = VideoDataset('train', args, data)
s_train_loader = torch.utils.data.DataLoader(s_train,
                                                       batch_size=4,
                                                       num_workers=8,
                                                       shuffle=True,
                                                       pin_memory=True,
                                                       worker_init_fn=worker_init_fn)
s_val = VideoDataset('val', args, data)
s_val_loader  = torch.utils.data.DataLoader(s_val,
                                                      batch_size=4,
                                                      num_workers=8,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      worker_init_fn=worker_init_fn)
print(s_train.__getitem__(0)['video'].shape)
print(s_train.__len__())
print(s_val.__len__())

labels = np.array(s_train.getlabels()) #sin encabezado

num_positive = np.sum(labels == 1)
num_negative = np.sum(labels == 0)
weight_positive = num_negative / (num_positive + num_negative)
weight_negative = num_positive / (num_positive + num_negative)
print(weight_positive, weight_negative)

torch.Size([32, 3, 224, 224])
1582
339
0.6580278128950695 0.3419721871049305


In [None]:
class W_BCEWithLogitsLoss(torch.nn.Module): 
    
    def __init__(self, w_p = None, w_n = None):
        super(W_BCEWithLogitsLoss, self).__init__()
        
        self.w_p = w_p
        self.w_n = w_n
        
    def forward(self, ps, labels, epsilon = 1e-7):
        
        loss_pos = -1 * torch.mean(self.w_p * labels * torch.log(ps + epsilon))
        loss_neg = -1 * torch.mean(self.w_n * (1-labels) * torch.log((1-ps) + epsilon))
        
        loss = loss_pos + loss_neg
        
        return loss

In [None]:
class FTModel(torch.nn.Module):
    def __init__(self, n_outputs=1):
        super().__init__()
        self.backbone = InceptionI3d()
        
        state_dict = torch.load('{0}.pt'.format(save_model))
        
        mapped_state_dict = {}
        for k, v in state_dict.items():
            if k.startswith('backbone.'):
                k = k[len('backbone.'):]  # Remove the 'backbone.' prefix
            mapped_state_dict[k] = v

        self.backbone.load_state_dict(mapped_state_dict)

        #for i, param in enumerate(self.backbone.parameters()):
        #    param.requires_grad = False 
        
        #path = "./models/rgb_i3d_pretrained.pt"
        #self.backbone.load_state_dict(torch.load(path))
            
            #print(i)
        feature_dim = 1024 #1024
        self.head = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(feature_dim, 256),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(128, n_outputs)
        )

        self.dp = nn.Dropout(0.5)
        self.getprob = nn.Sigmoid() 

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        #output = self.getprob(x) #BCEwithlogits already has
        return x

In [None]:
def train_FTModel(model, num_epochs, optimizer, criterion, train_loader, val_loader, model_name):
    hist = {'loss': [], 'acc': [], 'test_acc': []}
    best = 0
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        #true_scores, pred_scores, keys_list = [], [], []
        with tqdm(train_loader, unit="batch") as tepoch:
            for data in tepoch:
                videos = data['video'].to(device)
                videos.transpose_(1, 2)
                batch_size, C, frames, H, W = videos.shape
                labels = torch.tensor(data['final_score'].numpy().reshape((batch_size, -1))).to(device).float()
                tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")
                optimizer.zero_grad()
                outputs = model(videos)
                #print(outputs.shape)
                #print(labels.shape)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                tepoch.set_postfix(loss=loss.item())

        train_loss /= len(train_loader)
        model.eval()
        val_loss = 0.0
        
        with torch.no_grad():
            y_true = []
            y_pred = []
            
            with tqdm(val_loader, unit="batch") as tepoch:
                for data in tepoch:
                    videos = data['video'].to(device)
                    videos.transpose_(1, 2)
                    batch_size, C, frames, H, W = videos.shape
                    tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")
                    #labels = torch.tensor(data['final_score'].numpy()).to(device)
                    labels = torch.tensor(data['final_score'].numpy().reshape((batch_size, -1))).to(device).float()
                    outputs = model(videos)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
                    tepoch.set_postfix(loss=loss.item())
                    pred_cls = []
                    for i in range(len(outputs)):
                        pred_cls.append(1 if outputs[i] > 0.5 else 0)
                    y_true.extend(data['final_score'].numpy().reshape((batch_size, -1)).flatten().tolist())
                    y_pred.extend(pred_cls)
                
        val_loss /= len(val_loader)
        print('Epoch [{}/{}], Train Loss: {:.7f} ,Val Loss: {:.7f}'.format(epoch+1, num_epochs, train_loss, val_loss))
        f1 = f1_score(y_true, y_pred,average='macro')
        f1score_class_1 = f1_score(y_true, y_pred, pos_label=1)
        f1score_class_0 = f1_score(y_true, y_pred, pos_label=0)
        print('F1 score on the val: {:.7f}, F1 Class 1: {:.7f}, F1 Class 0: {:.7f}'.format(f1, f1score_class_1, f1score_class_0))

        if f1 > best:
            best = f1
            print('-----New best found!-----')
            checkpoint = {
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }

            torch.save(checkpoint, 'checkpoint_supervied_{0}.pt'.format(model_name))

In [None]:
def SSLeval():
    ft_model = FTModel()
    
    def init_weights(m):
        if isinstance(m, nn.Linear):
            init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)

    ft_model.apply(init_weights)

    ft_model.to(device)
    optimizer = torch.optim.Adam(ft_model.parameters(), lr=1e-5, weight_decay=1e-7) #1e-7 funciona bien 
    #criterion = W_BCEWithLogitsLoss(w_p=weight_positive, w_n=weight_negative) 
    weights = torch.tensor([weight_positive]).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=weights)
    num_epochs = 10
    train_FTModel(ft_model, num_epochs, optimizer, criterion,s_train_loader, s_val_loader, model_name)

In [None]:
SSLeval()

Epoch 1/10: 100%|██████████| 396/396 [03:44<00:00,  1.76batch/s, loss=1.53]   
Epoch 1/10: 100%|██████████| 85/85 [00:14<00:00,  5.88batch/s, loss=0.295] 


Epoch [1/10], Train Loss: 0.7622022 ,Val Loss: 0.4360906
F1 score on the val: 0.6518207, F1 Class 1: 0.4761905, F1 Class 0: 0.8274510
-----New best found!-----


Epoch 2/10: 100%|██████████| 396/396 [03:46<00:00,  1.75batch/s, loss=0.505] 
Epoch 2/10: 100%|██████████| 85/85 [00:15<00:00,  5.60batch/s, loss=0.0862]


Epoch [2/10], Train Loss: 0.4607540 ,Val Loss: 0.3799159
F1 score on the val: 0.6308467, F1 Class 1: 0.4217687, F1 Class 0: 0.8399247


Epoch 3/10: 100%|██████████| 396/396 [03:48<00:00,  1.73batch/s, loss=0.32]  
Epoch 3/10: 100%|██████████| 85/85 [00:15<00:00,  5.57batch/s, loss=0.013]  


Epoch [3/10], Train Loss: 0.3512368 ,Val Loss: 0.4115641
F1 score on the val: 0.6781646, F1 Class 1: 0.5063291, F1 Class 0: 0.8500000
-----New best found!-----


Epoch 4/10: 100%|██████████| 396/396 [03:48<00:00,  1.73batch/s, loss=0.373]  
Epoch 4/10: 100%|██████████| 85/85 [00:15<00:00,  5.66batch/s, loss=0.103] 


Epoch [4/10], Train Loss: 0.2849281 ,Val Loss: 0.4463144
F1 score on the val: 0.7044326, F1 Class 1: 0.5562130, F1 Class 0: 0.8526523
-----New best found!-----


Epoch 5/10: 100%|██████████| 396/396 [03:48<00:00,  1.73batch/s, loss=0.00238]
Epoch 5/10: 100%|██████████| 85/85 [00:15<00:00,  5.65batch/s, loss=0.00143]


Epoch [5/10], Train Loss: 0.2004368 ,Val Loss: 0.4486103
F1 score on the val: 0.7674897, F1 Class 1: 0.6666667, F1 Class 0: 0.8683128
-----New best found!-----


Epoch 6/10: 100%|██████████| 396/396 [03:46<00:00,  1.75batch/s, loss=0.00673] 
Epoch 6/10: 100%|██████████| 85/85 [00:15<00:00,  5.63batch/s, loss=0.0129]  


Epoch [6/10], Train Loss: 0.1122906 ,Val Loss: 0.6148764
F1 score on the val: 0.7173692, F1 Class 1: 0.5764706, F1 Class 0: 0.8582677


Epoch 7/10: 100%|██████████| 396/396 [03:42<00:00,  1.78batch/s, loss=6.96e-5] 
Epoch 7/10: 100%|██████████| 85/85 [00:14<00:00,  5.80batch/s, loss=0.00022] 


Epoch [7/10], Train Loss: 0.1013987 ,Val Loss: 0.6345371
F1 score on the val: 0.7788510, F1 Class 1: 0.6868687, F1 Class 0: 0.8708333
-----New best found!-----


Epoch 8/10: 100%|██████████| 396/396 [03:46<00:00,  1.75batch/s, loss=0.268]   
Epoch 8/10: 100%|██████████| 85/85 [00:15<00:00,  5.60batch/s, loss=6.31e-5]


Epoch [8/10], Train Loss: 0.0494573 ,Val Loss: 0.6754275
F1 score on the val: 0.7714804, F1 Class 1: 0.6798030, F1 Class 0: 0.8631579


Epoch 9/10: 100%|██████████| 396/396 [03:44<00:00,  1.76batch/s, loss=0.000115]
Epoch 9/10: 100%|██████████| 85/85 [00:15<00:00,  5.66batch/s, loss=2.06e-6] 


Epoch [9/10], Train Loss: 0.0519119 ,Val Loss: 0.5646961
F1 score on the val: 0.7855432, F1 Class 1: 0.6995074, F1 Class 0: 0.8715789
-----New best found!-----


Epoch 10/10: 100%|██████████| 396/396 [03:44<00:00,  1.77batch/s, loss=3.16e-5] 
Epoch 10/10: 100%|██████████| 85/85 [00:14<00:00,  5.81batch/s, loss=1.38e-5] 

Epoch [10/10], Train Loss: 0.0344845 ,Val Loss: 0.9208322
F1 score on the val: 0.7771597, F1 Class 1: 0.7022222, F1 Class 0: 0.8520971





In [None]:
s_test = VideoDataset('test', args, data)
s_test_loader  = torch.utils.data.DataLoader(s_test,
                                                      batch_size=4,
                                                      num_workers=8,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      worker_init_fn=worker_init_fn)

In [None]:
def eval_FTModel(model, test_loader):  
    with torch.no_grad():
        y_true = []
        y_pred = []
        
        with tqdm(test_loader, unit="batch") as tepoch:
            for data in tepoch:
                videos = data['video'].to(device)
                videos.transpose_(1, 2)
                batch_size, C, frames, H, W = videos.shape    
                outputs = model(videos)
                pred_cls = []
                for i in range(len(outputs)):
                    pred_cls.append(1 if outputs[i] > 0.5 else 0)
                y_true.extend(data['final_score'].numpy().reshape((batch_size, -1)).flatten().tolist())
                y_pred.extend(pred_cls)
        
        f1 = f1_score(y_true, y_pred,average='macro')
        print('F1 score on the test: {:.7f}'.format(f1))

In [None]:
criterion = nn.BCELoss()
ft_model = FTModel()
path = "./checkpoint_supervied_{0}.pt".format(model_name)
checkpoint = torch.load(path)
ft_model.load_state_dict(checkpoint['model_state_dict'])
ft_model.to(device)
eval_FTModel(ft_model, s_test_loader)