In [None]:
# Mount Google Drive at /content/drive; the dataset and checkpoint paths used below live under it.
# This only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
pip install einops



# TimeSformer model

In [None]:
from torch import nn, einsum
import torch.nn.functional as F
from einops import rearrange, repeat
import torch
from google.colab.patches import cv2_imshow
class PreNorm(nn.Module):
    """Pre-normalisation wrapper: LayerNorm the input, then call the wrapped ``fn``.

    Args:
        dim: size of the last dimension that LayerNorm normalises over.
        fn: callable (usually a module) invoked on the normalised tensor.
    """
    def __init__(self, dim, fn):
        super().__init__()
        self.fn = fn
        self.norm = nn.LayerNorm(dim)

    def forward(self, x, *args, **kwargs):
        # Any extra positional/keyword arguments are forwarded untouched to ``fn``.
        return self.fn(self.norm(x), *args, **kwargs)
class GEGLU(nn.Module):
    """GELU-gated linear unit: halves the last dimension of its input.

    The input is split in two along the last axis; the first half is
    multiplied element-wise by GELU of the second half.
    """
    def forward(self, x):
        values, gates = torch.chunk(x, 2, dim = -1)
        activated_gates = F.gelu(gates)
        return values * activated_gates

class FeedForward(nn.Module):
    """Position-wise feed-forward block with a GEGLU activation.

    Args:
        dim: input and output feature size.
        mult: expansion factor of the hidden layer (hidden size is ``dim * mult``).
        dropout: dropout probability applied after the activation.
    """
    def __init__(self, dim, mult = 4, dropout = 0.):
        super().__init__()
        hidden_dim = dim * mult
        stages = [
            # Project to twice the hidden size because GEGLU halves the last dimension.
            nn.Linear(dim, hidden_dim * 2),
            GEGLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)
# attention

def attn(q, k, v):
    """Plain softmax attention over batched 3-D tensors.

    ``q`` is (b, i, d), ``k`` is (b, j, d) and ``v`` is (b, j, d_v); the result is
    (b, i, d_v). No scaling is applied here, the caller is responsible for it.
    """
    scores = einsum('b i d, b j d -> b i j', q, k)
    weights = scores.softmax(dim = -1)
    return einsum('b i j, b j d -> b i d', weights, v)

class Attention(nn.Module):
    """Multi-head attention applied along one axis (time or space) of a token grid.

    The first token of the sequence is treated as a classification token: it
    attends to every token, while the remaining tokens are regrouped with the
    einops patterns passed to ``forward`` and attend only within their group
    (plus the classification token).

    Args:
        dim: feature size of the input and output tokens.
        dim_head: feature size of each attention head.
        heads: number of attention heads.
        dropout: dropout probability applied to the output projection.
    """
    def __init__(
        self,
        dim,
        dim_head = 64,
        heads = 8,
        dropout = 0.
    ):
        super().__init__()
        self.heads = heads
        self.scale = dim_head ** -0.5
        inner_dim = dim_head * heads

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, einops_from, einops_to, **einops_dims):
        """Attend along the axis selected by the einops patterns.

        Args:
            x: tokens with the classification token at position 0 of dim 1.
            einops_from: einops pattern describing the patch tokens as stored.
            einops_to: einops pattern that groups the patch tokens for attention.
            **einops_dims: axis sizes needed to resolve the two patterns.

        Returns:
            Tensor with the same layout as ``x``.
        """
        h = self.heads
        q, k, v = self.to_qkv(x).chunk(3, dim = -1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))

        # Scale the queries by 1/sqrt(dim_head). This was previously disabled
        # (as a call on a float), which left the attention logits unscaled.
        # Out-of-place on purpose: q may be a view of the fused qkv projection.
        q = q * self.scale

        # splice out classification token at index 0
        (cls_q, q_), (cls_k, k_), (cls_v, v_) = map(lambda t: (t[:, 0:1], t[:, 1:]), (q, k, v))

        # let classification token attend to key / values of all patches across time and space
        cls_out = attn(cls_q, k, v)

        # rearrange across time or space
        q_, k_, v_ = map(lambda t: rearrange(t, f'{einops_from} -> {einops_to}', **einops_dims), (q_, k_, v_))

        # expand cls token keys and values across time or space and concat
        r = q_.shape[0] // cls_k.shape[0]
        cls_k, cls_v = map(lambda t: repeat(t, 'b () d -> (b r) () d', r = r), (cls_k, cls_v))

        k_ = torch.cat((cls_k, k_), dim = 1)
        v_ = torch.cat((cls_v, v_), dim = 1)

        # attention
        out = attn(q_, k_, v_)

        # merge back time or space
        out = rearrange(out, f'{einops_to} -> {einops_from}', **einops_dims)

        # concat back the cls token
        out = torch.cat((cls_out, out), dim = 1)

        # merge back the heads
        out = rearrange(out, '(b h) n d -> b n (h d)', h = h)

        # combine heads out
        return self.to_out(out)
class TimeSformer(nn.Module):
    """Video classifier built from alternating time and space attention layers.

    A video is cut into square patches, each patch is linearly embedded, a
    classification token is prepended and a learned positional embedding is
    added. Every layer then applies time attention, spatial attention and a
    feed-forward block, each with a residual connection. The logits are read
    from the classification token.

    Args:
        dim: token feature size.
        num_frames: number of frames used to size the positional embedding.
        num_classes: number of output logits.
        image_size: frame side length used to size the positional embedding.
        patch_size: side length of a square patch.
        channels: channels per frame.
        depth: number of (time attention, spatial attention, feed-forward) layers.
        heads: attention heads per attention block.
        dim_head: feature size per attention head.
        attn_dropout: dropout probability inside the attention blocks.
        ff_dropout: dropout probability inside the feed-forward blocks.
    """
    def __init__(
        self,
        *,
        dim,
        num_frames,
        num_classes,
        image_size = 224,
        patch_size = 16,
        channels = 3,
        depth = 12,
        heads = 8,
        dim_head = 64,
        attn_dropout = 0.,
        ff_dropout = 0.
    ):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'

        patches_per_frame = (image_size // patch_size) ** 2
        # One position per patch of every frame, plus one for the classification token.
        position_count = num_frames * patches_per_frame + 1
        flat_patch_size = channels * patch_size ** 2

        self.patch_size = patch_size
        self.to_patch_embedding = nn.Linear(flat_patch_size, dim)
        self.pos_emb = nn.Embedding(position_count, dim)
        self.cls_token = nn.Parameter(torch.randn(1, dim))

        def build_attention():
            return PreNorm(dim, Attention(dim, dim_head = dim_head, heads = heads, dropout = attn_dropout))

        # Each layer holds, in order: time attention, spatial attention, feed-forward.
        self.layers = nn.ModuleList([
            nn.ModuleList([
                build_attention(),
                build_attention(),
                PreNorm(dim, FeedForward(dim, dropout = ff_dropout)),
            ])
            for _ in range(depth)
        ])

        self.to_out = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, video):
        """Return class logits for ``video`` laid out as (batch, frames, channels, height, width)."""
        b, f, _, h, w, *_ = video.shape
        device = video.device
        p = self.patch_size
        assert h % p == 0 and w % p == 0, f'height {h} and width {w} of video must be divisible by the patch size {p}'

        # Number of patches in a single frame.
        n = (h // p) * (w // p)
        patches = rearrange(video, 'b f c (h p1) (w p2) -> b (f h w) (p1 p2 c)', p1 = p, p2 = p)

        patch_tokens = self.to_patch_embedding(patches)

        # Prepend one copy of the classification token to every sample.
        cls_tokens = repeat(self.cls_token, 'n d -> b n d', b = b)
        x = torch.cat((cls_tokens, patch_tokens), dim = 1)
        positions = torch.arange(x.shape[1], device = device)
        x += self.pos_emb(positions)

        for layer in self.layers:
            time_attn, spatial_attn, ff = layer
            x = time_attn(x, 'b (f n) d', '(b n) f d', n = n) + x
            x = spatial_attn(x, 'b (f n) d', '(b f) n d', f = f) + x
            x = ff(x) + x

        # Classify from the classification token only.
        return self.to_out(x[:, 0])

# Video frame extraction (`vidextract`)

In [None]:
import torch
import cv2
import numpy as np
import os
import math
import torch.optim as optim
FRAMES_INTERVAL = 10  # keep one frame out of every FRAMES_INTERVAL frames that are read
def vidextract(DATA_DIR, start, end):
  """Load a slice of the videos in ``DATA_DIR`` as one tensor plus one-hot labels.

  Files are taken in ``os.listdir`` order, which is not guaranteed to be sorted.
  Only entries whose position in that listing is in ``[start, end)`` are decoded.

  Args:
    DATA_DIR: directory containing the video files (trailing slash optional).
    start: first listing index to load (inclusive).
    end: listing index at which to stop (exclusive).

  Returns:
    ``(video, labels)`` where ``video`` is a float tensor of shape
    (batch, frames, channels, 224, 224) and ``labels`` is a list holding
    ``[1, 0]`` for files whose name has ``'Pain'`` at characters 5-8 and
    ``[0, 1]`` for every other file.
  """
  # List the directory once so both passes see the same files in the same order.
  file_names = os.listdir(DATA_DIR)

  # Shortest frame count over ALL files in the directory (not just the slice),
  # so that every call samples the same number of frames per video.
  min_video_frames = math.inf
  for video_file_name in file_names:
    vidcap = cv2.VideoCapture(os.path.join(DATA_DIR, video_file_name))
    try:
      length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
      # Release the decoder handle; it was previously leaked for every file.
      vidcap.release()
    if length < min_video_frames:
      min_video_frames = length
  print(min_video_frames)

  videos = []
  labels = []
  for count, video_file_name in enumerate(file_names):
    if count >= end:
      break
    if count < start:
      continue

    print("Loading " + video_file_name + "...")
    print(video_file_name[5:9])
    if video_file_name[5:9] == 'Pain':
      labels.append([1,0])
    else:
      labels.append([0,1])

    # Only the first tenth of the shortest video (minus one frame) is scanned.
    frame_limit = int(min_video_frames / 10) - 1
    frames = []
    vidcap = cv2.VideoCapture(os.path.join(DATA_DIR, video_file_name))
    try:
      # The very first frame is read and dropped, matching the original sampling.
      success, image = vidcap.read()
      considered_frames_counter = 0
      while success and considered_frames_counter != frame_limit:
        success, image = vidcap.read()
        if not success:
          # A failed read yields no image, so stop before touching it.
          break
        if considered_frames_counter % FRAMES_INTERVAL == 0:
          # Resize to 224x224 and move channels first: (H, W, C) -> (C, H, W).
          image = np.transpose(np.asarray(cv2.resize(image, (224,224))), (2, 0, 1))
          frames.append(image)
        considered_frames_counter += 1
    finally:
      vidcap.release()

    videos.append(frames)

  video = torch.tensor(np.asarray(videos)).float() # (batch x frames x channels x height x width)
  print(video.shape)
  del videos
  return video,labels


# Video batch 1: files 0-5

In [None]:
# Load training batch 1: directory-listing entries 0-4 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/",0,5)

600
Loading S001_Friction_1_[0]_20s.mp4...
Fric
Loading S001_Rest_1_[0]_20s.mp4...
Rest
Loading S001_Rest_2_[0]_20s.mp4...
Rest
Loading S001_Rest_2_[2]_20s.mp4...
Rest
Loading S002_Friction_1_[0]_20s.mp4...
Fric
torch.Size([5, 6, 3, 224, 224])


In [None]:
video = vidpain
# Model hyperparameters.
DIM = 224
IMAGE_SIZE = 224
PATCH_SIZE = 16
NUM_CLASSES = 2
NUM_FRAMES = 20  # sizes the positional embedding; the clips loaded above have 6 frames each
DEPTH = 12
HEADS = 8
DIM_HEAD = 64
ATTN_DROPOUT = 0.1
FF_DROPOUT = 0.1
ITERATIONS = 10  # NOTE: not used by the loop below, which runs 15 steps
# Softmax turns the two logits into probabilities so they can be fed to BCELoss.
model = torch.nn.Sequential(
    TimeSformer(dim = DIM, image_size = IMAGE_SIZE, patch_size = PATCH_SIZE, num_frames = NUM_FRAMES, num_classes = NUM_CLASSES, depth = DEPTH, heads = HEADS, dim_head = DIM_HEAD, attn_dropout = ATTN_DROPOUT, ff_dropout = FF_DROPOUT),
    nn.Softmax(dim=1)
)

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32. They are built once here and passed to the loss
# directly; re-wrapping an existing tensor with torch.tensor() copies it and warns.
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()




#0  loss:1.6338802576065063
#1  loss:1.5756819248199463
#2  loss:1.415379524230957
#3  loss:1.2777669429779053
#4  loss:1.2754961252212524
#5  loss:1.1294617652893066
#6  loss:1.1601109504699707
#7  loss:0.8977737426757812
#8  loss:0.8946170806884766
#9  loss:0.845363438129425
#10  loss:0.7245543599128723
#11  loss:0.5927243828773499
#12  loss:0.5107002854347229
#13  loss:0.5726728439331055
#14  loss:0.6309493184089661


In [None]:
# Checkpoint after batch 1. torch.save takes (obj, f); the stray positional
# arguments `0, 5` were being interpreted as pickle_module / pickle_protocol.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid1.pth'
torch.save(model.state_dict(), PATH)

# Video batch 2: files 85-90

In [None]:
# Load training batch 2: directory-listing entries 85-89 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 85,90)

600
Loading S038_Pain_1_[0]_20s.mp4...
Pain
Loading S037_Pain_1_[0]_20s.mp4...
Pain
Loading S039_Pain_1_[0]_20s.mp4...
Pain
Loading S040_Pain_1_[0]_20s.mp4...
Pain
Loading S041_Pain_1_[0]_20s.mp4...
Pain
torch.Size([5, 6, 3, 224, 224])


In [None]:
video = vidpain
# `model` is the instance trained by the preceding cell and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()




#0  loss:0.9451214075088501
#1  loss:0.9577410817146301
#2  loss:0.8504666090011597
#3  loss:0.7201770544052124
#4  loss:0.6832591891288757
#5  loss:0.6455307006835938
#6  loss:0.6098749041557312
#7  loss:0.5796276926994324
#8  loss:0.47455033659935
#9  loss:0.45027822256088257
#10  loss:0.4838547110557556
#11  loss:0.43895095586776733
#12  loss:0.40717965364456177
#13  loss:0.367914617061615
#14  loss:0.3593064546585083


In [None]:
# Checkpoint the weights after training on batch 2.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid2.pth'
torch.save(model.state_dict(), PATH)

# Video batch 3: files 7-14

In [None]:
# Load training batch 3: directory-listing entries 7-13 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 7,14)
video = vidpain
# `model` is the instance trained by the preceding cells and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()


600
Loading S003_Rest_1_[0]_20s.mp4...
Rest
Loading S003_Rest_2_[0]_20s.mp4...
Rest
Loading S003_Friction_1_[0]_20s.mp4...
Fric
Loading S004_Move_1_[0]_20s.mp4...
Move
Loading S004_Friction_1_[0]_20s.mp4...
Fric
Loading S004_Rest_2_[0]_20s.mp4...
Rest
Loading S004_Rest_1_[1]_20s.mp4...
Rest
torch.Size([7, 6, 3, 224, 224])




#0  loss:1.2379896640777588
#1  loss:1.1322221755981445
#2  loss:1.1348035335540771
#3  loss:1.0067535638809204
#4  loss:0.8581860661506653
#5  loss:0.8210849165916443
#6  loss:0.8002451658248901
#7  loss:0.7872864603996277
#8  loss:0.6532102227210999
#9  loss:0.6351861953735352
#10  loss:0.5920183062553406
#11  loss:0.5333684086799622
#12  loss:0.5208831429481506
#13  loss:0.46817827224731445
#14  loss:0.4285447597503662


In [None]:
# Checkpoint the weights after training on batch 3.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid3.pth'
torch.save(model.state_dict(), PATH)

# Video batch 4: files 14-21

In [None]:
# Load training batch 4: directory-listing entries 14-20 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 14,21)
video = vidpain
# `model` is the instance trained by the preceding cells and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
# Checkpoint the weights after training on batch 4.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid4.pth'
torch.save(model.state_dict(), PATH)

600
Loading S004_Rest_1_[0]_20s.mp4...
Rest
Loading S005_Friction_1_[0]_20s.mp4...
Fric
Loading S005_Rest_2_[0]_20s.mp4...
Rest
Loading S005_Rest_1_[1]_20s.mp4...
Rest
Loading S005_Rest_1_[0]_20s.mp4...
Rest
Loading S006_Friction_1_[0]_20s.mp4...
Fric
Loading S005_Rest_2_[1]_20s.mp4...
Rest
torch.Size([7, 6, 3, 224, 224])




#0  loss:0.4369902014732361
#1  loss:0.3651163876056671
#2  loss:0.3975377678871155
#3  loss:0.31286337971687317
#4  loss:0.35004106163978577
#5  loss:0.28338876366615295
#6  loss:0.2927102744579315
#7  loss:0.2791118919849396
#8  loss:0.2506631016731262
#9  loss:0.2473168671131134
#10  loss:0.24679718911647797
#11  loss:0.22811639308929443
#12  loss:0.24174390733242035
#13  loss:0.22851495444774628
#14  loss:0.21194446086883545


# Video batch 5: files 49-56

In [None]:
# Load training batch 5: directory-listing entries 49-55 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 49,56)
video = vidpain
# `model` is the instance trained by the preceding cells and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
# Checkpoint the weights after training on batch 5.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid5.pth'
torch.save(model.state_dict(), PATH)

600
Loading S001_Pain_1_[0]_20s.mp4...
Pain
Loading S002_Pain_1_[0]_20s.mp4...
Pain
Loading S003_Pain_1_[0]_20s.mp4...
Pain
Loading S004_Pain_1_[0]_20s.mp4...
Pain
Loading S005_Pain_1_[0]_20s.mp4...
Pain
Loading S009_Pain_1_[0]_20s.mp4...
Pain
Loading S006_Pain_1_[0]_20s.mp4...
Pain
torch.Size([7, 6, 3, 224, 224])




#0  loss:1.6582930088043213
#1  loss:1.616240382194519
#2  loss:1.4291915893554688
#3  loss:1.2625181674957275
#4  loss:1.18136465549469
#5  loss:1.2060812711715698
#6  loss:0.932726263999939
#7  loss:0.9179891347885132
#8  loss:0.8857330083847046
#9  loss:0.8180720210075378
#10  loss:0.7458814382553101
#11  loss:0.6794313788414001
#12  loss:0.6606083512306213
#13  loss:0.5868210196495056
#14  loss:0.5977506637573242


# Video batch 6: files 21-30

In [None]:
# Load training batch 6: directory-listing entries 21-29 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 21,30)
video = vidpain
# `model` is the instance trained by the preceding cells and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
# Checkpoint the weights after training on batch 6.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid6.pth'
torch.save(model.state_dict(), PATH)

600
Loading S006_Rest_1_[0]_20s.mp4...
Rest
Loading S006_Rest_2_[0]_20s.mp4...
Rest
Loading S007_Friction_1_[0]_20s.mp4...
Fric
Loading S007_Move_2_[0]_20s.mp4...
Move
Loading S007_Rest_1_[0]_20s.mp4...
Rest
Loading S007_Rest_1_[1]_20s.mp4...
Rest
Loading S008_Move_1_[0]_20s.mp4...
Move
Loading S008_Rest_1_[0]_20s.mp4...
Rest
Loading S008_Friction_1_[0]_20s.mp4...
Fric
torch.Size([9, 6, 3, 224, 224])




#0  loss:0.9765403270721436
#1  loss:0.8126559853553772
#2  loss:0.8205122351646423
#3  loss:0.7645125985145569
#4  loss:0.6128836870193481
#5  loss:0.6453857421875
#6  loss:0.5892297029495239
#7  loss:0.5206518769264221
#8  loss:0.5768120288848877
#9  loss:0.4762381911277771
#10  loss:0.4812524616718292
#11  loss:0.39502573013305664
#12  loss:0.4029880166053772
#13  loss:0.36511510610580444
#14  loss:0.3465440273284912


# Video batch 7: files 30-40

In [None]:
# Load training batch 7: directory-listing entries 30-39 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 30,40)
video = vidpain
# `model` is the instance trained by the preceding cells and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
# Checkpoint the weights after training on batch 7.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid7.pth'
torch.save(model.state_dict(), PATH)

600
Loading S009_Rest_1_[0]_20s.mp4...
Rest
Loading S009_Move_1_[0]_20s.mp4...
Move
Loading S009_Friction_1_[0]_20s.mp4...
Fric
Loading S009_Rest_3_[0]_20s.mp4...
Rest
Loading S009_Rest_1_[1]_20s.mp4...
Rest
Loading S010_Friction_1_[0]_20s.mp4...
Fric
Loading S010_Rest_1_[0]_20s.mp4...
Rest
Loading S010_Rest_2_[0]_20s.mp4...
Rest
Loading S011_Friction_1_[0]_20s.mp4...
Fric
Loading S011_Rest_1_[0]_20s.mp4...
Rest
torch.Size([10, 6, 3, 224, 224])




#0  loss:0.34249788522720337
#1  loss:0.3073200583457947
#2  loss:0.32910141348838806
#3  loss:0.28698545694351196
#4  loss:0.2992827594280243
#5  loss:0.27069327235221863
#6  loss:0.23652800917625427
#7  loss:0.24706265330314636
#8  loss:0.2331760674715042
#9  loss:0.20456448197364807
#10  loss:0.21044783294200897
#11  loss:0.23726443946361542
#12  loss:0.19461467862129211
#13  loss:0.20005011558532715
#14  loss:0.20513787865638733


# Video batch 8: files 56-66

In [None]:
# Load training batch 8: directory-listing entries 56-65 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 56,66)
video = vidpain
# `model` is the instance trained by the preceding cells and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
# Checkpoint the weights after training on batch 8.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid8.pth'
torch.save(model.state_dict(), PATH)

600
Loading S007_Pain_1_[0]_20s.mp4...
Pain
Loading S008_Pain_1_[0]_20s.mp4...
Pain
Loading S013_Pain_1_[0]_20s.mp4...
Pain
Loading S012_Pain_1_[0]_20s.mp4...
Pain
Loading S010_Pain_1_[0]_20s.mp4...
Pain
Loading S011_Pain_1_[0]_20s.mp4...
Pain
Loading S015_Pain_1_[0]_20s.mp4...
Pain
Loading S014_Pain_1_[0]_20s.mp4...
Pain
Loading S016_Pain_1_[0]_20s.mp4...
Pain
Loading S017_Pain_1_[0]_20s.mp4...
Pain
torch.Size([10, 6, 3, 224, 224])




#0  loss:1.8368104696273804
#1  loss:1.5673831701278687
#2  loss:1.434472918510437
#3  loss:1.3366107940673828
#4  loss:1.2632849216461182
#5  loss:1.1441583633422852
#6  loss:1.0352413654327393
#7  loss:1.0363061428070068
#8  loss:0.9128656387329102
#9  loss:0.8450150489807129
#10  loss:0.779040515422821
#11  loss:0.7399762272834778
#12  loss:0.6612304449081421
#13  loss:0.6191798448562622
#14  loss:0.5482053756713867


# Video batch 9: files 40-49

In [None]:
# Load training batch 9: directory-listing entries 40-48 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 40,49)
video = vidpain
# `model` is the instance trained by the preceding cells and is updated in place.
# On a fresh kernel, rebuild the TimeSformer with the same hyperparameters and
# load the checkpoint saved after the previous batch before running this cell.

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
# Checkpoint the weights after training on batch 9.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid9.pth'
torch.save(model.state_dict(), PATH)

600
Loading S011_Rest_1_[1]_20s.mp4...
Rest
Loading S012_Friction_1_[0]_20s.mp4...
Fric
Loading S012_Move_1_[1]_20s.mp4...
Move
Loading S012_Rest_1_[0]_20s.mp4...
Rest
Loading S012_Rest_1_[1]_20s.mp4...
Rest
Loading S012_Rest_2_[0]_20s.mp4...
Rest
Loading S013_Friction_1_[0]_20s.mp4...
Fric
Loading S013_Move_1_[0]_20s.mp4...
Move
Loading S013_Rest_2_[0]_20s.mp4...
Rest
torch.Size([9, 6, 3, 224, 224])




#0  loss:0.9391663074493408
#1  loss:0.8125343322753906
#2  loss:0.7035319209098816
#3  loss:0.7084596157073975
#4  loss:0.676719069480896
#5  loss:0.5737939476966858
#6  loss:0.47496297955513
#7  loss:0.5209059119224548
#8  loss:0.4407774806022644
#9  loss:0.45410141348838806
#10  loss:0.42635631561279297
#11  loss:0.4008125066757202
#12  loss:0.379938006401062
#13  loss:0.3719461262226105
#14  loss:0.37133315205574036


# Video batch 10: files 66-76

In [None]:
# Load training batch 10: directory-listing entries 66-75 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 66,76)
video = vidpain
# Model hyperparameters; they must match the ones the checkpoint was trained with.
DIM = 224
IMAGE_SIZE = 224
PATCH_SIZE = 16
NUM_CLASSES = 2
NUM_FRAMES = 20
DEPTH = 12
HEADS = 8
DIM_HEAD = 64
ATTN_DROPOUT = 0.1
FF_DROPOUT = 0.1
ITERATIONS = 10  # NOTE: not used by the loop below, which runs 15 steps
model = torch.nn.Sequential(
    TimeSformer(dim = DIM, image_size = IMAGE_SIZE, patch_size = PATCH_SIZE, num_frames = NUM_FRAMES, num_classes = NUM_CLASSES, depth = DEPTH, heads = HEADS, dim_head = DIM_HEAD, attn_dropout = ATTN_DROPOUT, ff_dropout = FF_DROPOUT),
    nn.Softmax(dim=1)
)
# Resume from the weights saved after batch 9.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid9.pth'
chkpnt = torch.load(PATH)
model.load_state_dict(chkpnt)

loss_fn = torch.nn.BCELoss()
# One-hot targets as float32, passed to the loss directly (no torch.tensor() re-wrap).
labels = torch.tensor(vidpainlabel, dtype=torch.float32)

learning_rate = 1e-4
# Plain SGD (no momentum), equivalent to `param -= learning_rate * param.grad`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(15):
  y_pred = model(video)

  loss = loss_fn(y_pred, labels)
  print("#" + str(t), " loss:" + str(loss.item()))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
# Checkpoint the weights after training on batch 10.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid10.pth'
torch.save(model.state_dict(), PATH)

600
Loading S018_Pain_1_[0]_20s.mp4...
Pain
Loading S019_Pain_1_[0]_20s.mp4...
Pain
Loading S020_Pain_1_[0]_20s.mp4...
Pain
Loading S021_Pain_1_[0]_20s.mp4...
Pain
Loading S022_Pain_1_[0]_20s.mp4...
Pain
Loading S023_Pain_1_[0]_20s.mp4...
Pain
Loading S026_Pain_1_[0]_20s.mp4...
Pain
Loading S027_Pain_1_[0]_20s.mp4...
Pain
Loading S025_Pain_1_[0]_20s.mp4...
Pain
Loading S024_Pain_1_[0]_20s.mp4...
Pain
torch.Size([10, 6, 3, 224, 224])




#0  loss:1.4158822298049927
#1  loss:1.131632685661316
#2  loss:1.1168378591537476
#3  loss:0.975273609161377
#4  loss:0.9030499458312988
#5  loss:0.8576682806015015
#6  loss:0.7287939190864563
#7  loss:0.7666716575622559
#8  loss:0.6322994232177734
#9  loss:0.5989651083946228
#10  loss:0.5446217060089111
#11  loss:0.5603043437004089
#12  loss:0.523078203201294
#13  loss:0.48015671968460083
#14  loss:0.448465496301651


# Evaluation

In [None]:
# Model hyperparameters; they must match the ones the checkpoint was trained with.
DIM = 224
IMAGE_SIZE = 224
PATCH_SIZE = 16
NUM_CLASSES = 2
NUM_FRAMES = 20
DEPTH = 12
HEADS = 8
DIM_HEAD = 64
ATTN_DROPOUT = 0.1
FF_DROPOUT = 0.1
ITERATIONS = 10
model = torch.nn.Sequential(
    TimeSformer(dim = DIM, image_size = IMAGE_SIZE, patch_size = PATCH_SIZE, num_frames = NUM_FRAMES, num_classes = NUM_CLASSES, depth = DEPTH, heads = HEADS, dim_head = DIM_HEAD, attn_dropout = ATTN_DROPOUT, ff_dropout = FF_DROPOUT),
    nn.Softmax(dim=1)
)
# This model is only used for inference: switch to eval mode so the dropout
# layers are disabled and predictions are deterministic.
model.eval()
# Load the weights saved after the last training batch.
PATH = '/content/drive/MyDrive/dataset/pain complete/iCOPEvid/painvid10.pth'
chkpnt = torch.load(PATH)
model.load_state_dict(chkpnt)


<All keys matched successfully>

In [None]:
# Evaluation clips labelled 'Pain': directory-listing entries 77-79 (the end index is exclusive).
vidpain , vidpainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 77,80)

600
Loading S030_Pain_1_[0]_20s.mp4...
Pain
Loading S031_Pain_1_[0]_20s.mp4...
Pain
Loading S029_Pain_1_[0]_20s.mp4...
Pain
torch.Size([3, 6, 3, 224, 224])


In [None]:
# Evaluation clips without pain: directory-listing entries 5-9 (the end index is exclusive).
# NOTE(review): entries 7-9 were also part of training batch 3 (indices 7-14),
# so this evaluation set is not fully held out.
vidNopain , vidNopainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 5,10)

600
Loading S002_Rest_1_[0]_20s.mp4...
Rest
Loading S002_Rest_2_[0]_20s.mp4...
Rest
Loading S003_Rest_1_[0]_20s.mp4...
Rest
Loading S003_Rest_2_[0]_20s.mp4...
Rest
Loading S003_Friction_1_[0]_20s.mp4...
Fric
torch.Size([5, 6, 3, 224, 224])


In [None]:
# Stack the no-pain clips followed by the pain clips along the batch axis.
# torch.cat keeps the data as float tensors without a round trip through numpy.
testvid = torch.cat((vidNopain, vidpain), dim=0).float()
# Labels are plain lists, concatenated in the same order as the clips.
testlabel = vidNopainlabel + vidpainlabel

In [None]:
# Inference: disable dropout and skip gradient tracking so the predictions are
# deterministic and no autograd graph is kept in memory.
model.eval()
with torch.no_grad():
  testpred = model(testvid)

In [None]:
# Explicit imports instead of a wildcard, so it is clear where each metric comes from.
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Ground truth and predictions as (samples, 2) arrays in one-hot / probability form.
outGT = np.asarray(testlabel)
outPRED = testpred.detach().numpy()
# Threshold the two softmax outputs at 0.5 and score them as a multilabel
# indicator matrix, averaging per sample.
print('F1: {}'.format(f1_score(outGT, outPRED>.5, average="samples")))
print('Precision: {}'.format(precision_score(outGT, outPRED>.5, average="samples")))
print('Recall: {}'.format(recall_score(outGT, outPRED >.5, average="samples")))
print('Accuracy: {}'.format(accuracy_score(outGT, outPRED>.5)))

F1: 0.375
Precision: 0.375
Recall: 0.375
Accuracy: 0.375


In [None]:
# Print each predicted probability pair next to its one-hot ground-truth label.
for i in range(len(testpred)):
  print(testpred[i], testlabel[i] )

tensor([0.6861, 0.3139], grad_fn=<SelectBackward0>) [0, 1]
tensor([0.7124, 0.2876], grad_fn=<SelectBackward0>) [0, 1]
tensor([0.7000, 0.3000], grad_fn=<SelectBackward0>) [0, 1]
tensor([0.6060, 0.3940], grad_fn=<SelectBackward0>) [0, 1]
tensor([0.6604, 0.3396], grad_fn=<SelectBackward0>) [0, 1]
tensor([0.5837, 0.4163], grad_fn=<SelectBackward0>) [1, 0]
tensor([0.6970, 0.3030], grad_fn=<SelectBackward0>) [1, 0]
tensor([0.6428, 0.3572], grad_fn=<SelectBackward0>) [1, 0]


In [None]:
# Reload a smaller no-pain subset: directory-listing entries 5-6 (the end index is exclusive).
vidNopain , vidNopainlabel = vidextract("/content/drive/MyDrive/dataset/pain complete/mix data/", 5,7)

600
Loading S002_Rest_1_[0]_20s.mp4...
Rest
Loading S002_Rest_2_[0]_20s.mp4...
Rest
torch.Size([2, 6, 3, 224, 224])


In [None]:
# Display the labels just loaded; [0, 1] marks a clip whose file name is not tagged 'Pain'.
vidNopainlabel

[[0, 1], [0, 1]]