In [21]:
import numpy as np
import os
import cv2
import os
#import pandas as pd
from torch.utils.data import Dataset
import torch
from PIL import Image
from torchvision import transforms,datasets
from model import Net

DATA_DIR= "../../../datashare/"


In [22]:
class CustomImageDataset(Dataset):
    def __init__(self,img_names ,img_dir, transform= transforms.ToTensor(), target_transform=None):
        self.img_labels =img_names
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels[idx])
        image=Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image


class VideoFrameDataset(Dataset):
    def __init__(self,fold_num,f,top=True) -> None:
        super().__init__()
        self.fold_num=fold_num
        self.file_name=f
        self.gesture_mapping= {"G0":0,"G1":1,"G2":2,"G3":3,"G4":4,"G5":5}
        self.topOrSide="_"+("top" if top else "side")
        self.shape=[1280, 36,6]

        if f not in ['P032_tissue2.npy','P025_balloon2.npy']:
            data = np.load(DATA_DIR+"APAS/features/fold"+self.fold_num+"/"+f)
            data = np.transpose(data)
            
            n_f=data.shape[0]
            
            data_k = np.load(DATA_DIR+"APAS/kinematics_npy/"+f)
            data_k = np.transpose(data_k)
            k_f=data_k.shape[0]
            missing_frames_number = n_f-k_f
            if missing_frames_number > 0:
                missing_frames=np.random.choice(range(k_f),missing_frames_number)
                data_k=np.insert(data_k,missing_frames,[data_k[i,:] for i in missing_frames],axis=0) 
            elif missing_frames_number < 0:
                missing_frames=np.random.choice(range(n_f),-missing_frames_number)
                data=np.insert(data,missing_frames,[data[i,:] for i in missing_frames],axis=0)
             #frames_vec=torch.cat((torch.from_numpy(data),torch.from_numpy(data_k)),axis=0)
            data=torch.from_numpy(data)
            data_k=torch.from_numpy(data_k)
            labels=[]
            frames_path=DATA_DIR+"APAS/frames/"+f.split('.')[0]+f"{self.topOrSide}/"                  
            frames= sorted(os.listdir(frames_path))
            
            
            
            #normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
            #                         std=[0.229, 0.224, 0.225])

            framesdataset=CustomImageDataset(img_names=frames,img_dir=frames_path,
                                             transform=transforms.Compose([
            #        transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
            #        normalize,
                ]))
           
            
            with open(DATA_DIR+"APAS/transcriptions_gestures/"+f.split('.')[0]+".txt", "r") as file:
                labels_data = file.readlines()
            for line in labels_data:
                values = line[:-1].split()
                if int(values[1]) > len(data):
                    ul = len(data)
                else:
                    ul = values[1]
                if int(values[0]) == 0:
                    start = 1
                else:
                    start = int(values[0])
                
                for i in range(start,int(ul)+1):
                    labels.append(self.gesture_mapping[values[2]])    
           
            missing_frames_number = len(labels)-len(framesdataset)
            if  missing_frames_number > 0:
                    missing_frames=np.random.choice(len(framesdataset),missing_frames_number)
                    for index in missing_frames:
                        framesdataset.img_labels.insert(index,framesdataset.img_labels[index])  
            self.vid=framesdataset
            self.f_e=data
            self.f_k=data_k 
            self.labels=labels
        print(f,len(framesdataset),len(labels))
        assert len(framesdataset) ==len(labels)
        
    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
       
        #return self.f_k[idx,:],self.f_e[idx,:],self.labels[idx]

        return self.vid[idx],self.f_k[idx,:],self.f_e[idx,:],self.labels[idx]

In [23]:
def load_model(model,path,args):
    model = model(*args)
    model.load_state_dict(torch.load(path))
    return model.eval()

In [24]:
foldNum="0"
top=True
videoname="P016_tissue1.npy"
weight_value=0.1

In [25]:

vid=VideoFrameDataset(f=videoname,fold_num=foldNum,top=top)
num_of_features_e,num_of_features_k,num_of_classes=vid.shape

net = Net(num_of_classes=num_of_classes,num_of_features_e=num_of_features_e,num_of_features_k=num_of_features_k,weight1=weight_value)


P016_tissue1.npy 6751 6751


In [7]:
def predict_with_video(model,dataset,fps=30):
    labels_dict=vid.gesture_mapping
    x,y,w,h = 0,0,400,25
    img_array=[]
    for data in dataset:
        frame,input_e,input_v,label=data
        height, width, layers = frame.shape
        size = (width, height)
        pred=model(input_e,input_v)
        labels_dict[pred]
        cv2.putText(img=frame, text="Pred:" +labels_dict[pred]+ "Label:" +labels_dict[label], org=(x + int(w / 15), y + int(h / 3)),
                fontFace=cv2.FONT_HERSHEY_DUPLEX,
                fontScale=0.5, color=(0, 0, 255), thickness=1)
        img_array.append(frame)
    out = cv2.VideoWriter(f'{videoname}.mp4', cv2.VideoWriter_fourcc(*'DIVX'), fps, size)

    for i in range(len(img_array)):
        out.write(img_array[i])
    out.release() 

In [8]:
a=np.load(DATA_DIR+"APAS"+"/kinematics_npy"+"/P018_balloon1.npy")

In [None]:

def predict(model,data_loader,frames_loader,labels_dict):
    x,y,w,h = 0,0,400,25
    img_array=[]
    for data,frame in zip(data_loader,frames_loader):
        input_e,input_v,label=data
        height, width, layers = frame.shape
        size = (width, height)
        pred=model(input_e,input_v)
        labels_dict[pred]
        cv2.putText(img=frame, text="Pred:" +labels_dict[pred]+ "Label:" +labels_dict[label], org=(x + int(w / 15), y + int(h / 3)),
                fontFace=cv2.FONT_HERSHEY_DUPLEX,
                fontScale=0.5, color=(0, 0, 255), thickness=1)
        img_array.append(frame)
    out = cv2.VideoWriter('P026_tissue1_new.mp4', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)

    for i in range(len(img_array)):
        out.write(img_array[i])
    out.release()
            
    


In [3]:
def pred(model,filename):
    

SyntaxError: incomplete input (3476188823.py, line 1)

In [9]:
a.shape

(36, 3934)

In [5]:
b=np.load(DATA_DIR+"APAS"+"/features"+"/fold0"+"/P032_tissue2.npy")

In [7]:
b.shape

(1280, 6911)

Bad pipe message: %s [b't(\xb3\x11`\x9d\xea"\x12l\x90\xbe\xbag\x1f\xb8\xc4\x89 ~edt`\'\x0c\xa3\x18QO\xcb.\x9e\xd9\xcdlX\x88\xe3\x18\xca\xc5\xc8\xe8\x94\xc8"T\x04\xec\xdc\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00']
Bad pipe message: %s [b"\xa0^\xdb\xb64vDE\xf6\xe8\xdb\xce\xf6\xf6\xc2\x1a{\xea\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0\x07\xc0\x11\x00\x96\x00\x05\x00\xff\x01\x00\x00j\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b"]
Bad pipe message: %s [b"\x7f\xca\xd9\xb4\x00\xc03\x96\rh\xf8\x07xK\xc5f\x16\xe2\x00\x00\xa6\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a