# Extracting the JAAD images from videos and showing the crossing prediction

In [None]:
import os

import numpy as np

from torch import from_numpy
from torch import cuda
from torch import no_grad
from torch import optim

from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg

%matplotlib inline

from Code.GNN import *
from Code.SkeletonsDataset import *
from Code.ModelTrainEvaluate import *

import cv2
from PIL import Image

## Loading the skeletons dataset

In [2]:
# --- Experiment configuration -------------------------------------------------
# Data selection.
datasetName = 'JAAD'
subset = 'test'
poseEstimator = 'AlphaPose'
numberOfClasses = 2
info = 87
data_augm = ''

# Classifier settings; these values are also used to rebuild the checkpoint name.
net = 'TGCN'
dropout = 0.5
num_epochs = 1000
batch_size = 10000

# Suffix added to the model name only when a data-augmentation tag is set.
if data_augm:
    data_augmentation_name = '_' + data_augm
else:
    data_augmentation_name = ''

# OpenPose skeletons have 25 joints here; any other estimator is treated as 18.
if poseEstimator == 'OpenPose':
    numberOfJoints = 25
else:
    numberOfJoints = 18

# --- Skeleton dataset ---------------------------------------------------------
skeletons_csv = f'Data/{datasetName}/{subset}_jaad_{poseEstimator}.csv'

dataset = SkeletonsDataset(
    skeletons_csv,
    normalization='minmax',
    target='cross',
    info=info,
    remove_undetected=True,
    numberOfJoints=numberOfJoints,
)

In [3]:
# Distinct video identifiers found in the loaded skeleton table.
video_column = dataset.loadedData['video']
videos_list = video_column.unique().tolist()

In [4]:
# Inspect one sample to derive the graph dimensions the model is built with.
t0 = dataset[0]

# First entry of the sample's `x_temporal`: the node-feature matrix.
t1 = t0.x_temporal[0]

# Axis 0 counts the nodes, axis 1 the features stored per node.
numberOfNodes, embed_dim = t1.shape[0], t1.shape[1]

print(f'Number of nodes per skeleton: {numberOfNodes}')
print(f'Number of features per node: {embed_dim}')

Number of nodes per skeleton: 18
Number of features per node: 3


## Loading the trained model

In [5]:
# Rebuild the checkpoint file name from the experiment configuration.
modelName = net + '_' + poseEstimator + '_info=' + str(info) + '_dropout=' + str(dropout) + data_augmentation_name
modelName = modelName + '_epoch=' + str(num_epochs)

print('Loading model:', modelName)

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if cuda.is_available() else 'cpu')

model = SpatialTemporalGNN(embed_dim, numberOfClasses, numberOfNodes, net=net,
                           filterSize=embed_dim, dropout=dropout, batchSize=batch_size).to(device)

# `map_location` remaps tensors saved from a CUDA device onto `device`; without it
# loading a GPU-trained checkpoint fails on a CPU-only machine.
# Kept as the last expression so the key-matching report is displayed.
model.load_state_dict(torch.load('exportedModels/' + datasetName + '/' + modelName, map_location=device))

Loading model: TGCN_AlphaPose_info=87_dropout=0.5_epoch=1000


<All keys matched successfully>

In [6]:
# Batches are served in dataset order (`shuffle=False` is the loader default, spelled
# out here because later cells index the predictions by dataset row position).
loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

# Predicted labels and their ground truth for every sample served by the loader.
predictions, groundtruth = predict(model, loader, device)

In [7]:
# Folder that receives the annotated frames for the model named by `modelName`.
# `exist_ok=True` keeps the cell re-runnable. `os` is imported in the notebook's
# import cell rather than here.
frames_output_dir = "Frames_results/" + datasetName + '/' + poseEstimator + "/" + subset + "/" + modelName
os.makedirs(frames_output_dir, exist_ok=True)

## Making the crossing/not-crossing prediction with the data-augmented model

In [8]:
# --- Configuration of the second run (model trained with data augmentation) ---
# Data selection.
datasetName = 'JAAD'
subset = 'test'
poseEstimator = 'AlphaPose'
numberOfClasses = 2
info = 87
data_augm = 'dataAugmentation4'

# Classifier settings; these values are also used to rebuild the checkpoint name.
net = 'TGCN'
dropout = 0.5
num_epochs = 1000
batch_size = 10000

# Suffix added to the model name only when a data-augmentation tag is set.
if data_augm:
    data_augmentation_name = '_' + data_augm
else:
    data_augmentation_name = ''

# OpenPose skeletons have 25 joints here; any other estimator is treated as 18.
if poseEstimator == 'OpenPose':
    numberOfJoints = 25
else:
    numberOfJoints = 18

# --- Skeleton dataset instance used by the second model ------------------------
skeletons_csv2 = f'Data/{datasetName}/{subset}_jaad_{poseEstimator}.csv'

dataset2 = SkeletonsDataset(
    skeletons_csv2,
    normalization='minmax',
    target='cross',
    info=info,
    remove_undetected=True,
    numberOfJoints=numberOfJoints,
)

In [9]:
# Rebuild the checkpoint file name from the experiment configuration.
modelName = net + '_' + poseEstimator + '_info=' + str(info) + '_dropout=' + str(dropout) + data_augmentation_name
modelName = modelName + '_epoch=' + str(num_epochs)

print('Loading model:', modelName)

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if cuda.is_available() else 'cpu')

model2 = SpatialTemporalGNN(embed_dim, numberOfClasses, numberOfNodes, net=net,
                            filterSize=embed_dim, dropout=dropout, batchSize=batch_size).to(device)

# `map_location` remaps tensors saved from a CUDA device onto `device`; without it
# loading a GPU-trained checkpoint fails on a CPU-only machine.
# Kept as the last expression so the key-matching report is displayed.
model2.load_state_dict(torch.load('exportedModels/' + datasetName + '/' + modelName, map_location=device))

Loading model: TGCN_AlphaPose_info=87_dropout=0.5_dataAugmentation4_epoch=1000


<All keys matched successfully>

In [10]:
# Batches are served in dataset order (`shuffle=False` is the loader default, spelled
# out here because later cells index the predictions by dataset row position).
loader2 = DataLoader(dataset2, batch_size=batch_size, shuffle=False)

# Predicted labels and their ground truth for every sample served by the loader.
predictions2, groundtruth2 = predict(model2, loader2, device)

In [11]:
# Folder that receives the annotated frames for the model named by `modelName`.
# `exist_ok=True` keeps the cell re-runnable. `os` is imported in the notebook's
# import cell rather than here.
frames_output_dir2 = "Frames_results/" + datasetName + '/' + poseEstimator + "/" + subset + "/" + modelName
os.makedirs(frames_output_dir2, exist_ok=True)

## Loading the videos and exporting the annotated frames as PNG images

In [12]:
# Positions of the samples on which the two models give different predictions.
preds_first = np.array(predictions)
preds_second = np.array(predictions2)
diff = np.flatnonzero(preds_first != preds_second)

diff.shape

(2873,)

In [13]:
# Positions where the two models disagree AND the second model matches its ground truth.
preds_first = np.array(predictions)
preds_second = np.array(predictions2)
truth_second = np.array(groundtruth2)

models_disagree = preds_first != preds_second
second_is_right = preds_second == truth_second
diff = np.flatnonzero(models_disagree & second_is_right)

diff.shape

(1447,)

In [14]:
# Rows of the loaded skeleton table at the positions collected in `diff`.
diff_rows_preview = dataset.loadedData.iloc[diff]
diff_rows_preview

Unnamed: 0,video,frame,ped_id,skeleton,skeleton_detected,cross,reaction,hand_gesture,look,action,nod,occlusion,bbox,bbox_center_x,bbox_center_y,skeleton_center_x,skeleton_center_y
29,video_0005,29,0_5_19b,"[[976.7589721679688, 700.0393676757812, 0.9337...",True,not-crossing,__undefined__,__undefined__,looking,walking,__undefined__,none,"[949.0, 686.0, 1014.0, 821.0]",981.5,753.5,979.420281,735.037727
30,video_0005,30,0_5_19b,"[[976.5730590820312, 699.3145141601562, 0.9294...",True,not-crossing,__undefined__,__undefined__,looking,walking,__undefined__,none,"[948.0, 685.0, 1013.0, 820.0]",980.5,752.5,979.039608,734.817508
31,video_0005,31,0_5_19b,"[[975.1636352539062, 698.6183471679688, 0.9457...",True,not-crossing,__undefined__,__undefined__,looking,walking,__undefined__,none,"[947.0, 685.0, 1012.0, 821.0]",979.5,753.0,977.963189,734.422947
32,video_0005,32,0_5_19b,"[[974.7236328125, 698.50439453125, 0.960399389...",True,not-crossing,__undefined__,__undefined__,looking,walking,__undefined__,none,"[946.0, 685.0, 1012.0, 822.0]",979.0,753.5,977.089771,733.959215
60,video_0005,60,0_5_19b,"[[948.6123657226562, 702.7825927734375, 0.9550...",True,not-crossing,__undefined__,__undefined__,looking,walking,__undefined__,none,"[917.0, 682.0, 992.0, 840.0]",954.5,761.0,953.401199,743.422039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29904,video_0221,234,0_221_1623b,"[[1231.2386474609375, 481.4247741699219, 0.847...",True,not-crossing,__undefined__,__undefined__,not-looking,walking,__undefined__,none,"[1173.0, 452.0, 1293.0, 688.0]",1233.0,570.0,1226.064541,542.478532
29907,video_0221,237,0_221_1623b,"[[1228.7847900390625, 482.3612976074219, 0.519...",True,not-crossing,__undefined__,__undefined__,not-looking,walking,__undefined__,none,"[1173.0, 452.0, 1290.0, 682.0]",1231.5,567.0,1223.565582,541.724231
29908,video_0221,238,0_221_1623b,"[[1228.268310546875, 481.5972900390625, 0.5759...",True,not-crossing,__undefined__,__undefined__,not-looking,walking,__undefined__,none,"[1173.0, 452.0, 1289.0, 680.0]",1231.0,566.0,1223.872033,540.489577
29909,video_0221,239,0_221_1623b,"[[1222.3924560546875, 473.7130126953125, 0.689...",True,not-crossing,__undefined__,__undefined__,not-looking,walking,__undefined__,none,"[1173.0, 451.0, 1288.0, 676.0]",1230.5,563.5,1221.364712,538.148085


In [15]:
# Videos that contain at least one of the rows selected by `diff`.
diff_video_column = dataset.loadedData.iloc[diff]['video']
videos_list = diff_video_column.unique().tolist()

videos_list

['video_0005',
 'video_0016',
 'video_0017',
 'video_0028',
 'video_0042',
 'video_0045',
 'video_0046',
 'video_0048',
 'video_0053',
 'video_0055',
 'video_0059',
 'video_0071',
 'video_0076',
 'video_0084',
 'video_0087',
 'video_0090',
 'video_0092',
 'video_0093',
 'video_0096',
 'video_0097',
 'video_0100',
 'video_0103',
 'video_0104',
 'video_0105',
 'video_0106',
 'video_0107',
 'video_0110',
 'video_0113',
 'video_0115',
 'video_0116',
 'video_0117',
 'video_0124',
 'video_0125',
 'video_0128',
 'video_0135',
 'video_0141',
 'video_0144',
 'video_0148',
 'video_0150',
 'video_0151',
 'video_0152',
 'video_0155',
 'video_0162',
 'video_0163',
 'video_0164',
 'video_0165',
 'video_0173',
 'video_0177',
 'video_0178',
 'video_0179',
 'video_0183',
 'video_0187',
 'video_0197',
 'video_0201',
 'video_0203',
 'video_0206',
 'video_0212',
 'video_0213',
 'video_0216',
 'video_0221']

In [16]:
# For the first `quantity` videos of `videos_list`, save one annotated PNG per model
# for every frame whose rows were selected by `diff`.
quantity = 10

videos_list_frames = dataset.loadedData['video'].tolist()

# Rows selected by `diff`, looked up once instead of once per video.
diff_rows = dataset.loadedData.iloc[diff]

# One entry per compared model: (predictions, ground truth, classifier label, output folder).
# Loop-local names are used so the notebook-level `data_augm`, `data_augmentation_name`
# and `modelName` are not overwritten by this cell.
model_runs = []
for run_predictions, run_groundtruth, augm_tag in [(predictions, groundtruth, ''),
                                                    (predictions2, groundtruth2, 'dataAugmentation4')]:
    augm_suffix = '_' + augm_tag if augm_tag else ''
    run_settings = '_info=' + str(info) + '_dropout=' + str(dropout) + augm_suffix + '_epoch=' + str(num_epochs)
    classifier_label = net + run_settings
    run_output_dir = ("Frames_results/" + datasetName + '/' + poseEstimator + "/" + subset + "/"
                      + net + '_' + poseEstimator + run_settings)
    # Make the cell self-sufficient: the target folder is created if it is missing.
    os.makedirs(run_output_dir, exist_ok=True)
    model_runs.append((run_predictions, run_groundtruth, classifier_label, run_output_dir))

# The slice honours `quantity`.
selected_videos = videos_list[:quantity]

exported = 0

for video_id in selected_videos:

    print('Starting processing of video', video_id)

    # First row of the dataset where the video starts:
    video_first_dataset_row = videos_list_frames.index(video_id)

    # Frame number of every row of this video, ordered by frame and then pedestrian.
    frames = dataset.loadedData[dataset.loadedData['video'] == video_id].sort_values(by=['frame', 'ped_id'], ascending=True)
    frames = frames['frame'].tolist()
    frames_rev = frames[::-1]

    # Frames of this video to export. Every value also occurs in `frames`, because both
    # are taken from the same table filtered on the same video. A set gives constant-time
    # membership tests inside the read loop.
    frames_diff = set(diff_rows[diff_rows['video'] == video_id]['frame'].tolist())
    last_needed_frame = max(frames_diff, default=-1)

    video = cv2.VideoCapture("Data/" + datasetName + "-videos/" + video_id + ".mp4")

    try:
        if not video.isOpened():
            # Report a missing/unreadable file instead of counting it as exported.
            print('Could not open video:', video_id, '- skipping\n')
            continue

        frame_i = 0
        # Stop decoding once the last frame to export has been handled.
        while frame_i <= last_needed_frame:

            ret, frame = video.read()
            if not ret:
                break

            if frame_i in frames_diff:

                # Reverse the channel axis (OpenCV's BGR order -> RGB) before drawing.
                frame = frame[..., ::-1]

                # First and last position of this frame inside `frames`: one position per
                # row (pedestrian) annotated in the frame. Independent of the model, so
                # computed once per frame.
                frame_first_index = frames.index(frame_i)
                frame_last_index = len(frames_rev) - frames_rev.index(frame_i) - 1

                # NOTE(review): this slice assumes the rows of a video are contiguous in
                # `dataset.loadedData`, ordered by (frame, ped_id), and aligned with the
                # prediction lists - confirm against SkeletonsDataset.
                first_row = video_first_dataset_row + frame_first_index
                last_row = video_first_dataset_row + frame_last_index

                title_prefix = "Video: " + video_id
                title_prefix = title_prefix + " - Frame: " + str(frame_i)
                title_prefix = title_prefix + "\nPose estimator: " + poseEstimator

                for run_predictions, run_groundtruth, classifier_label, run_output_dir in model_runs:

                    frame_prediction = ["Crossing" if f else "Not-crossing"
                                        for f in run_predictions[first_row:last_row + 1]]
                    frame_groundtruth = ["Crossing" if f else "Not-crossing"
                                         for f in run_groundtruth[first_row:last_row + 1]]

                    im_title = title_prefix + "\nClassifier: " + classifier_label

                    fig = dataset.showSkeleton(videoNum=video_id, frameNum=frame_i, showLegend=False, frameImage=frame,
                                               normalizedSkeletons=False, title=im_title, show=False,
                                               predictions=frame_prediction, groundtruths=frame_groundtruth)

                    fig.savefig(run_output_dir + "/" + video_id + "_" + str(frame_i) + ".png",
                                dpi=fig.dpi, bbox_inches='tight')

                    # Close the figure explicitly so figures do not pile up in memory.
                    plt.close(fig)

            frame_i = frame_i + 1

    finally:
        # Release the capture even when drawing or saving raises.
        video.release()

    exported = exported + 1

    print('Exported video:', video_id, ' - Clip', str(exported) + '/' + str(len(selected_videos)), '\n')

Starting processing of video video_0005
Exported video: video_0005  - Clip 1/10 

Starting processing of video video_0016
Exported video: video_0016  - Clip 2/10 

Starting processing of video video_0017
Exported video: video_0017  - Clip 3/10 

Starting processing of video video_0028
Exported video: video_0028  - Clip 4/10 

Starting processing of video video_0042
Exported video: video_0042  - Clip 5/10 

Starting processing of video video_0045
Exported video: video_0045  - Clip 6/10 

Starting processing of video video_0046
Exported video: video_0046  - Clip 7/10 

Starting processing of video video_0048
Exported video: video_0048  - Clip 8/10 

Starting processing of video video_0053
Exported video: video_0053  - Clip 9/10 

Starting processing of video video_0055
Exported video: video_0055  - Clip 10/10 

