## Ensemble for one video
### Imports

In [7]:
import torch
import torchvision.transforms as transforms
from PIL import Image
from collections import OrderedDict
import numpy as np
import os
import sys

# I hate python sooooooo much for making me do this
sys.path.append(os.path.abspath('DataPreparation'))
sys.path.append(os.path.abspath('models'))
sys.path.append(os.path.abspath('FeatureExtraction'))
sys.path.append(os.path.abspath('SL-GCN/data_gen/'))
#sys.path.append(os.path.abspath('../SL-GCN/data_gen'))

from Conv3D import r2plus1d_18
import decouple_gcn_attn
from T_Pose_model import T_Pose_model
import split_video
import demo
import gen_frames
import wholepose_features_extraction
import sign_gendata
import gen_bone_data
import gen_motion_data
import sign_27



### Load Models

In [8]:
joint_model = torch.load('D:/joint_models/sign_joint_final-24-95.pt')
joint_motion_model = torch.load('D:/joint_motion_models/sign_joint_motion_final-32-86.pt')
bone_model = torch.load('D:/bone_models/sign_bone_final-25.pt')
bone_motion_model = torch.load('D:/bone_motion_models/sign_bone_motion_final-25.pt')
#decent sign_resnet2d+1_5_epoch009.pth
rgb_model = torch.load('D:/rgb_final/rgb_final_3layers_16frames_epoch9isbest/sign_resnet2d+1_5_epoch009.pth')
tcn_model = torch.load('D:/TCN_models/T_Pose_model_16_99.0.pth')

## Data Prep

In [9]:
# The purpose of this notebook is extracting the needed info from the video (bones, joints, frames, etc.)
# It will save this info in the same directory as the video
path_to_video = 'D:/output/i-have-school-tomorrow/'
#output_path = 'D:/output/hello-what-you-name/'


### Split video into 16 frame sets

In [None]:
split_video.run(path_to_video + "i-have-school-tomorrow.mp4")

### Generate npy files for GCN

In [None]:
# First extract .npy file from video using demo.py

print("generating npy files")
#loop through each folder in the path_to_videos, and run demo.py on each video
for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder):
        os.mkdir(path_to_video + folder + '/npy/')
        demo.run(path_to_video + folder, path_to_video + folder + '/npy/')
print("done generating npy files")

### Extract frames from video

In [None]:
# Next we will use the .npy file to extract the frames of the video

print("generating frames")
for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder):
        #print(path_to_videos + folder + "/16.mp4")
        os.mkdir(path_to_video + folder + '/frames/')
        gen_frames.run(path_to_video + folder, path_to_video + folder + '/frames/', path_to_video + folder + '/npy/')
print("done generating frames")

### Generate pt files for TCN

In [None]:
# Now extract .pt file from video using wholepose_features_extraction.py

print("generating wholepose feature files")
for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder):
        os.mkdir(path_to_video + folder + '/pt/')
        wholepose_features_extraction.run(path_to_video + folder, path_to_video + folder + '/pt/', False)
print("done generating wholepose feature files")

### Generate sign data for the GCN

In [None]:
print("generating sign data files")
for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder):
        os.mkdir(path_to_video + folder + '/sign_gen/')
        sign_gendata.run(path_to_video + folder + '/npy/', path_to_video + folder + '/sign_gen/')
print("done generating sign data files")

### Generate bone data for GCN

In [None]:
print("generating bone data files")
for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder):
        # bone data is saved into sign_gen folder
        gen_bone_data.run(path_to_video + folder + '/sign_gen/')
print("done generating bone data files")

### Generate motion data for the GCN

In [None]:
print("generating motion data files for joint and bones")
for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder):
        #os.mkdir(path_to_video + folder + '/sign_gen/')
        gen_motion_data.run(path_to_video + folder + '/sign_gen/')
print("done generating motion data files for joint and bones")

## Predictions

### 3D CNN Prediction

In [13]:
transform = transforms.Compose([transforms.Resize([240, 240]),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.5], std=[0.5])])
#input_clips = []
class_labels = ["car", "go", "have", "hello", "my", "name", "school", "self", "tomorrow", "we", "what", "yesterday", "you"]
model = r2plus1d_18(pretrained=True, num_classes=13)
new_state_dict = OrderedDict()

for k, v in rgb_model.items():
    #name = k[7:] # remove 'module.'
    name = k.replace('module.', '')
    new_state_dict[name]=v

# Will be in form of a list of lists of tuples (percent, label)
cnn_predictions = []

model.load_state_dict(new_state_dict)
for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder + '/frames/0'):
        images = []
        input_clips = []
        for i, file in enumerate(os.listdir(path_to_video + folder + '/frames/0')):
            if i < 4:
                continue
            image = Image.open(path_to_video + folder + '/frames/0/' + file)
            image = transform(image)
            images.append(image)
            if len(images) == 16:
                images = torch.stack(images, dim=0)
                images = images.permute(1, 0, 2, 3)
                images = torch.Tensor(images)
                images = images.unsqueeze(0)
                input_clips.append(images)
                images = []

#outputs_clips =[]
#for i_clip in range(inputs_clips.size(1)):
#    inputs = inputs_clips[:,i_clip,:,:]
#    outputs_clips.append(model(inputs))
#input = inputs_clips[:,i_clip,:,:]
        print("Predictions for " + folder + ":")
        for set in input_clips:
            current_set_preds = []
            output = model(set)
            # Convert the predictions to probabilities using softmax
            probs = torch.nn.functional.softmax(output, dim=1)

            # Get the top k probabilities and their indices
            top_probs, top_idxs = probs.topk(5, dim=1)

            # Convert indices to class labels
            top_classes = [class_labels[idx] for idx in top_idxs[0]]

            # Print the top k probabilities and their corresponding class labels
            for i in range(3):
                current_set_preds.append((top_probs[0][i]*100, top_classes[i]))
            #print("\n")
            cnn_predictions.append(current_set_preds)
print(cnn_predictions)



Predictions for set_0:
Predictions for set_1:
Predictions for set_2:


RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 132710400 bytes.

### Graph Convolutional Predictions (GCN)

In [11]:

class_labels = ["car", "go", "have", "hello", "my", "name", "school", "self", "tomorrow", "we", "what", "yesterday", "you"]

# remove unnecessary module. from state_dict
joint_state_dict = OrderedDict()
bone_state_dict = OrderedDict()
joint_motion_state_dict = OrderedDict()
bone_motion_state_dict = OrderedDict()
for i, model in enumerate([joint_model, bone_model, joint_motion_model, bone_motion_model]):
    for k, v in model.items():
        #name = k[7:] # remove 'module.'
        name = k.replace('module.', '')
        if i == 0:
            joint_state_dict[name]=v
        elif i == 1:
            bone_state_dict[name]=v
        elif i == 2:
            joint_motion_state_dict[name]=v
        elif i == 3:
            bone_motion_state_dict[name]=v

# Load model architechure
Model_j = decouple_gcn_attn.Model(13, 27, 1, 16, 41,  "sign_27.Graph", {"labeling_mode": 'spatial'}, 3)
Model_b = decouple_gcn_attn.Model(13, 27, 1, 16, 41, "sign_27.Graph", {"labeling_mode": 'spatial'}, 3)
Model_jm = decouple_gcn_attn.Model(13, 27, 1, 16, 41, "sign_27.Graph", {"labeling_mode": 'spatial'}, 3)
Model_bm = decouple_gcn_attn.Model(13, 27, 1, 16, 41, "sign_27.Graph", {"labeling_mode": 'spatial'}, 3)

# Load model states from checkpoints
Model_j.load_state_dict(joint_state_dict)
Model_b.load_state_dict(bone_state_dict)
Model_jm.load_state_dict(joint_motion_state_dict)
Model_bm.load_state_dict(bone_motion_state_dict)

# Set the model to evaluation mode
Model_j.eval()
Model_b.eval()
Model_jm.eval()
Model_bm.eval()

gcn_predictions = []

for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder + '/sign_gen/'):
        bone_npy = np.load(path_to_video + folder + '/sign_gen/test_data_bone.npy')
        joint_npy = np.load(path_to_video + folder + '/sign_gen/test_data_joint.npy')
        bone_motion_npy = np.load(path_to_video + folder + '/sign_gen/test_data_bone_motion.npy')
        joint_motion_npy = np.load(path_to_video + folder + '/sign_gen/test_data_joint_motion.npy')

        # Load the data onto the GPU if available
        #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        bone_npy = torch.from_numpy(bone_npy).to(device)
        joint_npy = torch.from_numpy(joint_npy).to(device)
        bone_motion_npy = torch.from_numpy(bone_motion_npy).to(device)
        joint_motion_npy = torch.from_numpy(joint_motion_npy).to(device)

        # Make predictions using the four models
        with torch.no_grad():
            joint_output = Model_j(joint_npy)
            bone_output = Model_b(bone_npy)
            joint_motion_output = Model_jm(joint_motion_npy)
            bone_motion_output = Model_bm(bone_motion_npy)

        # Print the top 5 predictions and their confidence percentages for each model
        def print_top_5(output):
            probabilities = torch.nn.functional.softmax(output, dim=1)
            top_5_probs, top_5_indices = torch.topk(probabilities, 5)
            for i in range(5):
                print(f"Prediction {i+1}: {class_labels[top_5_indices[0][i]]}: {top_5_probs[0][i]*100:.2f}%")
        print("Predictions for " + folder + ":")
        print("Joint Model:")
        print_top_5(joint_output)
        print("Bone Model:")
        print_top_5(bone_output)
        print("Joint Motion Model:")
        print_top_5(joint_motion_output)
        print("Bone Motion Model:")
        print_top_5(bone_motion_output)
        print("\n")



  nn.init.kaiming_normal(conv.weight, mode='fan_out')
  nn.init.constant(conv.bias, 0)
  nn.init.constant(bn.weight, scale)
  nn.init.constant(bn.bias, 0)
  nn.init.constant(self.Linear_bias, 1e-6)
  self.eyes = nn.Parameter(torch.tensor(torch.stack(
  nn.init.normal(self.fc.weight, 0, math.sqrt(2. / num_class))


Predictions for set_0:
Joint Model:
Prediction 1: you: 54.35%
Prediction 2: what: 31.85%
Prediction 3: hello: 6.96%
Prediction 4: self: 4.23%
Prediction 5: tomorrow: 1.75%
Bone Model:
Prediction 1: self: 85.82%
Prediction 2: my: 6.94%
Prediction 3: tomorrow: 2.55%
Prediction 4: yesterday: 1.91%
Prediction 5: you: 1.84%
Joint Motion Model:
Prediction 1: tomorrow: 98.41%
Prediction 2: self: 0.82%
Prediction 3: we: 0.56%
Prediction 4: you: 0.21%
Prediction 5: my: 0.01%
Bone Motion Model:
Prediction 1: self: 69.44%
Prediction 2: my: 30.31%
Prediction 3: we: 0.16%
Prediction 4: you: 0.05%
Prediction 5: hello: 0.02%


Predictions for set_1:
Joint Model:
Prediction 1: what: 95.88%
Prediction 2: you: 2.90%
Prediction 3: hello: 0.52%
Prediction 4: self: 0.36%
Prediction 5: name: 0.11%
Bone Model:
Prediction 1: tomorrow: 28.97%
Prediction 2: have: 22.49%
Prediction 3: yesterday: 15.63%
Prediction 4: self: 12.98%
Prediction 5: what: 7.96%
Joint Motion Model:
Prediction 1: you: 59.64%
Prediction 2

### Temporal Convolutional Predictions (TCN)

In [12]:
class_labels = ["car", "go", "have", "hello", "my", "name", "school", "self", "tomorrow", "we", "what", "yesterday", "you"]

#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
model = T_Pose_model(frames_number=60,joints_number=33,
    n_classes=13
)
#model = nn.DataParallel(model)    
model = model.to(device)
tcn_state_dict = OrderedDict()

for k, v in tcn_model.items():
    #name = k[7:] # remove 'module.'
    name = k.replace('module.', '')
    tcn_state_dict[name]=v

# Add weights from checkpoint model
model.load_state_dict(tcn_state_dict)#,strict=False)

model.eval()

for folder in os.listdir(path_to_video):
    if os.path.isdir(path_to_video + folder + '/pt/'):
        pt_file = path_to_video + folder + '/pt/0.mp4.pt'
        data = torch.load(pt_file)
        #data = data.contiguous().view(1,-1,24,24)
        data_in = torch.autograd.Variable(data.to(device), requires_grad=False)
        with torch.no_grad():
            pred=model(data_in)
        #pred = pred.cpu().detach().numpy()
        def print_top_5(output):
            probabilities = torch.nn.functional.softmax(output, dim=1)
            top_5_probs, top_5_indices = torch.topk(probabilities, 5)
            for i in range(5):
                print(f"Prediction {i+1}: {class_labels[top_5_indices[0][i]]}: {top_5_probs[0][i]*100:.2f}%")
        print("Predictions for " + folder + ":")
        print_top_5(pred)
        print("\n")



Predictions for set_0:
Prediction 1: self: 73.46%
Prediction 2: you: 18.64%
Prediction 3: tomorrow: 4.66%
Prediction 4: we: 0.74%
Prediction 5: hello: 0.56%


Predictions for set_1:
Prediction 1: tomorrow: 31.99%
Prediction 2: car: 15.06%
Prediction 3: you: 11.89%
Prediction 4: have: 11.83%
Prediction 5: go: 7.91%


Predictions for set_2:
Prediction 1: have: 50.89%
Prediction 2: school: 9.70%
Prediction 3: tomorrow: 7.17%
Prediction 4: go: 6.96%
Prediction 5: self: 6.69%


Predictions for set_3:
Prediction 1: name: 33.27%
Prediction 2: school: 17.47%
Prediction 3: tomorrow: 13.11%
Prediction 4: you: 7.20%
Prediction 5: hello: 6.41%




## Ensemble