In [1]:
import os
import re
import time
import numpy as np
import torch
import torch.nn as nn
from torch.utils import data
from glob import glob

# Prepare Dataset

In [13]:
np.random.seed(42)
test = np.random.randn(3, 4, 5)
np.cov(test[0])

array([[ 5.01592876e-01, -4.20712542e-01, -1.33449991e-01,
         1.63951386e-01],
       [-4.20712542e-01,  6.74275176e-01,  3.52944979e-01,
        -3.03432845e-04],
       [-1.33449991e-01,  3.52944979e-01,  8.46100168e-01,
         4.70929056e-01],
       [ 1.63951386e-01, -3.03432845e-04,  4.70929056e-01,
         4.23714402e-01]])

In [32]:
np.dot(test, np.random.randn(5,10)).shape

(3, 4, 10)

In [20]:
evals = np.array([4,3,2,1])
evals = evals / np.sum(evals)
contrib = np.cumsum(evals)
print(contrib[2])
sum(contrib > 0.85)

0.8999999999999999


2

In [2]:
POSE_FEATURE_PATH = "D:/CU Files/IoT/Featurized_dataset/"
IMAGE_FEATURE_PATH = "D:/CU Files/IoT/image_feature/resnet50/"

def get_path_dict(pose_feature_path,
                  image_feature_path):
    all_pose_path = [y for y in glob(os.path.join(pose_feature_path, "*.mp4.npz"))]
    all_pose_name = [re.findall("Featurized_dataset\\\\(.+).mp4.npz", path)[0]
                     for path in all_pose_path]
    all_image_path = [y for y in glob(os.path.join(image_feature_path, "*.mp4.npz"))]
    all_image_name = [re.findall(image_feature_path[:-1] + "\\\\(.+).mp4.npz", path)[0] for path in all_image_path]
    name_intersection = list(set(all_pose_name).intersection(set(all_image_name)))

    print("{} pose feature files".format(len(all_pose_name)))
    print("{} image feature files".format(len(all_image_name)))
    print("{} feature files available".format(len(name_intersection)))


    pi_path_dict = {}   # get a dictionary which records the pose and image feature path
    for i, feature_name in enumerate(all_pose_name):
        try:
            idx = all_image_name.index(feature_name)
        except:
            continue
        pose_path = all_pose_path[i]
        image_path = all_image_path[idx]
        pi_path_dict[feature_name] = (pose_path, image_path)

    return pi_path_dict

In [3]:
path_dict = get_path_dict(pose_feature_path=POSE_FEATURE_PATH, image_feature_path=IMAGE_FEATURE_PATH)

977 pose feature files
969 image feature files
969 feature files available


In [9]:
for feature_name, (pose_path, image_path) in path_dict.items():
    pose_file = np.load(pose_path, allow_pickle=True)
    trajectory = []
    for i, (_, k) in enumerate(pose_file["keypoints"]):
        if len(k) != 0:
            two_d_point = k[0, [0,1], :]
            trajectory.append(two_d_point)
    trajectory = np.stack(trajectory, axis=0).reshape(len(trajectory), -1)
    print(trajectory.shape)

    image_file = np.load(image_path, allow_pickle=True)
    image_feature = image_file['feature']
    print(image_feature.shape)
    print(np.hstack((trajectory, image_feature)).shape)
    break

(127, 34)
(127, 2048)
(127, 2082)


In [12]:
class mydataset(data.Dataset):
    def __init__(self, path_dict, pose=True, image=True):
        self.path_dict = path_dict
        self.Data, self.Label = self._get_features(pose, image)

    def _get_features(self, pose=True, image=True):
        features = []
        labels = []
        self.__label_encoder = {'no_interaction':0,
                                'open_close_fridge':1,
                                'put_back_item':2,
                                'screen_interaction':3,
                                'take_out_item':4}
        for feature_name, (pose_path, image_path) in path_dict.items():
            label = '_'.join(feature_name.split('_')[:-3])
            labels.append(self.__label_encoder[label])
            this_feature = self._get_single_feature(pose_path, image_path, pose, image)
            features.append(this_feature)

        return features, labels

    def _get_single_feature(self, pose_path, image_path, pose, image):
        if not pose:
            image_file = np.load(image_path, allow_pickle=True)
            image_feature = image_file["feature"]
            return image_feature
        elif not image:
            pose_file = np.load(pose_path, allow_pickle=True)
            pose_feature = self._extract_trajectories(pose_file["keypoints"])
            return pose_feature
        else:
            image_file = np.load(image_path, allow_pickle=True)
            image_feature = image_file["feature"]
            pose_file = np.load(pose_path, allow_pickle=True)
            pose_feature = self._extract_trajectories(pose_file["keypoints"])
            assert image_feature.shape[0] == pose_feature.shape[0], "number of frames mismatch"
            return np.hstack((pose_feature, image_feature))

    def _extract_trajectories(self, keypoints):
        trajectory = []
        for i, (_, k) in enumerate(keypoints):
            if len(k) != 0:
                two_d_point = k[0, [0,1], :]
                trajectory.append(two_d_point)
        trajectory = np.stack(trajectory, axis=0).reshape(len(trajectory), -1)
        return trajectory

    def __getitem__(self, index):
        seq = torch.from_numpy(self.Data[index])
        label = torch.tensor(self.Label[index])
        return seq, label

    def __len__(self):
        return len(self.Label)


def collate_fn(batch):
    seq_list = [item[0] for item in batch]
    labels = torch.LongTensor([item[1] for item in batch])
    return seq_list, labels

In [13]:
def data_split(path_dict, mode="tt"):
    if mode == "tt":
        train_path_dict = {}
        test_path_dict = {}
        for key, value in path_dict.items():
            u = np.random.uniform(0, 1)
            if u < 0.9:
                train_path_dict[key] = value
            else:
                test_path_dict[key] = value
        return train_path_dict, test_path_dict
    if mode == "tvt":
        train_path_dict = {}
        valid_path_dict = {}
        test_path_dict = {}
        for key, value in path_dict.items():
            u = np.random.uniform(0, 1)
            if u < 0.8:
                train_path_dict[key] = value
            elif 0.8 < u < 0.9:
                valid_path_dict[key] = value
            else:
                test_path_dict[key] = value
        return train_path_dict, valid_path_dict, test_path_dict

In [14]:
train_path_dict, test_path_dict = data_split(path_dict, mode="tt")
train_dataset = mydataset(train_path_dict, pose=True, image=False)
train_loader = data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=32, shuffle=True)
test_dataset = mydataset(test_path_dict, pose=True, image=False)
test_loader = data.DataLoader(test_dataset, collate_fn=collate_fn, batch_size=20, shuffle=True)

In [15]:
for X, Y in train_loader:
    print(X[0].shape)
    break

torch.Size([222, 34])


# Models

In [16]:
class LSTMClassifier(nn.Module):
    def __init__(self, input_size=2082, hidden_size=1024):
        super(LSTMClassifier, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=3,
            batch_first=True
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 5)
        )

    def forward(self, sequences):
        outputs1 = []
        outputs2 = []
        outputs3 = []
        for seq in sequences:
            out, (hidden, _) = self.lstm(seq)
            outputs1.append(hidden[0,:])
            outputs2.append(hidden[1,:])
            outputs3.append(hidden[2,:])
        outputs1 = torch.stack(outputs1)
        outputs2 = torch.stack(outputs2)
        outputs3 = torch.stack(outputs3)

        y1 = self.fc(outputs1)
        y2 = self.fc(outputs2)
        y3 = self.fc(outputs3)

        return y1, y2 ,y3

In [28]:
length_list = list(map(lambda x: x.shape[0], train_dataset.Data))
length_list.index(1093)

584

# Train Model

In [17]:
def get_acc(output, label):
    total = output.shape[0]
    _, pred_label = output.max(1)
    num_correct = (pred_label == label).sum().item()
    return num_correct / total

In [18]:
def train(model, train_loader, valid_loader, epochs, optimizer, criterion, device=torch.device("cuda:0")):
    prev_time = time.time()
    model.to(device)

    for epoch in range(epochs):
        train_loss = 0
        train_acc = 0
        valid_loss = 0
        valid_acc = 0
        model.train()
        for X, Y in train_loader:
            # forward
            ## move to device
            X = [x.to(device) for x in X]
            Y = Y.to(device)
            out1, out2, out3 = model(X)
            loss1 = criterion(out1, Y)
            loss2 = criterion(out2, Y)
            loss3 = criterion(out3, Y)
            loss = 0.2 * loss1 + 0.3 * loss2 + 0.5 * loss3
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_acc += get_acc(out3, Y)

        # evaluation
        model.eval()
        with torch.no_grad():
            for X, Y in valid_loader:
                X = [x.to(device) for x in X]
                Y = Y.to(device)
                out1, out2, out3 = model(X)
                loss = criterion(out3, Y)
                valid_loss += loss.item()
                valid_acc += get_acc(out3, Y)

        print("Epoch {}   Train Loss:{:.3f}   Train Acc:{:.3f}   Valid Loss:{:.3f}   Valid Acc:{:.3f}   Time:{}".format(
            epoch, train_loss / len(train_loader), train_acc / len(train_loader), valid_loss / len(test_loader), valid_acc / len(test_loader), time.time() - prev_time
        ))
        prev_time = time.time()

In [19]:
lstm = LSTMClassifier(input_size=34, hidden_size=128)
EPOCHS = 50
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)

train(lstm, train_loader, test_loader, EPOCHS, optimizer, criterion)

Epoch 0   Train Loss:1.638   Train Acc:0.220   Valid Loss:1.670   Valid Acc:0.209   Time:18.04733633995056
Epoch 1   Train Loss:1.610   Train Acc:0.215   Valid Loss:1.661   Valid Acc:0.209   Time:13.971253395080566
Epoch 2   Train Loss:1.604   Train Acc:0.217   Valid Loss:1.608   Valid Acc:0.214   Time:14.00832200050354
Epoch 3   Train Loss:1.609   Train Acc:0.210   Valid Loss:1.605   Valid Acc:0.220   Time:14.0246000289917
Epoch 4   Train Loss:1.607   Train Acc:0.221   Valid Loss:1.599   Valid Acc:0.248   Time:14.026754140853882
Epoch 5   Train Loss:1.597   Train Acc:0.256   Valid Loss:1.635   Valid Acc:0.207   Time:14.023938417434692
Epoch 6   Train Loss:1.599   Train Acc:0.220   Valid Loss:1.613   Valid Acc:0.212   Time:14.031240463256836
Epoch 7   Train Loss:1.593   Train Acc:0.248   Valid Loss:1.660   Valid Acc:0.206   Time:14.01772665977478
Epoch 8   Train Loss:1.594   Train Acc:0.239   Valid Loss:1.632   Valid Acc:0.230   Time:14.030089616775513
Epoch 9   Train Loss:1.599   Trai