# Action Classification Using Pose-Motion feature representation

On scaled pose motion representation without cropping and data augmentation

Using GPU_0


In [1]:
# import required packages and global variables
import sys
import os
import io
import os.path as osp
import numpy as np
import math
import matplotlib.pyplot as plt
import glob
%matplotlib inline

### Defining global variables

In [2]:
# --- Source video / representation settings ---------------------------------
DTYPE = np.float32
height = 1080  # frame height in pixels
width = 1920  # frame width in pixels
fps = 30.0
col_ch = 3  # multiplied by 14 further down to get the network's input channel count
sigma = 2
resize_scale = 0.125  # frame size is multiplied by this to get the network input size
sub_sample = 1500
crop = False

# Action labels; a label's list position is its class index.
classes = ['Sitting', 'Sit-to-Stand', 'Standing', 'Walking', 'Stand-to-Sit']

# Keypoint names: the nose, followed by left/right pairs of each body part.
keypoints = ['nose'] + [
    side + '_' + part
    for part in ('eye', 'ear', 'shoulder', 'elbow', 'wrist', 'hip', 'knee', 'ankle')
    for side in ('left', 'right')
]

## Designing the action classification network

In [3]:
# import extra libraries required for designing the network
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import lmdb
import shutil
import time
from imageio import imread
import caffe2.python.predictor.predictor_exporter as pe
from caffe2.proto import caffe2_pb2
from caffe2.python.predictor import mobile_exporter
from caffe2.python import (
    brew,
    core,
    model_helper,
    net_drawer,
    optimizer,
    visualize,
    workspace,
    memonger
)

# Initialise Caffe2 once for this kernel. For far more detailed initialization
# output, change --caffe2_log_level=0 to --caffe2_log_level=-1.
caffe2_init_args = ['caffe2', '--caffe2_log_level=0']
core.GlobalInit(caffe2_init_args)
print("Necessities for action recognition network is imported!")

net_drawer will not run correctly. Please install the correct dependencies.
Necessities for action recognition network is imported!


No handlers could be found for logger "caffe2.python.net_drawer"


### Define CNN model
Define dataset-specific parameters, and declare model training parameters.
#### Come back and tinker with these parameters to see how they affect training and efficiency.
`base_learning_rate` and `weight_decay` both influence training; it can be interesting to change them and observe the impact on accuracy or confidence.

In [4]:
# Paths to LMDBs (all three databases live in the same results directory)
_results_dir = osp.join('/data', 'BehnazData', 'Results', 'PoseBased_ActionRec')
training_lmdb_path = osp.join(_results_dir, 'training_3_2_111_lmdb')
validation_lmdb_path = osp.join(_results_dir, 'validation_3_2_111_lmdb')
testing_lmdb_path = osp.join(_results_dir, 'testing_3ch_lmdb')

# File names of the exported init & predict nets
init_net_out = 'TuftsAction_init_net.pb'
predict_net_out = 'TuftsAction_predict_net.pb'

# Dataset specific params
train_data_count = 4699 * 4
validation_count = 554 * 4
test_data_count = 624 * 4
data_db_type = "lmdb"
image_width = int(width * resize_scale)    # input image width
image_height = int(height * resize_scale)  # input image height
image_channels = 14 * col_ch               # input image channels
num_classes = 5                            # number of action classes

# Training params
num_epoch = 40
batch_size = 50             # total batch size
validation_interval = 50    # validate every <validation_interval> training iterations
checkpoint_iters = 500      # output checkpoint db every <checkpoint_iters> iterations
base_learning_rate = 0.01   # initial learning rate (scale with total batch size)
step_size = 1               # `stepsize` of the "step" LR policy in AddOptimizerOps_nestsgd
weight_decay = 1e-3         # weight decay (L2 regularization)

In [5]:
root_folder = os.path.join('..', 'classification_net', 'SingleGPU')

# Make sure root_folder exists; an already-present directory is fine,
# any other failure to create it is re-raised.
try:
    os.makedirs(root_folder)
except OSError:
    if not os.path.isdir(root_folder):
        raise

# Resetting workspace with root_folder argument sets root_folder as working directory
workspace.ResetWorkspace(root_folder)


True

### Defining helper functions

In [6]:
def AddInput(model, db, db_type, batch_size, noise=0):
    """Add the database input ops (and optional additive noise) to `model`.

    Reads batches from `db` into the blobs "data_f32", "label" and "ID" and
    casts "data_f32" into the float blob "data". When `noise` is truthy,
    Gaussian noise (mean 0.0, std 1.0) of shape
    [batch_size, image_channels, image_height, image_width] is added and the
    noisy result is copied back into "data".

    The noise operators are only added when they are requested, so a model
    built with noise=0 no longer generates a full-size random tensor on every
    run and does not depend on the global image shape.

    Args:
        model: model helper the operators are appended to.
        db: path of the database to read from.
        db_type: database type string (e.g. "lmdb").
        batch_size: number of samples per batch.
        noise: truthy to feed the network the noise-corrupted input.

    Returns:
        None. Later layers refer to the blobs by name ("data", "label").
    """
    # load the data
    data_f32, label, ID = brew.db_input(
        model,
        blobs_out=["data_f32", "label", "ID"],
        batch_size=batch_size,
        db=db,
        db_type=db_type,
    )
    data = model.Cast(data_f32, "data", to=core.DataType.FLOAT)

    if noise:
        # run_once=0: presumably the fill is re-evaluated on every net run,
        # giving fresh noise per batch -- TODO confirm against the operator docs.
        gaussian = model.GaussianFill(
            [], "noise",
            shape=[batch_size, image_channels, image_height, image_width],
            mean=0.0, std=1.0, run_once=0)
        data_noise = data.Add(gaussian, "data_noise")
        # Overwrite "data" so the network (which reads "data") sees the noisy input.
        data = model.Copy(data_noise, "data")

    # prevent back-propagation: optional performance improvement; may not be observable at small scale
    data = model.StopGradient(data, data)
    if noise:
        model.StopGradient(data_noise, data_noise)
    
    
def update_dims(height, width, kernel, stride, pad):
    """Return the (height, width) left after one convolution or pooling layer.

    Each spatial size is mapped through
    ``(size - kernel + 2 * pad) // stride + 1``, which keeps the tracked
    dimensions in step with layers that downsample the input.
    """
    def _output_size(size):
        # Same arithmetic for both axes.
        return ((size - kernel + 2 * pad) // stride) + 1

    return _output_size(height), _output_size(width)

# Defining the action classification network model
def _add_conv_block(model, blob_in, tag, dim_in, dim_out, stride, height, width, is_test):
    """Add one conv(3x3, pad 0) -> ReLU -> spatial BN -> dropout(0.35) stage.

    Blobs are named 'conv<tag>', 'relu<tag>', 'bn<tag>' and 'dropout<tag>'.

    Returns:
        (dropout_blob, new_height, new_width), where the new spatial size is
        what `update_dims` computes for this convolution.
    """
    conv = brew.conv(model, blob_in, 'conv' + tag, dim_in=dim_in, dim_out=dim_out,
                     kernel=3, stride=stride, pad=0)
    relu = brew.relu(model, conv, 'relu' + tag)
    bn = brew.spatial_bn(model, relu, 'bn' + tag, dim_in=dim_out, epsilon=1e-3,
                         momentum=0.1, is_test=is_test)
    dropout = brew.dropout(model, bn, 'dropout' + tag, ratio=0.35, is_test=is_test)
    height, width = update_dims(height=height, width=width, kernel=3, stride=stride, pad=0)
    return dropout, height, width


def Add_Action_Tufts_Model(model, num_classes, image_height, image_width, image_channels, is_test=0):
    """Add the action-classification CNN, its loss and its accuracy ops to `model`.

    The network reads the blob 'data' and is made of three blocks of two
    conv stages each (64, 128 and 256 channels; the first stage of every
    block uses stride 2, the second stride 1), followed by global average
    pooling, a fully connected layer 'fc1' and SoftmaxWithLoss against the
    blob 'label'. It also adds the 'accuracy', 'accuracy_per_class' and
    'amount_per_class' outputs.

    Args:
        model: model helper the operators are appended to.
        num_classes: number of output classes of the final FC layer.
        image_height: height of the input blob, used to track feature-map size.
        image_width: width of the input blob, used to track feature-map size.
        image_channels: number of channels of the input blob.
        is_test: forwarded to the batch-norm and dropout layers.

    Returns:
        A one-element list holding the loss blob.

    Raises:
        ValueError: if the tracked feature map shrinks below 1x1, i.e. the
            input is too small for the convolution stack.
    """
    # (blob-name tag, output channels, stride) of every conv stage, in order.
    stages = [
        ('1_1', 64, 2), ('1_2', 64, 1),     # Block 1
        ('2_1', 128, 2), ('2_2', 128, 1),   # Block 2
        ('3_1', 256, 2), ('3_2', 256, 1),   # Block 3
    ]

    blob, channels = 'data', image_channels
    # Every stage updates h, w from the PREVIOUS stage's size. The original code
    # restarted from the input size at conv2_1, which made the bookkeeping wrong.
    h, w = image_height, image_width
    for tag, dim_out, stride in stages:
        blob, h, w = _add_conv_block(model, blob, tag, channels, dim_out, stride, h, w, is_test)
        if h < 1 or w < 1:
            raise ValueError(
                "input of size {}x{} is too small: feature map is {}x{} after conv{}".format(
                    image_height, image_width, h, w, tag))
        channels = dim_out

    # Global average pooling
    pool1 = brew.average_pool(model, blob, 'pool1', global_pooling=True)
    # Fully connected layer
    pred = brew.fc(model, pool1, 'fc1', dim_in=channels, dim_out=num_classes)
    # Softmax layer with loss, plus overall and per-class accuracy
    softmax, loss = model.SoftmaxWithLoss([pred, 'label'], ['softmax', 'loss'])
    brew.accuracy(model, [softmax, 'label'], 'accuracy')
    model.net.MultiClassAccuracy([softmax, 'label'], ['accuracy_per_class', 'amount_per_class'])
    return [loss]

def AddOptimizerOps_fixsgd(model):
    """Attach SGD with a fixed learning-rate policy to `model`.

    Uses hard-coded settings: learning rate 0.01, momentum 0.9 and
    weight decay 0.004.
    """
    sgd_settings = dict(
        base_learning_rate=0.01,
        policy="fixed",
        momentum=0.9,
        weight_decay=0.004,
    )
    optimizer.build_sgd(model, **sgd_settings)

def AddOptimizerOps_adam(model):
    """Attach the Adam optimizer to `model`.

    Only the learning rate is set (from the notebook-level
    `base_learning_rate`); no other argument is passed to `build_adam`.
    """
    adam_lr = base_learning_rate
    optimizer.build_adam(model, base_learning_rate=adam_lr)
def AddOptimizerOps_sgd(model):
    """Attach momentum SGD with a "step" learning-rate policy to `model`.

    Hard-coded settings: learning rate 0.01, stepsize 1, gamma 0.999,
    momentum 0.9, no Nesterov momentum.
    """
    step_schedule = dict(policy='step', stepsize=1, gamma=0.999)
    optimizer.build_sgd(
        model,
        base_learning_rate=0.01,
        momentum=0.9,
        nesterov=False,
        **step_schedule
    )
    
def AddOptimizerOps_nestsgd(model):
    """Attach hand-built Nesterov-momentum SGD with weight decay to `model`.

    Adds weight decay (`weight_decay`), an iteration counter blob "iter", a
    "step" learning-rate blob "lr" (base `base_learning_rate`, stepsize
    `step_size`, gamma 0.1) and, for every model parameter, a zero-initialised
    '<param>_momentum' blob together with a MomentumSGDUpdate operator.
    """
    brew.add_weight_decay(model, weight_decay)

    step_counter = brew.iter(model, "iter")
    lr = model.net.LearningRate(
        [step_counter], "lr",
        base_lr=base_learning_rate, policy="step", stepsize=step_size, gamma=0.1,
    )

    for param in model.GetParams():
        grad = model.param_to_grad[param]
        # Momentum buffer shaped like the parameter, starting at zero.
        velocity = model.param_init_net.ConstantFill(
            [param], param + '_momentum', value=0.0)

        # Gradient, momentum buffer and parameter are all updated in place.
        model.net.MomentumSGDUpdate(
            [grad, velocity, lr, param],
            [grad, velocity, param],
            momentum=0.9,
            nesterov=1,
        )
def AddAccuracy(model):
    """Add an accuracy operator over the "softmax" and "label" blobs.

    Returns the resulting "accuracy" blob.
    """
    return brew.accuracy(model, ["softmax", "label"], "accuracy")

def OptimizeGradientMemory(model, loss):
    """Replace the model's net proto with memonger's gradient-sharing version.

    The parameter gradients (values of `model.param_to_grad`) are passed to
    `memonger.share_grad_blobs` alongside the net and `loss`; activation
    sharing is turned off and the "memaction" namescope is used.
    """
    grad_blobs = set(model.param_to_grad.values())
    shared_proto = memonger.share_grad_blobs(
        model.net,
        loss,
        grad_blobs,
        namescope="memaction",
        share_activations=False,
    )
    model.net._net = shared_proto
def save_net(INIT_NET, PREDICT_NET, model):
    """Serialize `model` to an init-net file and a predict-net file.

    Every workspace blob whose name ends in "_rm" or "_riv" is appended to
    `model.params` before exporting, so those blobs become part of the
    exported init net. (Presumably these are the batch-norm running
    statistics -- confirm against the spatial_bn blob naming.)

    Names already present in `model.params` are skipped, so calling this
    function more than once for the same model does not add duplicates, and
    blob contents are no longer fetched just to be thrown away.

    NOTE(review): the predict file receives the model's own NetDef
    (`model.net._net`), not the `predict_net` returned by
    `mobile_exporter.Export` -- confirm that this is intended.

    Args:
        INIT_NET: output path of the serialized init net.
        PREDICT_NET: output path of the serialized predict net.
        model: model helper whose net and parameters are exported.
    """
    known_params = set(str(p) for p in model.params)
    for blob in workspace.Blobs():
        name = str(blob)
        if (name.endswith("_rm") or name.endswith("_riv")) and name not in known_params:
            model.params.append(name)
            known_params.add(name)

    # Only the init net of the export is used (see the NOTE in the docstring).
    init_net, _ = mobile_exporter.Export(
        workspace, model.net, model.params
    )

    with open(PREDICT_NET, 'wb') as f:
        f.write(model.net._net.SerializeToString())
    with open(INIT_NET, 'wb') as f:
        f.write(init_net.SerializeToString())

Adding check-points

In [7]:
import datetime

# Create a directory under root_folder, named after the current date and time
# (to the second), for this run's checkpoints and other outputs.
# Note: os.makedirs raises if the directory already exists.
unique_timestamp = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
checkpoint_dir = os.path.join(root_folder, unique_timestamp)
os.makedirs(checkpoint_dir)
print("Checkpoint output location: ", checkpoint_dir)

# Add checkpoints to a given model
def AddCheckpoints(model, checkpoint_iters, db_type):
    """Add a Checkpoint operator that periodically saves the model parameters.

    Args:
        model: model helper the operator is appended to.
        checkpoint_iters: a checkpoint is written every this many iterations.
        db_type: database type of the checkpoint files. Previously this
            argument was ignored and "lmdb" was always used; the file name
            keeps its ".lmdb" suffix regardless of the type.

    The db path only contains the run's timestamp directory, not root_folder;
    presumably the operator resolves it against the workspace root folder --
    TODO confirm. The "%05d" placeholder is left for the operator to fill in.
    """
    iteration = brew.iter(model, "iter")
    checkpoint_db = os.path.join(unique_timestamp, "action_tufts_checkpoint_%05d.lmdb")
    model.Checkpoint([iteration] + model.params, [],
                     db=checkpoint_db,
                     db_type=db_type, every=checkpoint_iters)

Checkpoint output location:  ../classification_net/SingleGPU/2018-12-20_11-41-12


## Defining Training net and Test net creating functions

In [8]:
# Argument scope shared by every model helper built below.
arg_scope = {"order": "NCHW"}


# TRAINING MODEL
def createTrainModel(training_lmdb_path, batch_size):
    """Build, initialise and register the training net; return its model helper.

    The net reads noisy input from `training_lmdb_path`, and contains the
    classification network, gradient operators, the Adam optimizer and the
    checkpoint operator.
    """
    model = model_helper.ModelHelper(name='train_net', arg_scope=arg_scope)
    AddInput(model, db=training_lmdb_path, db_type=data_db_type,
             batch_size=batch_size, noise=1)
    loss_blobs = Add_Action_Tufts_Model(
        model, num_classes, image_height, image_width, image_channels, is_test=0)
    model.AddGradientOperators(loss_blobs)
    AddOptimizerOps_adam(model)
    AddCheckpoints(model, checkpoint_iters, db_type="lmdb")

    # Initialise the parameters once, then create the net in the workspace.
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)
    return model

# VALIDATION MODEL
def createValidationModel(validation_lmdb_path, batch_size, with_noise=0):
    """Build and register an evaluation net; return its model helper.

    The helper is created with init_params=False and the network is added
    with is_test=1. No gradient or optimizer operators are added.
    `with_noise` is forwarded to AddInput as its `noise` flag.
    """
    model = model_helper.ModelHelper(name='val_net', arg_scope=arg_scope, init_params=False)
    AddInput(model, db=validation_lmdb_path, db_type=data_db_type,
             batch_size=batch_size, noise=with_noise)
    # The returned loss list is not needed here.
    Add_Action_Tufts_Model(
        model, num_classes, image_height, image_width, image_channels, is_test=1)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)
    return model
# DEPLOY MODEL
def createDeployModel():
    """Build and register the deploy net (no database input); return its helper.

    The helper is created with init_params=False and the network is added
    with is_test=1.
    """
    model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
    Add_Action_Tufts_Model(
        model, num_classes, image_height, image_width, image_channels, is_test=1)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)
    return model

### Train and validate

In [None]:
import math
# Per-epoch logging buffers; row/entry i holds the results of epoch i + 1
val_loss = np.zeros(num_epoch)
val_total_accuracy = np.zeros(num_epoch)
train_loss = np.zeros(num_epoch)
train_accuracy = np.zeros(num_epoch)
val_class_accuracy = np.zeros((num_epoch, num_classes))
val_class_count = np.zeros(num_classes, dtype=int)
val_count = 0
tot_itr_count = 0
total_time = 0
# Select the GPU and build the training / validation networks on it
# NOTE(review): the notebook header says "Using GPU_0" but device index 2 is selected here -- confirm.
# NOTE(review): the validation input is also built with noise (with_noise=1) -- confirm this is intended.
device = 2
with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, device)):
    train_model = createTrainModel(training_lmdb_path, batch_size)
    val_model = createValidationModel(validation_lmdb_path, batch_size=batch_size, with_noise=1)

# Number of whole batches per epoch (floor division of sample count by batch size)
train_iter_per_epoch = train_data_count // batch_size
val_iter_per_epoch = validation_count // batch_size
# Each epoch: run all training iterations, then evaluate on the validation set
for epoch in range(1, num_epoch+1):
    t1 = time.time()
    accuracies = []
    losses = []
    for itr in range(1, train_iter_per_epoch+1):
        # One run of the training net (it contains the gradient and optimizer ops)
        tot_itr_count += 1
        workspace.RunNet(train_model.net)
        accuracies.append(workspace.FetchBlob('accuracy'))
        losses.append(workspace.FetchBlob('loss'))
    # Training metrics of the epoch = mean over its iterations
    train_loss[val_count] = np.array(losses).mean()
    train_accuracy[val_count] = np.array(accuracies).mean()
    # dt covers the training iterations only; validation below is not timed
    t2 = time.time()
    dt = t2 - t1
    total_time += dt
    # Validate every epoch
    print(".... epoch:{}/{}   el_time:{}".format(epoch, num_epoch, dt))
    print("training loss:{}, training accuracy:{}".format(train_loss[val_count], train_accuracy[val_count]))
    losses = []
    accuracies = []
    accuracies_per_class = []
    class_count = []
    for _ in range(val_iter_per_epoch):        
        workspace.RunNet(val_model.net)
        losses.append(workspace.FetchBlob('loss'))
        accuracies.append(workspace.FetchBlob('accuracy'))
        accuracies_per_class.append(workspace.FetchBlob('accuracy_per_class'))
        # Per-class sample counts are only collected during the last epoch
        if epoch == num_epoch:
            class_count.append(workspace.FetchBlob('amount_per_class'))
    # Validation metrics of the epoch = mean over the validation batches
    # (per-class accuracy is the mean of the per-batch per-class values)
    val_loss[val_count] = np.array(losses).mean()
    val_total_accuracy[val_count] = np.array(accuracies).mean()
    val_class_accuracy[val_count, :] = np.array(accuracies_per_class).mean(axis=0)
    if epoch == num_epoch:
        val_class_count = np.array(class_count).sum(axis=0)
    print("Validation Loss:{}, Validation total accuracy:{}, Per class validation accuracy:{}"
          .format(val_loss[val_count],val_total_accuracy[val_count], val_class_accuracy[val_count, :] ))
    val_count += 1

# Summary after the last epoch
print("Per class data count: Sitting={}, Sit-to-Stand={}, Standing={}, Walking={}, Stand-to-Sit={}"
      .format(val_class_count[0], val_class_count[1],
            val_class_count[2], val_class_count[3],
            val_class_count[4]))
print('total elapsed time is {}'.format(total_time))



### Plotting validation results

In [None]:
# Plot the per-epoch training / validation loss and accuracy on shared axes.
epochs = range(1, num_epoch + 1)
plt.figure(figsize=(15, 12))
plt.title("Training Loss vs. Validation Accuracy and loss")
plt.plot(epochs, train_loss, 'b')
plt.plot(epochs, val_loss, 'c')
plt.plot(epochs, train_accuracy, 'm')
plt.plot(epochs, val_total_accuracy, 'r')
plt.xlabel("Training epoch")
plt.legend(('Training Loss', 'Validation loss', 'Training accuracy', 'Validation accuracy'), loc='upper right')
plt.xlim((0, num_epoch))
plt.ylim((0, 1.5))
# The on/off flag is passed positionally: the `b=` keyword was deprecated and
# later removed in Matplotlib 3.x, while the positional form works on old and
# new releases alike.
plt.grid(True, which='major', linestyle='-', linewidth=2)

### Test the network on Test dataset

In [None]:
import operator
# Evaluate the trained network on the test set, one sample per run.
# Only element [0] of each fetched batch is read below, so the batch size must stay 1.
test_batch_size = 1
# Confusion matrix: rows = true class, columns = predicted class
cmat = np.zeros((len(classes), len(classes)))
with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, device)):
    test_model = createValidationModel(testing_lmdb_path, batch_size=test_batch_size, with_noise=0)
# Number of runs needed to see every test sample once
test_iter_per_epoch = test_data_count // test_batch_size
# Result accumulators (losses / accuracies_per_class are kept for later cells but not filled here)
losses = []
accuracies = []
accuracies_per_class = []
class_count = []
pred = []
true = []
Ids = []
t0 = time.time()
# The test net has no gradient or optimizer ops, so each run is forward-only
for _ in range(test_iter_per_epoch):
    workspace.RunNet(test_model.net)
    accuracies.append(workspace.FetchBlob('accuracy'))
    class_count.append(workspace.FetchBlob('amount_per_class'))
    results = workspace.FetchBlob('softmax')[0]
    label = workspace.FetchBlob('label')[0]
    Ids.append(workspace.FetchBlob("ID"))
    # Predicted class = index of the largest softmax score
    max_index = int(np.argmax(results))
    pred.append(classes[max_index])
    true.append(label)
    # Update confusion matrix
    cmat[label, max_index] += 1
t1 = time.time()
dt = t1 - t0
test_total_accuracy = np.array(accuracies).mean()
# Per-class accuracy = correct / total per true class. Classes with no test
# samples yield NaN without raising a divide-by-zero warning.
samples_per_true_class = cmat.sum(axis=1)
test_class_accuracy = np.divide(
    np.diag(cmat), samples_per_true_class,
    out=np.full(len(classes), np.nan),
    where=samples_per_true_class > 0)
test_class_count = np.array(class_count).sum(axis=0)
print("Test total accuracy:{}, Per class test accuracy:{}"
        .format(test_total_accuracy, test_class_accuracy))

print("Per class data count: Sitting={}, Sit-to-Stand={}, Standing={}, Walking={}, Stand-to-Sit={}"
      .format(test_class_count[0], test_class_count[1],
            test_class_count[2], test_class_count[3],
            test_class_count[4]))
print('total elapsed time is {}'.format(dt))

### plot confusion matrix

In [None]:
# Plot confusion matrix (rows: true class, columns: predicted class)
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(111)
res = ax.imshow(cmat, cmap=plt.cm.summer, interpolation='nearest')
# Use dedicated names for the matrix size: unpacking into `width` / `height`
# would overwrite the frame-size globals defined in the configuration cell.
n_true, n_pred = cmat.shape
# `range` instead of `xrange` so the cell also runs on Python 3
for row in range(n_true):
    for col in range(n_pred):
        # imshow puts columns on the x axis and rows on the y axis
        ax.annotate(str(cmat[row, col]), xy=(col, row),
                    horizontalalignment='center', verticalalignment='center')
plt.xticks(range(n_pred), classes, rotation=0)
plt.yticks(range(n_true), classes, rotation=0)
ax.set_xlabel('Predicted Class')
ax.set_ylabel('True Class')
plt.title('Confusion Matrix for test')
# Called once the axes, ticks and labels exist, so it has something to lay out
plt.tight_layout()
plt.show()

In [None]:
# Show, as an integer, the "ID" blob fetched for the first evaluated test sample
first_test_id = int(Ids[0])
print(first_test_id)

In [None]:
# Save the prediction results in a text file: a header line, then one
# tab-separated line per test sample (clip entry, true class, predicted class).
pred_file = os.path.join(checkpoint_dir, 'test_prediction.txt')
testing_labels_path = osp.join(testing_lmdb_path, 'valid_data_list.txt')
# `with` closes the list file even if reading fails
with open(testing_labels_path, "r") as labels_handler:
    lines = labels_handler.readlines()
# Text mode ('w', not 'wb'): the data written are strings, which a binary-mode
# file rejects on Python 3
with open(pred_file, 'w') as f:
    f.write(" clip address, true class, predicted class \n")
    for sample_id, true_label, pred_name in zip(Ids, true, pred):
        # IDs are treated as 1-based positions in the list file
        indx = int(sample_id - 1)
        f.write(lines[indx].rstrip() + '\t' + classes[true_label] + '\t' + pred_name + '\n')

In [None]:
# Inspect what is currently in the workspace: the class name of the "label"
# blob and the shape of the first sample of the "data" blob.
current_label = int(workspace.FetchBlob('label'))
print(classes[current_label])

po_tion_im = workspace.FetchBlob('data')[0, :, :, :]
print(po_tion_im.shape)

### Save trained model

In [None]:
# Build the deploy net on the GPU selected earlier
with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, device)):
    deploy_model = createDeployModel()

# Both files are written to this run's checkpoint directory
full_init_net_out = os.path.join(checkpoint_dir, init_net_out)
full_predict_net_out = os.path.join(checkpoint_dir, predict_net_out)

save_net(full_init_net_out, full_predict_net_out, deploy_model)
print("Model saved as {} and {}".format(full_init_net_out, full_predict_net_out))




If your network does not have batch normalization just use the following script

In [None]:
# Use mobile_exporter's Export function to acquire init_net and predict_net
#init_net, predict_net = mobile_exporter.Export(workspace, deploy_model.net, deploy_model.params)

# Locations of output files
#full_init_net_out = os.path.join(checkpoint_dir, init_net_out)
#full_predict_net_out = os.path.join(checkpoint_dir, predict_net_out)

# Simply write the two nets to file
#with open(full_init_net_out, 'wb') as f:
#    f.write(init_net.SerializeToString())
#with open(full_predict_net_out, 'wb') as f:
#    f.write(predict_net.SerializeToString())