In [None]:
# Shell-installs torch and torchvision when this cell is run.
# NOTE(review): versions are unpinned, so a re-run may pull different releases — consider pinning.
!pip install torch torchvision

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt 

from scipy.io import loadmat
import numpy as np
import torch.utils.data
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader
from scipy import misc
import matplotlib.patches as patches
from datetime import datetime
from scipy.misc import imshow
import model_code.multi_modal_nn as mmnn
from model_code.face_landmark_dataset import FaceLandmarksDataset
import model_code.test as test
import model_code.train as train
 

# Dimension constants for the model inputs and outputs.
# NOTE(review): none of these names is referenced by the cells visible in this
# notebook; presumably kept for reference alongside model_code — confirm.

# Vector sizes (136 matches the in_features of the first landmark layer printed below).
feature_size, output_size = 136, 1

# Image sizes.
im_height, im_width = 60, 108
image_dim = 108

In [3]:
def extract_data(indices, suffix = '', include_pos = True):
  """Load and stack the per-recording .mat files named by `indices`.

  For every index the file ``str(index) + suffix + '_lmarks_location_eye.mat'``
  is read with ``loadmat`` and four of its variables are collected:
  ``ftrs``, ``gz`` (transposed with ``torch.t``), ``eye_reg`` and ``location``.
  The pieces are concatenated along the first axis in the order of `indices`.

  Tensors are placed on the GPU when CUDA is available and on the CPU
  otherwise, so the function also works on machines without a GPU.

  Args:
    indices: iterable of recording identifiers; each one is converted with
      ``str()`` to form the file-name prefix.
    suffix: text inserted between the index and the fixed file-name tail.
    include_pos: accepted for backward compatibility; it is not used.

  Returns:
    Tuple ``(ftrs, gz, eye_reg, img_loc)`` where ``ftrs`` and ``gz`` are
    float32 tensors, ``eye_reg`` is an int32 tensor and ``img_loc`` is a
    NumPy array built from the ``location`` entries. When `indices` is empty
    all four are empty.
  """
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  # Collect the per-file pieces and concatenate once at the end; concatenating
  # inside the loop would re-copy everything gathered so far on every file.
  ftrs_parts = []
  gz_parts = []
  eye_reg_parts = []
  img_loc_parts = []

  for index in indices:
    data = loadmat(str(index) + suffix + '_lmarks_location_eye.mat')

    # Landmark features
    ftrs_single = torch.from_numpy(data['ftrs']).to(device=device, dtype=torch.float32)
    ftrs_parts.append(ftrs_single)

    # Gaze values: converted first, then transposed so they stack along axis 0
    gz_single = torch.from_numpy(data['gz']).to(device=device, dtype=torch.float32)
    gz_parts.append(torch.t(gz_single))

    # Eye regions should be n X 4 size
    eye_reg_single = torch.from_numpy(data['eye_reg']).to(device=device, dtype=torch.int32)
    eye_reg_parts.append(eye_reg_single)

    # Image locations stay on the host as a NumPy array
    img_loc_parts.append(data['location'])

  if not ftrs_parts:
    # Nothing was requested: hand back empty containers of the usual types.
    return (torch.empty(0, dtype=torch.float32, device=device),
            torch.empty(0, dtype=torch.float32, device=device),
            torch.empty(0, dtype=torch.int32, device=device),
            np.asarray([]))

  ftrs = torch.cat(ftrs_parts)
  gz = torch.cat(gz_parts)
  eye_reg = torch.cat(eye_reg_parts)
  # Joined without a float64 seed array, so the result keeps the dtype of the
  # loaded location entries instead of being promoted.
  img_loc = np.concatenate(img_loc_parts)

  return(ftrs, gz, eye_reg, img_loc)


In [4]:
def test_model(net, test_face_landmarks_dataset, batch_size = 128):
  net.eval()
#   y_total = torch.Tensor().cuda()
#   y_hat_total = torch.Tensor().cuda()

  accuracies = []

  testloader = torch.utils.data.DataLoader(dataset=test_face_landmarks_dataset , batch_size=batch_size, shuffle=True)
  
  then = datetime.now()

  for i, (train_batch) in enumerate(testloader):

      #calculating time
      now  = datetime.now()  
      duration = now - then 

      seconds = duration.total_seconds()
      minutes = divmod(seconds, 60)[0]


      print(datetime.now().time())
      print('Time for testing passed %d minutes and %d seconds' % (minutes, seconds))
      #for measuring starting time
      then = datetime.now()


      (x_batch, y_batch) = train_batch
      optimizer.zero_grad()
      yhat = torch.Tensor()
      yhat = yhat.type(torch.cuda.FloatTensor)  


      yhat = net(x_batch) 


      yhat = yhat > 0.5
      accuracy = accuracy_score(yhat.cpu().numpy(), y_batch.cpu().numpy())



      print('This batch accuracy is %f %%' % (100.0 * accuracy) )
      print('Completed %d/%d  %f %%' % (batch_size*(i + 1), len(test_face_landmarks_dataset) , 100.0 * batch_size*(i + 1) / len(test_face_landmarks_dataset)))

      yhat = ftrs.type(torch.cuda.FloatTensor)  

    
      accuracies.append(accuracy)
  

  return (accuracies)

  

In [5]:
# Recording ids held out for testing and used for training.
test_indices = [404,407,410]
train_indices = [401,402,403,405]

# extract_data returns all four containers itself, so no placeholder
# tensors need to be created beforehand.
(ftrs, gz, eye_reg, img_loc) = extract_data(train_indices)

In [6]:
# Take out test data: load the recordings listed in test_indices.
# extract_data returns every container itself, so nothing is pre-allocated here.
(test_ftrs, test_gz, test_eye_reg, test_img_loc) = extract_data(test_indices)

print(test_gz.shape)
print(test_ftrs.shape)


torch.Size([263857, 1])
torch.Size([263857, 136])


In [7]:
# Model together with the optimizer and scheduler returned alongside it.
(net, optimizer, scheduler) = mmnn.get_net_instance()

# Half-open sample range [up, down) of the training data wrapped by the dataset.
up = 0
down = 100

face_landmarks_dataset = FaceLandmarksDataset(
    ftrs=ftrs[up:down],
    eye_regions=eye_reg.cuda()[up:down],
    locations=img_loc[up:down],
    gz=gz.cuda()[up:down],
    train_transforms=None,
    test_transforms=None,
)

In [8]:
# Aliases for the held-out features and labels.
x_test, y_test = test_ftrs, test_gz

# Dataset over the complete held-out split, built with load_type='test'.
test_face_landmarks_dataset = FaceLandmarksDataset(
    ftrs=test_ftrs,
    eye_regions=test_eye_reg.cuda(),
    locations=test_img_loc,
    gz=test_gz.cuda(),
    train_transforms=None,
    test_transforms=None,
    load_type='test',
)

# Replaces the net / optimizer / scheduler created in the previous cell.
(net, optimizer, scheduler) = mmnn.get_net_instance()

# Optional: restore previously saved weights.
# net.load_state_dict(torch.load('model_code/log_results/multimodal_model_14_64.py'))

In [10]:
import torch.onnx

# Id that labels the exported ONNX file.
train_id = 20
# Flattened input width: 18952 = 56x112x3 + 136
dummy_input = torch.randn(1, 18952, device='cuda')
print(net)
# NOTE(review): the checkpoint path belongs to run 24 while the export is
# labelled with train_id 20 — confirm this pairing is intended.
net.load_state_dict(torch.load('model_code/log_results/24/models/multimodal_model_24_256_epoch_0.py'))

# Derive the file name from train_id so the two cannot drift apart.
torch.onnx.export(net, dummy_input, "model_%d.onnx" % train_id)


MultiModalNetwork(
  (landmarkUnit): LandmarkUnit(
    (relu): ReLU()
    (fc1): Linear(in_features=136, out_features=68, bias=True)
    (fc1_bn): BatchNorm1d(68, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc2): Linear(in_features=68, out_features=68, bias=True)
    (fc2_bn): BatchNorm1d(68, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc3): Linear(in_features=68, out_features=34, bias=True)
    (fc3_bn): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc5): Linear(in_features=34, out_features=34, bias=True)
    (fc5_bn): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (imageUnit): ImageUnit(
    (conv1): Conv2d(3, 32, kernel_size=(3, 5), stride=(1, 1), padding=(1, 2))
    (conv1_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (conv2): Conv2d(32, 32, kernel_size=(3, 5), stride=(1, 1), padding=(1, 2))
  

  x[0:135:2] = x[0:135:2] / 608.0
  x[1:136:2] = x[1:136:2] / 342.0
  print(imageUnitOut.shape, landmarksUnitOut.shape)


(torch.Size([1, 1024]), torch.Size([1, 34]))


In [None]:
# Mini-batch size handed to the training routine.
batch_size = 32

# Paths passed to train_model as output_file and as its model-file argument.
output_file = 'model_code/log_results/proba.txt'
output_model_file = 'model_code/log_results/proba'

# NOTE(review): the meaning of the positional 30, 1 and False is defined by
# train.train_model, which is not visible here — check its signature.
net = train.train_model(
    scheduler,
    optimizer,
    output_model_file,
    net,
    face_landmarks_dataset,
    30,
    1,
    False,
    batch_size=batch_size,
    output_file=output_file,
    train_id=0,
)

(torch.Size([32, 1024]), torch.Size([32, 34]))
(torch.Size([32, 1024]), torch.Size([32, 34]))
(torch.Size([32, 1024]), torch.Size([32, 34]))
(torch.Size([4, 1024]), torch.Size([4, 34]))
Test size is 6000
21:06:38.044312
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 75.390625 %
Completed 256/6000  4.266667 %
Total correct predicted number is 193 out of 256
21:06:38.422132
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 74.218750 %
Completed 512/6000  8.533333 %
Total correct predicted number is 190 out of 256
21:06:38.798786
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 73.828125 %
Completed 768/6000  12.800000 %
Total correct predicted number is 189 out of 256
21:06:39.181416
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34])

21:06:52.511818
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 75.000000 %
Completed 2816/6000  46.933333 %
Total correct predicted number is 192 out of 256
21:06:52.890818
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 71.484375 %
Completed 3072/6000  51.200000 %
Total correct predicted number is 183 out of 256
21:06:53.271641
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 74.609375 %
Completed 3328/6000  55.466667 %
Total correct predicted number is 191 out of 256
21:06:53.653943
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 72.656250 %
Completed 3584/6000  59.733333 %
Total correct predicted number is 186 out of 256
21:06:54.034244
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256

21:07:06.956058
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 71.484375 %
Completed 5632/6000  93.866667 %
Total correct predicted number is 183 out of 256
21:07:07.337700
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256, 34]))
This batch accuracy is 73.437500 %
Completed 5888/6000  98.133333 %
Total correct predicted number is 188 out of 256
21:07:07.513185
Time for testing passed 0 minutes and 0 seconds
(torch.Size([112, 1024]), torch.Size([112, 34]))
This batch accuracy is 78.571429 %
Completed 6144/6000  102.400000 %
Total correct predicted number is 88 out of 256
(torch.Size([32, 1024]), torch.Size([32, 34]))
(torch.Size([32, 1024]), torch.Size([32, 34]))
(torch.Size([32, 1024]), torch.Size([32, 34]))
(torch.Size([4, 1024]), torch.Size([4, 34]))
Test size is 6000
21:07:09.181506
Time for testing passed 0 minutes and 0 seconds
(torch.Size([256, 1024]), torch.Size([256,

In [None]:


# (error, accuracy) = test_model(net, test_ftrs, test_gz)
(accuracy) = test.test_model(optimizer,net, test_face_landmarks_dataset, 32)


print('Total accuracies:' )
# Print the value bound above; the name `accuracies` is never defined at
# notebook level and raised NameError on a fresh kernel.
print(accuracy)

In [None]:
# torch.save(net.state_dict(), 'new_multi_modal_model.py')


In [None]:
# net = MultiModalNetwork().cuda()

# for param in net.landmarkUnit.parameters():
#     param.requires_grad = False
# net.load_state_dict(torch.load('multi_modal_model.py'))
# # model.eval()