In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from pathlib import Path
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF
from torchvision import models
from torch.utils.data import Dataset
from torch.utils.data import random_split, DataLoader
#from torchmetrics.functional import accuracy
from torch.nn.utils.rnn import pad_sequence
import torchvision.transforms as T
import argparse
import imutils
import time
import dlib
import cv2
from google.colab.patches import cv2_imshow
import os
import math
import csv

import sys
sys.path.append('/content/drive/MyDrive/NAPOLI/code/mtcnn/')
from mtcnn import MTCNN

# **Preparazione dataset per NN CON IMMAGINI + FEATURES**

## **Head ad Gaze pose estimation**

In [None]:
eyes_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")

class PoseEstimator:
    def __init__(self, weights=None):
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      self.detector =  MTCNN()

    def detect_faces(self, image, image_shape_max=640):
      '''
      Performs face detection using retinaface method with speed boost and initial quality checks based on whole image size

      Parameters
      ----------
      image : uint8
          image for face detection.
      image_shape_max : int, optional
          maximum size (in pixels) of image. The default is None.

      Returns
      -------
      float array
          bounding boxes and score.
      float array
          landmarks.

      '''
      image_shape = image.shape[:2]

      # perform image resize for faster detection
      if image_shape_max:
          scale_factor = max([1, max(image_shape) / image_shape_max])
      else:
          scale_factor = 1

      if scale_factor > 1:
          scaled_image = cv2.resize(image, (0, 0), fx = 1/scale_factor, fy = 1/scale_factor)

          start = time.time()
          print("[INFO[ performing face detection with MTCNN...")
          boxes, points = self.detector.detect_faces(scaled_image)
          end = time.time()
          print("[INFO] face detection took {:.4f} seconds".format(end - start))

          boxes[:,:4] *= scale_factor
          points *= scale_factor
      else:
          start = time.time()
          print("[INFO[ performing face detection with MTCNN...")
          boxes, points = self.detector.detect_faces(image)
          end = time.time()
          print("[INFO] face detection took {:.4f} seconds".format(end - start))

      return boxes, points

    def draw_landmarks(self, image, boxes, points):
      '''
      Parameters
      ----------
      frame : TYPE
          RGB image
      bb : TYPE - Array of float64, Size = (5,)
          coordinates of bounding box for the selected face.
      points : TYPE - Array of float32, Size = (10,)
          coordinates of landmarks for the selected faces.

      Returns
      -------
      None.

      '''
      font = cv2.FONT_HERSHEY_COMPLEX # Text in video
      font_size = 0.6
      blue = (0, 0, 255)
      green = (0,128,0)
      red = (255, 0, 0)

      #boxes, points = self.detect_faces(image)

      boxes = boxes.astype(int)
      points = points.astype(int)
      # draw rectangle and landmarks on face
      cv2.rectangle(image, (boxes[0], boxes[1]), (boxes[2], boxes[3]), red, 1)
      cv2.circle(image, (int(points[0]), int(points[5])), 2, blue, 2)# left eye
      cv2.circle(image, (int(points[1]), int(points[6])), 2, blue, 2)# right eye
      cv2.circle(image, (int(points[2]), int(points[7])), 2, blue, 2)# nose
      cv2.circle(image, (int(points[3]), int(points[8])), 2, blue, 2)# mouth - left
      cv2.circle(image, (int(points[4]), int(points[9])), 2, blue, 2)# mouth - right

      cv2_imshow(image)

      #w = int(boxes[2])-int(boxes[0]) # width
      #h = int(boxes[3])-int(boxes[1]) # height


    def one_face(self, frame, bbs, pointss):
      """
      Parameters
      ----------
      frame : TYPE
          RGB image (numpy array).
      bbs : TYPE - Array of flaot64, Size = (N, 5)
          coordinates of bounding boxes for all detected faces.
      pointss : TYPE - Array of flaot32, Size = (N, 10)
          coordinates of landmarks for all detected faces.
      Returns
      -------
      bb : TYPE - Array of float 64, Size = (5,)
          coordinates of bounding box for the selected face.
      points : TYPE
          coordinates of five landmarks for the selected face.
      """
      # select only process only one face (center ?)
      offsets = [(bbs[:,0]+bbs[:,2])/2-frame.shape[1]/2,
                (bbs[:,1]+bbs[:,3])/2-frame.shape[0]/2]
      offset_dist = np.sum(np.abs(offsets),0)
      index = np.argmin(offset_dist)
      bb = bbs[index]
      points = pointss[:,index]
      return bb, points

    def find_smile(self, points):
      dx_eyes = points[1] - points[0]# between pupils
      dx_mout = points[4] - points[3]# between mouth corners
      smile_ratio = dx_mout/dx_eyes
      return smile_ratio

    def find_roll(self, points):
      """
      Parameters
      ----------
      points : TYPE - Array of float32, Size = (10,)
          coordinates of landmarks for the selected faces.
      Returns
      -------
      TYPE
          roll of face.

      """
      return points[6] - points[5]

    def find_yaw(self, points):
      """
      Parameters
      ----------
      points : TYPE - Array of float32, Size = (10,)
          coordinates of landmarks for the selected faces.
      Returns
      -------
      TYPE
          yaw of face.

      """
      le2n = points[2] - points[0]
      re2n = points[1] - points[2]
      return le2n - re2n

    def find_pitch(self, points):
      """
      Parameters
      ----------
      points : TYPE - Array of float32, Size = (10,)
          coordinates of landmarks for the selected faces.
      Returns
      -------
      Pitch
      """
      eye_y = (points[5] + points[6]) / 2
      mou_y = (points[8] + points[9]) / 2
      e2n = eye_y - points[7]
      n2m = points[7] - mou_y
      return e2n / n2m


In [None]:
#preprocessing DATA per training della SVM

class dataset(Dataset):

  def __init__(self, csv_file):
    self.sample_elements = []
    # sample_elements = [(sample_1), (sample_2),..]
    # sample_1 = [(FEATURES OCCHI, POSIZIONE TESTA), LABLE)] etc.
    self.roll_pitch_yaw = []
    self.pupil_coords = []
    self.paths = []
    self.labels = []
    self.faces_cropped = []
    self.names = []
    self.csv_file = csv_file
    self.model = models.resnet50(weights='DEFAULT')
    self.extract_path_label_from_csv()


  def extract_path_label_from_csv(self):

    #read CSV
    with open(self.csv_file, mode ='r') as file:

      csvFile = csv.reader(file)

      for lines in csvFile:
        if '0_0' not in lines[0]:
          id_class = lines[1]
          name = lines[0]
          self.paths.append("/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/"+name[:3]+"/"+name+".png")
          self.labels.append(id_class)
          self.names.append(name)

    print(self.paths) #[0])
    print(self.labels) #[0])

    return

  def extract_eyestensor_headpose(self):
    #print(self.path)
    est = PoseEstimator()

    for i in self.paths:
      print(i)
      rpy = []
      pupils = []
      image = cv2.imread(i)

      image = imutils.resize(image, width=600)
      image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)# convert to rgb
      image_rgb = cv2.flip(image_rgb, 1)# flip for user friendliness

      bounding_boxes, landmarks = est.detect_faces(image_rgb)

      #print(bounding_boxes)

      if len(bounding_boxes) > 0:
        if len(bounding_boxes) >= 2:
          bounding_boxes, landmarks = est.one_face(image_rgb, bounding_boxes, landmarks)
          roll = est.find_roll(landmarks)
          pitch = est.find_pitch(landmarks)
          yaw = est.find_yaw(landmarks)
          lx = landmarks[0]
          ly = landmarks[5]
          rx = landmarks[1]
          ry = landmarks[6]

          '''
          bounding_boxes = bounding_boxes.astype(int)
          for (x, y, w, h) in bounding_boxes[0]:
            face_crop = image[y:y + h, x:x + w]

          self.faces_cropped.append(face_crop)
          print(face_crop)
          print(len(face_crop))
          '''
        else:
          roll = est.find_roll(landmarks)
          pitch = est.find_pitch(landmarks)
          yaw = est.find_yaw(landmarks)
          lx = landmarks[0]
          ly = landmarks[5]
          rx = landmarks[1]
          ry = landmarks[6]

          print(bounding_boxes)

          '''
          #bounding_boxes = bounding_boxes.astype(int)
          for (x, y, w, h) in bounding_boxes[0]:
            face_crop = image[y:y + h, x:x + w]

          print(face_crop)
          print(len(face_crop))
          self.faces_cropped.append(face_crop)
          '''

      else:
        print("NO FACE FOUND")
        roll = 0.0
        pitch = 0.0
        yaw = 0.0
        lx = 0.0
        ly = 0.0
        rx = 0.0
        ry = 0.0


      gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)# convert to rgb
      eyes = eyes_cascade.detectMultiScale(gray, 1.1, 5, minSize = [40,40])
      #print(eyes)

      #ESTRAZIONE HEAD POSE
      rpy = [roll, pitch, yaw]
      pupils = [lx, ly, rx, ry]

      self.roll_pitch_yaw.append(rpy)
      self.pupil_coords.append(pupils)


  def process_data(self):

    print(len(self.labels))
    for i in range(len(self.labels)):
      j = 0
      head_pose = np.array(self.roll_pitch_yaw[i])
      pupil_pose = np.array(self.pupil_coords[i])

    #  QUI DOBBIAMO CREARE I SAMPLE DEL DATASET

      #print(len(eyes))
      self.sample_elements.append([np.array(self.names[i]), np.array(head_pose), np.array(pupil_pose)])

    return self.sample_elements, self.labels

  def __len__(self):
    # returns the number of samples in our dataset
    return len(self.sample_elements)

  def __getitem__(self, i):
    return self.sample_elements[i]





In [None]:
data = dataset('/content/drive/MyDrive/NAPOLI/Dataset_volti_num_ultime.csv')

['/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(1).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(2).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(3).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(4).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(5).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(6).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(7).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(8).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(9).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(10).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(11).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(12).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(13).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(14).png', '/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_

In [None]:
data.extract_eyestensor_headpose()

/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(1).png
[INFO[ performing face detection with MTCNN...
[INFO] face detection took 0.1831 seconds
[[218.73394909 305.33268112 380.56492247 528.29256896   0.99960345]]
/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(2).png
[INFO[ performing face detection with MTCNN...
[INFO] face detection took 0.0553 seconds
[[225.64135327 302.66041025 396.97103173 536.91371975   0.99901688]]
/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(3).png
[INFO[ performing face detection with MTCNN...
[INFO] face detection took 0.0530 seconds
[[213.52393369 311.20970319 370.88877611 518.3644875    0.99968433]]
/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(4).png
[INFO[ performing face detection with MTCNN...
[INFO] face detection took 0.0547 seconds
[[224.33454812 309.08393993 380.06600337 512.8218108    0.99975461]]
/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/1_1/1_1_a(5).png
[INFO[ performing face detection with MTCNN...
[

In [None]:
samples, labels = data.process_data()

906


# CSV 906 samples

In [None]:
#WRITE THE CSV FILE

#write the header
header = []

header.append("name")
header.append("roll")
header.append("pitch")
header.append("yawn")
header.append("lx")
header.append("ly")
header.append("rx")
header.append("ry")
header.append("label")
print(header)
print(len(header))


f = "/content/drive/MyDrive/NAPOLI/dataset.csv"

with open(f, 'w') as csvfile:
  writer = csv.writer(csvfile)

  writer.writerow(header)

  j = 0
  for i in samples:
    m = np.concatenate((i[0], i[1], i[2], labels[j]), axis=None)
    j += 1

    writer.writerow(m)
    print(m)
    print(len(m))

    #print(len(i[0]))
    #print(len(i[1]))
  print("------------------------------")
  print(labels)


['name', 'roll', 'pitch', 'yawn', 'lx', 'ly', 'rx', 'ry', 'label']
9
['1_1_a(1)' '3.6223755' '0.6795519' '-27.87616' '250.21185' '387.73267'
 '328.97845' '391.35504' '1']
9
['1_1_a(2)' '16.792542' '0.6994347' '-24.672058' '270.08667' '383.69733'
 '351.5263' '400.48987' '1']
9
['1_1_a(3)' '2.10141' '0.55794036' '-47.104156' '240.21048' '385.0185'
 '313.03577' '387.1199' '1']
9
['1_1_a(4)' '7.389801' '0.4944545' '-40.626953' '254.92618' '379.477'
 '329.40176' '386.8668' '1']
9
['1_1_a(5)' '10.037537' '0.5145665' '-22.966492' '256.39172' '375.81378'
 '340.19885' '385.85132' '1']
9
['1_1_a(6)' '5.808197' '0.5358786' '-24.831055' '267.4815' '381.74017'
 '346.1756' '387.54837' '1']
9
['1_1_a(7)' '12.366119' '0.34728438' '-23.543945' '261.9678' '368.61078'
 '346.03128' '380.9769' '1']
9
['1_1_a(8)' '9.559937' '0.80432415' '1.4777222' '278.76788' '388.26666'
 '360.56763' '397.8266' '1']
9
['1_1_a(9)' '10.600159' '0.70386225' '-0.8757324' '279.40295' '386.4222'
 '360.141' '397.02237' '1']
9
['1

# CSV 1812 samples (data augmentation)

In [None]:
#WRITE THE CSV FILE

#write the header
header = []

header.append("name")
header.append("roll")
header.append("pitch")
header.append("yawn")
header.append("lx")
header.append("ly")
header.append("rx")
header.append("ry")
header.append("label")
print(header)
print(len(header))

f = "/content/drive/MyDrive/NAPOLI/dataset.csv"
fn = "/content/drive/MyDrive/NAPOLI/dataset_new.csv"

with open(fn, 'w') as csvfilenew:
  writer = csv.writer(csvfilenew)
  writer.writerow(header)

  with open(f, 'r') as csvfile:
    next(csvfile)
    reader = csv.reader(csvfile)

    for row in reader:
      print(row)
      writer.writerow(row)
      writer.writerow([row[0]+'_change', row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8]])


['name', 'roll', 'pitch', 'yawn', 'lx', 'ly', 'rx', 'ry', 'label']
9
['1_1_a(1)', '3.6223755', '0.6795519', '-27.87616', '250.21185', '387.73267', '328.97845', '391.35504', '1']
['1_1_a(2)', '16.792542', '0.6994347', '-24.672058', '270.08667', '383.69733', '351.5263', '400.48987', '1']
['1_1_a(3)', '2.10141', '0.55794036', '-47.104156', '240.21048', '385.0185', '313.03577', '387.1199', '1']
['1_1_a(4)', '7.389801', '0.4944545', '-40.626953', '254.92618', '379.477', '329.40176', '386.8668', '1']
['1_1_a(5)', '10.037537', '0.5145665', '-22.966492', '256.39172', '375.81378', '340.19885', '385.85132', '1']
['1_1_a(6)', '5.808197', '0.5358786', '-24.831055', '267.4815', '381.74017', '346.1756', '387.54837', '1']
['1_1_a(7)', '12.366119', '0.34728438', '-23.543945', '261.9678', '368.61078', '346.03128', '380.9769', '1']
['1_1_a(8)', '9.559937', '0.80432415', '1.4777222', '278.76788', '388.26666', '360.56763', '397.8266', '1']
['1_1_a(9)', '10.600159', '0.70386225', '-0.8757324', '279.40295',

# **New ClassNet (13 CNN + Classification Part)**

In [4]:
import pandas as pd
df = pd.read_csv("/content/drive/MyDrive/NAPOLI/dataset_final.csv", delimiter=',', header=0)

In [5]:
df.shape
print(df.shape)
print(df)

(1006, 9)
           name       roll     pitch       yawn         lx         ly  \
0      0_0_a(1)   7.348236  1.199448 -10.212616  269.77344  419.67825   
1      0_0_a(2)   4.940735  1.875357  -8.314484  267.75350  444.98190   
2      0_0_a(3)  13.642975  1.213234  30.171082  340.88083  427.08063   
3      0_0_a(4)  18.102783  1.582307  26.786377  396.34027  444.70786   
4      0_0_a(5)   1.709778  1.007026  57.224090  388.57280  415.17035   
...         ...        ...       ...        ...        ...        ...   
1001   3_3_d(7)  12.575684  0.815834  48.365570  366.52870  421.63547   
1002   3_3_d(8)  11.401001  0.818931  46.380370  362.56080  420.44623   
1003   3_3_d(9)  12.137817  0.842395  50.432830  361.39590  420.39407   
1004  3_3_d(10)  12.056610  0.869360  47.788757  361.45767  421.98425   
1005  3_3_d(11)  12.298462  0.875665  45.184540  362.45734  423.91116   

             rx         ry  label  
0     350.53073  427.02650      0  
1     349.03833  449.92264      0  
2    

In [6]:
from torch.nn import Module, Conv2d, Linear, MaxPool2d, ReLU, LogSoftmax, Dropout
from torch import flatten
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from sklearn.metrics import classification_report
from torch.utils.data import random_split
import torch.utils.data as data_utils
from torchvision.transforms import ToTensor
from torchvision.datasets import KMNIST
from torch.optim import Adam, Adagrad
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import argparse
import torch
import time
from torchvision import transforms
from sklearn.preprocessing import normalize


class ClassNet(Module):

  def __init__(self, numChannels, classes):
		# call the parent constructor

    super(ClassNet, self).__init__()  #input(30, 3, 224, 224)
		# initialize first set of CONV => RELU => POOL layers    [(I - K +2 *P) / S] +1
    self.conv1 = Conv2d(in_channels=numChannels, out_channels=64, kernel_size=(3, 3), stride = 1 , padding = 1)                 #(30, 224, 224, 64) 5
    self.relu1 = ReLU()

    self.conv2 = Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride = 1 , padding = 1)                 #(30, 224, 224, 64) 5
    self.relu2 = ReLU()

    self.maxpool1 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))                                    #(30, 64, 112, 112)   #nn.AdaptiveMaxPool2d

		# initialize second set of CONV => RELU => POOL layers
    self.conv3 = Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu3 = ReLU()

    self.conv4 = Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu4 = ReLU()

    self.maxpool2 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))  #(30, 50, 1, 511)

    # initialize second set of CONV => RELU => POOL layers
    self.conv5 = Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu5 = ReLU()

    self.conv6 = Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu6 = ReLU()

    self.conv7 = Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu7 = ReLU()

    self.maxpool3 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))  #(30, 50, 1, 511)

    # initialize second set of CONV => RELU => POOL layers
    self.conv8 = Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu8 = ReLU()

    self.conv9 = Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu9 = ReLU()

    self.conv10 = Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu10 = ReLU()

    self.maxpool4 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))  #(30, 50, 1, 511)

    # initialize second set of CONV => RELU => POOL layers
    self.conv11 = Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu11 = ReLU()

    self.conv12 = Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu12 = ReLU()

    self.conv13 = Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride = 1 , padding = 1)  #(30, 50, 1, 1022)  5
    self.relu13 = ReLU()

    self.maxpool5 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))  #(30, 50, 1, 511)   # --> output = 7x7x512

    #QUI DEVO ATTACCARE LE INFO SULLE TESTA ?????

    self.fc6 = Linear(in_features=25088+7, out_features=4096)
    self.relu6_1 = ReLU()
    self.drop6 = Dropout(0.5)

    self.fc7 = Linear(in_features=4096, out_features=1000)
    self.relu7_1 = ReLU()
    self.drop7 = Dropout(0.5)

    self.fc8 = Linear(in_features=1000, out_features=classes)
    self.logSoftmax = LogSoftmax(dim=1)

  def forward(self, s): #s.shape = 5x1x224x231

    x, feature = torch.split(s, [224, 7], dim=3)
    #print(type(x))
    #print(x)
    #print(feature)
    feature = feature[:, 0, 0, :]

    x = self.conv1(x)
    x = self.relu1(x)
    x = self.conv2(x)
    x = self.relu2(x)
    x = self.maxpool1(x)

    x = self.conv3(x)
    x = self.relu3(x)
    x = self.conv4(x)
    x = self.relu4(x)
    x = self.maxpool2(x)

    x = self.conv5(x)
    x = self.relu5(x)
    x = self.conv6(x)
    x = self.relu6(x)
    x = self.conv7(x)
    x = self.relu7(x)
    x = self.maxpool3(x)

    x = self.conv8(x)
    x = self.relu8(x)
    x = self.conv9(x)
    x = self.relu9(x)
    x = self.conv10(x)
    x = self.relu10(x)
    x = self.maxpool4(x)

    x = self.conv11(x)
    x = self.relu11(x)
    x = self.conv12(x)
    x = self.relu12(x)
    x = self.conv13(x)
    x = self.relu13(x)
    x = self.maxpool5(x)

    x = flatten(x, 1)


    x = torch.cat((x, feature), 1)
    x = self.fc6(x)
    x = self.relu6_1(x)
    x = self.drop6(x)
    x = self.fc7(x)
    x = self.relu7_1(x)
    x = self.drop7(x)

    x = self.fc8(x)
    print(x)
    output = self.logSoftmax(x)

    return output

In [7]:
preprocess = transforms.Compose([
                          transforms.ToPILImage(),
                          transforms.Resize(224),
                          #transforms.CenterCrop(224),
                          transforms.ToTensor(),
                          transforms.Normalize(mean=[0.485], std=[0.225])
                          ])                                                                                      #mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] se a colori (come nel nostro caso)
                                                                                                                #mean=[0.485], std=[0.225]),] se in greyscale

tensor_images = []
labels = []
#features_vgg = []
rpy_pupils = []
i = 0

for filename in os.listdir('/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/HEAD-GAZE_DATASET/224/'):
    #print(filename)
    if filename[:-4] in df.values:
      i+=1
      path = "/content/drive/MyDrive/NAPOLI/DATASET_FACE/PNG/HEAD-GAZE_DATASET/224/"+filename
      image = cv2.imread(path)
      gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)   #convert to gray
      #print(type(image))
      image_tensor = preprocess(gray)                  #preprocess(gray)

      line = df[df['name'] == filename[:-4]]
      #print(line['ry'])
      rpy_pupil = [float(line['roll']), float(line['pitch']), float(line['yawn']), float(line['lx']), float(line['ly']), float(line['rx']), float(line['ry'])]
      #DOBBIAMO NORMALIZZARE I VALORI??????????????

      labels.append(line['label'])

      #interi samples per rete completa
      #rpy_pupil = torch.Tensor(rpy_pupil)

      #print(type(rpy_pupil))
      rpy_pupils.append(rpy_pupil)

      #sample = torch.cat((image_tensor, rpy_pupil), 1)

      tensor_images.append(image_tensor)

      ######################################################################
'''
      #solo immagini per estrarre features
      input_batch = image_tensor.unsqueeze(0) # create a mini-batch as expected by the model

      feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])

      if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        feature_extractor.to('cuda')

      with torch.no_grad():
        feature = feature_extractor(input_batch)
        print(feature[0])

      features_vgg.append(feature)
'''
print(i)

1002


In [12]:
# define training hyperparameters
INIT_LR = 1e-4
BATCH_SIZE = 32
EPOCHS = 100
# define the train and val splits
TRAIN_SPLIT = 0.75
VAL_SPLIT = 1 - TRAIN_SPLIT
# set the device we will be using to train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Creating np arrays
target = labels
print(len(target))
#print(type(target))
#target = [str(i) for i in target]
features = rpy_pupils       #.drop(['label','index'], axis=1)

features_norm = normalize(features, norm='l2', axis=1, copy=True, return_norm=False)
#print(features)


onek_target = []
for i in target:
  k = np.zeros(9)
  k[int(i)-1] = 1.
  onek_target.append(k)

onek_target = np.array(onek_target)
#print(onek_target)

features_tensor = torch.tensor(features_norm)
target_tensor = torch.tensor(onek_target)

final_samples = []
for i in range(len(target_tensor)):
    feature = features_tensor[i].repeat(1,224,1)
    if i == 0 : print((tensor_images[i].shape, features_tensor[i].shape))
    sample = torch.cat((tensor_images[i], feature), 2)
    #print(sample)
    #print(len(sample))
    '''if i == 0 :
      print((sample.shape))
      part1, part2 = torch.split(sample, [224, 7], dim=2)
      print((part1.shape))
      print((part2.shape))
      print(part2)
      print(part2[0, 0, :])
      '''
    final_samples.append(sample)
    #print(final_samples.shape)

final_tensor = torch.stack(final_samples)

# Passing to DataLoader
dataset = data_utils.TensorDataset(final_tensor, target_tensor)

print("[INFO] generating the train/validation split...")
numTrain = int(len(target) * TRAIN_SPLIT)+1
print(numTrain)
numVal = int(len(target) * VAL_SPLIT)
print(numVal)

(trainData, valData) = random_split(dataset, [numTrain, numVal], generator=torch.Generator().manual_seed(42))

trainDataLoader = data_utils.DataLoader(trainData, shuffle=True, batch_size=BATCH_SIZE)
valDataLoader = data_utils.DataLoader(valData, batch_size=BATCH_SIZE)

1002
(torch.Size([1, 224, 224]), torch.Size([7]))
[INFO] generating the train/validation split...
752
250


In [13]:
# initialize the model
print("[INFO] initializing the ClassNet model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ClassNet(numChannels=1, classes=9).to(device)
# initialize our optimizer and loss function
opt = Adam(model.parameters(), lr=INIT_LR)   #Adam
lossFn = nn.CrossEntropyLoss()
#MSELoss()

# measure how long training is going to take
print("[INFO] training the network...")
startTime = time.time()

[INFO] initializing the ClassNet model...
[INFO] training the network...


In [14]:
# loop over our epochs
for e in range(0, EPOCHS):
	# set the model in training mode
	model.train()
	# initialize the total training and validation loss
	totalTrainLoss = 0
	totalValLoss = 0
	# initialize the number of correct predictions in the training
	# and validation step
	trainCorrect = 0
	valCorrect = 0

  # loop over the training set
	for (x, y) in trainDataLoader:
		# send the input to the device
		x = x.to(dtype=torch.float32)
		y = y.to(dtype=torch.float32)
		#print(x.shape)
		#print(x,y)
		(x, y) = (x.to(device), y.to(device))
		# perform a forward pass and calculate the training loss
		opt.zero_grad()
		pred = model(x)
		#print(y.size)
		#print(pred.size)
		#print(y)
		#print(pred)
		loss = lossFn(pred, y)
		print(loss)
		# zero out the gradients, perform the backpropagation step,
		# and update the weights
		loss.backward()
		opt.step()
		# add the loss to the total training loss so far and
		# calculate the number of correct predictions
		totalTrainLoss += loss
		#print(pred.argmax(1))
		#print(y.argmax(1))
		trainCorrect += (pred.argmax(1) == y.argmax(1)).type(torch.float).sum().item()

  # switch off autograd for evaluation
	with torch.no_grad():
		# set the model in evaluation mode
		model.eval()
		# loop over the validation set
		for (x, y) in valDataLoader:
			x = x.to(dtype=torch.float32)
			y = y.to(dtype=torch.float32)
	 		# send the input to the device
			(x, y) = (x.to(device), y.to(device))
			# make the predictions and calculate the validation loss
			pred = model(x)
			totalValLoss += lossFn(pred, y)
			# calculate the number of correct predictions
			valCorrect += (pred.argmax(1) == y.argmax(1)).type(torch.float).sum().item()


	# calculate the training and validation accuracy
	trainCorrect = trainCorrect / len(trainDataLoader.dataset)
	valCorrect = valCorrect / len(valDataLoader.dataset)
	# update our training history

	print(trainCorrect)
	print(valCorrect)
	print('---------------------------------------')

	torch.save(model.state_dict(), f'weights_vgg_change_{e}.pt')


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
          2.8202e-01, -3.8030e+00,  3.0017e+00,  2.7355e+01],
        [-2.8794e+01, -1.7546e+01,  1.9668e+01, -1.8810e+01, -6.5018e-01,
          3.2795e+01, -8.6598e+00,  2.2795e+00,  5.7285e-01],
        [-1.8124e+01, -1.6522e+01, -3.2153e+00, -1.0879e+01, -2.1926e+00,
          7.8925e-01, -1.4348e+00,  2.7985e+01,  9.6930e+00],
        [ 7.6496e+01,  9.2283e+00, -2.6096e+01,  2.0181e+01, -3.7457e+01,
         -2.8441e+01, -2.1539e+01, -3.0954e+01,  3.6717e+00],
        [-4.0675e+01, -1.6165e+01, -1.7794e+00, -9.0768e-01,  4.8436e+01,
          9.4244e+00,  4.7945e+00, -1.6566e+01, -2.2391e+01],
        [-6.1421e+00, -1.1943e+01,  2.0017e+00, -1.4316e+01, -1.3518e+01,
          6.2799e+00, -6.1636e+00,  7.7526e+00,  2.1407e+01],
        [ 4.8030e+00,  3.5225e+01,  4.6101e-01, -6.9830e+00, -4.1018e+00,
         -9.3988e+00, -1.4439e+01, -1.3333e+01, -3.4613e+00],
        [ 1.0892e+01,  1.5085e-01, -1.3281e+01,  3.0228e+