In [6]:
from __future__ import print_function
import argparse
import numpy  as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms

from data_loaders import Plain_Dataset, eval_data_dataloader
from deep_emotion import Deep_Emotion
from generate_data import Generate_data

device = torch.device("cuda")

if not torch.cuda.is_available():
    raise Exception("CUDA is not available. Make sure you have a CUDA-enabled GPU.")

def Train(epochs,train_loader,val_loader,criterion,optmizer,device):
    '''
    Training Loop
    '''
    print("===================================Start Training===================================")
    for e in range(epochs):
        train_loss = 0
        validation_loss = 0
        train_correct = 0
        val_correct = 0
        # Train the model  #
        net.train()
        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            optmizer.zero_grad()
            outputs = net(data)
            loss = criterion(outputs,labels)
            loss.backward()
            optmizer.step()
            train_loss += loss.item()
            _, preds = torch.max(outputs,1)
            train_correct += torch.sum(preds == labels.data)

        #validate the model#
        net.eval()
        for data,labels in val_loader:
            data, labels = data.to(device), labels.to(device)
            val_outputs = net(data)
            val_loss = criterion(val_outputs, labels)
            validation_loss += val_loss.item()
            _, val_preds = torch.max(val_outputs,1)
            val_correct += torch.sum(val_preds == labels.data)

        train_loss = train_loss/len(train_dataset)
        train_acc = train_correct.double() / len(train_dataset)
        validation_loss =  validation_loss / len(validation_dataset)
        val_acc = val_correct.double() / len(validation_dataset)
        print('Epoch: {} \tTraining Loss: {:.8f} \tValidation Loss {:.8f} \tTraining Acuuarcy {:.3f}% \tValidation Acuuarcy {:.3f}%'
                                                           .format(e+1, train_loss,validation_loss,train_acc * 100, val_acc*100))

    torch.save(net.state_dict(),'deep_emotion-{}-{}-{}.pt'.format(epochs,batchsize,lr))
    print("===================================Training Finished===================================")


epochs = 4000
lr = 0.005
batchsize = 128

#     if args.train:
net = Deep_Emotion()
net.to(device)
print("Model archticture: ", net)
traincsv_file = 'data'+'/'+'train.csv'
validationcsv_file = 'data'+'/'+'val.csv'
train_img_dir = 'data'+'/'+'train/'
validation_img_dir = 'data'+'/'+'val/'

transformation= transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
train_dataset= Plain_Dataset(csv_file=traincsv_file, img_dir = train_img_dir, datatype = 'train', transform = transformation)
validation_dataset= Plain_Dataset(csv_file=validationcsv_file, img_dir = validation_img_dir, datatype = 'val', transform = transformation)
train_loader= DataLoader(train_dataset,batch_size=batchsize,shuffle = True,num_workers=0)
val_loader=   DataLoader(validation_dataset,batch_size=batchsize,shuffle = True,num_workers=0)

criterion= nn.CrossEntropyLoss()
optmizer= optim.Adam(net.parameters(),lr= lr)
Train(epochs, train_loader, val_loader, criterion, optmizer, device)

Model archticture:  Deep_Emotion(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=810, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=7, bias=True)
  (localization): Sequential(
    (0): Conv2d(1, 8, kernel_size=(7, 7), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
    (3): Conv2d(8, 10, kernel_size=(5, 5), stride=(1, 1))
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)




Epoch: 1 	Training Loss: 0.00163894 	Validation Loss 0.00161683 	Training Acuuarcy 18.237% 	Validation Acuuarcy 17.637%
Epoch: 2 	Training Loss: 0.00152890 	Validation Loss 0.00153637 	Training Acuuarcy 25.031% 	Validation Acuuarcy 22.458%
Epoch: 3 	Training Loss: 0.00151405 	Validation Loss 0.00152738 	Training Acuuarcy 24.852% 	Validation Acuuarcy 25.132%
Epoch: 4 	Training Loss: 0.00151390 	Validation Loss 0.00152383 	Training Acuuarcy 25.047% 	Validation Acuuarcy 25.104%
Epoch: 5 	Training Loss: 0.00151438 	Validation Loss 0.00152370 	Training Acuuarcy 25.042% 	Validation Acuuarcy 25.132%
Epoch: 6 	Training Loss: 0.00151294 	Validation Loss 0.00152340 	Training Acuuarcy 25.047% 	Validation Acuuarcy 25.077%
Epoch: 7 	Training Loss: 0.00150494 	Validation Loss 0.00153139 	Training Acuuarcy 25.014% 	Validation Acuuarcy 25.132%
Epoch: 8 	Training Loss: 0.00151550 	Validation Loss 0.00152445 	Training Acuuarcy 25.053% 	Validation Acuuarcy 25.132%
Epoch: 9 	Training Loss: 0.00151160 	Val

KeyboardInterrupt: 

In [7]:
import cv2
import numpy as np
import torch
import torch.nn.functional as F

path = "haarcascade_frontalface_default.xml"
font_scale = 1
font = cv2.FONT_HERSHEY_PLAIN

cap = cv2.VideoCapture(0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if not cap.isOpened():
    # Check if the webcam is opened correctly
    cap = cv2.VideoCapture(2)
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

while True:
    ret, frame = cap.read()

    faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    faces = faceCascade.detectMultiScale(gray, 1.1, 4)

    for x, y, w, h in faces:
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = frame[y:y+h, x:x+w]

        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        facess = faceCascade.detectMultiScale(roi_gray)

        if len(facess) == 0:
            print("Face not detected")
        else:
            for (ex, ey, ew, eh) in facess:
                face_roi = roi_color[ey: ey+eh, ex:ex+ew]  # cropping the face

            graytemp = cv2.cvtColor(face_roi, cv2.COLOR_BGR2GRAY)

            final_image = cv2.resize(graytemp, (48, 48))
            final_image = np.expand_dims(final_image, axis=0)  # Add third dimension
            final_image = np.expand_dims(final_image, axis=0)  # Add fourth dimension
            final_image = final_image / 255.0  # Normalization

            data = torch.from_numpy(final_image)
            data = data.type(torch.FloatTensor)
            data = data.to(device)

            outputs = net(data)
            pred = F.softmax(outputs, dim=1)
            prediction = torch.argmax(pred)

            print(prediction)

            if (prediction == 0):
                status = "Angry, take a deep breath"
                color = (0, 0, 255)
            elif (prediction == 2):
                status = "Fear, calm down"
                color = (0, 0, 255)
            elif (prediction == 3):
                status = "Happy, you are good"
                color = (0, 0, 255)
            elif (prediction == 4):
                status = "Sad, relax and meditate"
                color = (0, 0, 255)
            else:
                status = ""
                color = (255, 0, 0)

            x1, y1, w1, h1 = 0, 0, 175, 75
            cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0, 0, 0), -1)
            cv2.putText(frame, status, (x1 + int(w1 / 10), y1 + int(h1 / 2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
            cv2.putText(frame, status, (100, 150), font, 3, color, 2, cv2.LINE_4)
            cv2.rectangle(frame, (x, y), (x + w, y + h), color)

        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(frame,
                    status,
                    (50, 50),
                    font, 0,
                    color,
                    2,
                    cv2.LINE_4)
        cv2.imshow('Face', frame)

        if cv2.waitKey(2) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
t

tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')
tensor(3, device='cuda:0')


KeyboardInterrupt: 

In [9]:
torch.load('Speaktrum_by_SOVA_Final.pt')

OrderedDict([('conv1.weight',
              tensor([[[[-1.8631, -1.4875, -1.4204],
                        [-0.1823,  0.5349, -0.2269],
                        [ 0.4546,  1.6433,  3.2749]]],
              
              
                      [[[ 3.2150, -1.1346,  0.1356],
                        [ 1.2941, -1.9955, -1.5213],
                        [ 3.5298, -0.8771, -2.3824]]],
              
              
                      [[[ 0.4591,  0.8948,  1.3256],
                        [ 0.5632,  2.3217, -0.5738],
                        [ 0.3966,  2.0984,  0.2788]]],
              
              
                      [[[ 0.3295,  2.7651,  3.2154],
                        [ 0.4463, -2.5834, -0.3086],
                        [ 2.9543, -1.2090, -2.7839]]],
              
              
                      [[[-0.9102,  1.3918,  3.8718],
                        [-1.2969, -1.4566,  1.4116],
                        [-1.5211,  0.6811, -1.3385]]],
              
              
               

In [None]:
net = Deep_Emotion()
net.to(device)
print("Model archticture: ", net)
traincsv_file = 'data'+'/'+'train.csv'
validationcsv_file = 'data'+'/'+'val.csv'
train_img_dir = 'data'+'/'+'train/'
validation_img_dir = 'data'+'/'+'val/'

transformation= transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
train_dataset= Plain_Dataset(csv_file=traincsv_file, img_dir = train_img_dir, datatype = 'train', transform = transformation)
validation_dataset= Plain_Dataset(csv_file=validationcsv_file, img_dir = validation_img_dir, datatype = 'val', transform = transformation)
train_loader= DataLoader(train_dataset,batch_size=batchsize,shuffle = True,num_workers=0)
val_loader=   DataLoader(validation_dataset,batch_size=batchsize,shuffle = True,num_workers=0)

criterion= nn.CrossEntropyLoss()
optmizer= optim.Adam(net.parameters(),lr= lr)
Train(epochs, train_loader, val_loader, criterion, optmizer, device)

Model archticture:  Deep_Emotion(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=810, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=7, bias=True)
  (localization): Sequential(
    (0): Conv2d(1, 8, kernel_size=(7, 7), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
    (3): Conv2d(8, 10, kernel_size=(5, 5), stride=(1, 1))
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)




Epoch: 1 	Training Loss: 0.00162394 	Validation Loss 0.00158730 	Training Acuuarcy 17.283% 	Validation Acuuarcy 19.532%
Epoch: 2 	Training Loss: 0.00153010 	Validation Loss 0.00160119 	Training Acuuarcy 24.891% 	Validation Acuuarcy 17.470%
Epoch: 3 	Training Loss: 0.00151820 	Validation Loss 0.00152412 	Training Acuuarcy 24.908% 	Validation Acuuarcy 25.049%
Epoch: 4 	Training Loss: 0.00151601 	Validation Loss 0.00152988 	Training Acuuarcy 24.941% 	Validation Acuuarcy 23.934%
Epoch: 5 	Training Loss: 0.00151651 	Validation Loss 0.00152683 	Training Acuuarcy 24.919% 	Validation Acuuarcy 24.909%
Epoch: 6 	Training Loss: 0.00151748 	Validation Loss 0.00153846 	Training Acuuarcy 25.025% 	Validation Acuuarcy 24.575%
Epoch: 7 	Training Loss: 0.00152473 	Validation Loss 0.00159791 	Training Acuuarcy 24.724% 	Validation Acuuarcy 16.746%
Epoch: 8 	Training Loss: 0.00152807 	Validation Loss 0.00153127 	Training Acuuarcy 24.769% 	Validation Acuuarcy 24.937%
Epoch: 9 	Training Loss: 0.00151412 	Val