In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import time
import os
import copy
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import cv2

# Class index -> human-readable emotion label, in the order the indices are assigned.
emotion_label_to_text = dict(enumerate(
    ('anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise', 'neutral')
))
# Use the first CUDA device when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# One output class per entry in the label table.
num_classes = len(emotion_label_to_text)
print('Using device', device)
print("Num classes", num_classes)

  from .autonotebook import tqdm as notebook_tqdm


Using device cuda:0
Num classes 7


In [4]:
class ConvNet2(nn.Module):
    """VGG-style CNN that classifies single-channel (grayscale) images.

    ``features`` holds three stages of two 3x3 convolutions with ReLU, each
    followed by 2x2 max pooling (dropout after the first two stages).
    ``classifier`` is a three-layer fully connected head that returns raw
    logits; no softmax is applied.

    The positional layout of both ``nn.Sequential`` containers is part of the
    checkpoint format, because ``state_dict`` keys are derived from the layer
    indices (``features.0``, ``features.2``, ...). Inserting or removing
    layers (for example re-enabling batch normalisation) would make existing
    checkpoints fail to load.

    Args:
        num_classes: Number of output logits.
    """

    # Spatial size the classifier's first Linear layer is dimensioned for
    # (128 channels * 6 * 6). Three stride-2 poolings turn a 48x48 input into
    # exactly this size.
    _POOLED_SIZE = (6, 6)

    def __init__(self, num_classes: int = 7) -> None:
        super().__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            # Stage 1: 1 -> 64 channels
            nn.Conv2d(1, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Dropout(),

            # Stage 2: 64 -> 64 channels
            nn.Conv2d(64, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Dropout(),

            # Stage 3: 64 -> 128 channels (no dropout after this stage)
            nn.Conv2d(64, 128, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
        )
        # Parameter-free, so it adds nothing to the state_dict. It forces the
        # feature map to the size the classifier expects; when the map is
        # already 6x6 (48x48 input) it is an identity operation.
        self.avgpool = nn.AdaptiveAvgPool2d(self._POOLED_SIZE)

        # Fully connected head.
        self.classifier = nn.Sequential(
            nn.Linear(128 * self._POOLED_SIZE[0] * self._POOLED_SIZE[1], 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(N, 1, H, W)``. ``H`` and ``W`` must be at
                least 8 so that the three 2x2 poolings leave a non-empty
                feature map.

        Returns:
            Tensor of shape ``(N, num_classes)`` holding unnormalised logits.
        """
        x = self.features(x)
        # Without this step any input other than 48x48 would reach the first
        # Linear layer with the wrong number of features and raise.
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [13]:
# Rebuild the network and restore its trained weights for inference.
model = ConvNet2(num_classes)
# map_location remaps the stored tensors onto the device chosen for this
# session, so a checkpoint saved on a GPU still loads on a CPU-only machine.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load('./model_state_dicts/convnet_ADAM_0.01.pth', map_location=device)
)
model = model.to(device)
# Inference mode: disables the network's Dropout layers.
model.eval()
print("Done")

Done


In [53]:
# Song suggestions per emotion label. Each value is a list so that several
# candidates can be registered for the same emotion.
musics = dict(
    anger=["metalmusic.mp3"],
    disgust=["eww"],
    fear=["oooooohhh"],
    happiness=["happy birthday"],
    sadness=["sad!"],
    surprise=["oh no!"],
    neutral=["chill"],
)

In [54]:
def suggest_song(predicted_emotion, catalog=None):
    """Pick a random song suggestion for an emotion label.

    Args:
        predicted_emotion: Emotion label used as the lookup key.
        catalog: Optional mapping from emotion label to a sequence of songs.
            When omitted, the notebook-level ``musics`` dict is used.

    Returns:
        One entry of the matching song list, chosen with
        ``np.random.randint``, or ``"Idk!"`` when the emotion is not in the
        catalog or its song list is empty.
    """
    if catalog is None:
        catalog = musics
    try:
        songs = catalog[predicted_emotion]
    except KeyError:
        return "Idk!"
    if len(songs) == 0:
        # np.random.randint(0, 0) raises ValueError, so an empty list is
        # answered the same way as an unknown emotion.
        return "Idk!"
    return songs[np.random.randint(0, len(songs))]

In [55]:
# Scratch check: two equal string literals do not compare as different.
print(not ('test' == 'test'))

False


In [57]:
# Live demo: grab webcam frames, detect faces with a Haar cascade, classify
# each face with `model`, draw the predicted label on the frame, and print a
# song suggestion whenever the predicted emotion differs from the previous
# one. Press 'q' in the preview window to stop.
face_haar_cascade = cv2.CascadeClassifier('data/haarcascade_frontalface_default.xml')
if face_haar_cascade.empty():
    # The constructor does not raise on a bad path; fail here with a clear
    # message instead of erroring inside the frame loop.
    raise IOError("Could not load Haar cascade from 'data/haarcascade_frontalface_default.xml'")

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open webcam (device index 0)")

predicted_emotion = 'happiness'
# Emotion of the most recently processed face. Starts as None so the very
# first detection always produces a suggestion, whatever the emotion is.
next_emotion = None

# Tolerate a few dropped frames, but stop instead of spinning forever when
# the camera stops delivering images.
max_failed_reads = 30
failed_reads = 0

try:
    while True:
        # cap.read() returns a success flag and the captured BGR frame.
        ret, test_img = cap.read()
        if not ret:
            failed_reads += 1
            if failed_reads >= max_failed_reads:
                print("No frames received from the webcam; stopping.")
                break
            continue
        failed_reads = 0

        gray_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)
        faces_detected = face_haar_cascade.detectMultiScale(gray_img, 1.32, 5)

        try:
            for (x, y, w, h) in faces_detected:
                cv2.rectangle(test_img, (x, y), (x + w, y + h), (255, 0, 0), thickness=7)
                # Crop the face: rows span the height, columns span the width.
                roi_gray = gray_img[y:y + h, x:x + w]
                roi_gray = cv2.resize(roi_gray, (48, 48))

                # Pixels are passed on the raw 0-255 scale (no /255 scaling).
                # NOTE(review): confirm this matches the training preprocessing.
                img_pixels = roi_gray.astype('float32')
                # Add batch and channel axes -> shape (1, 1, 48, 48).
                img_pixels = torch.from_numpy(img_pixels[np.newaxis, np.newaxis, :, :]).to(device)

                # Inference only: skip building the autograd graph.
                with torch.no_grad():
                    predictions = model(img_pixels)
                _, preds = torch.max(predictions, 1)

                predicted_emotion = emotion_label_to_text[preds.item()]

                if predicted_emotion != next_emotion:
                    print("I suggest you", suggest_song(predicted_emotion))

                cv2.putText(test_img, predicted_emotion, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                next_emotion = predicted_emotion

            resized_img = cv2.resize(test_img, (1000, 700))
            cv2.imshow('Facial emotion analysis ', resized_img)
        except Exception as error:
            # Report the problem and leave the loop; cleanup happens in the
            # finally block below.
            print(error)
            break

        # Mask to the low byte so the comparison also works on platforms
        # where waitKey returns extra high bits.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even on errors
    # or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()