# Detecting Dog Emotions Using Deep Learning

**Objective**: Build a convolutional neural network to predict whether a dog is happy, sad, or relaxed based on its picture.

The dog images used in this notebook were collected from Flickr.

## Table of Contents
- [1. Understanding the image data and creating dataset labels](#1)
- [2. Creating a PyTorch Dataset](#2)
- [3. Splitting the Training and Test Data](#3)
- [4. Building the Convolutional Neural Network](#4)
- [5. Training the Convolutional Neural Network](#5)
- [6. What are our generated predictions?](#6)

<a name ="1"> </a>
## 1. Understanding the image data and creating dataset labels

In [None]:
# Root directory that holds one sub-folder of images per emotion.
IMAGE_DIR = "../input/images/images"
# Emotion sub-folders; a folder's position in this list is used as its integer class code.
FOLDERS = ["happy", "sad", "relaxed"]
# Name of the torch device the model and the image batches are moved to.
DEVICE = "mps"

In [None]:
import os
import pandas as pd

In [None]:
# Collect every image path under IMAGE_DIR together with its emotion label.
img_files = []   # full path of each image
labels = []      # text label (the folder name)
label_code = []  # integer class code (the folder's position in FOLDERS)

for code, folder in enumerate(FOLDERS):
    # IMAGE_DIR/<emotion> holds all pictures for that emotion.
    folder_path = os.path.join(IMAGE_DIR, folder)

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order, and therefore any seeded split of the rows, reproducible.
    for im in sorted(os.listdir(folder_path)):
        img_files.append(os.path.join(folder_path, im))
        labels.append(folder)
        label_code.append(code)

In [None]:
# One row per image: file path, text label and integer class code.
dataset = pd.DataFrame(
    {"filename": img_files, "label": labels, "code": label_code}
)

In [None]:
# Show the image table as this cell's output.
dataset

In [None]:
import torch

In [None]:
torch.manual_seed(0)  # fix the global RNG seed so random operations are repeatable between runs

In [None]:
# Resolve the torch device named by DEVICE, falling back to the CPU when the
# requested accelerator is not available on this machine.
_requested = torch.device(DEVICE)
if _requested.type == "mps":
    _usable = torch.backends.mps.is_available()
elif _requested.type == "cuda":
    _usable = torch.cuda.is_available()
else:
    _usable = True
device = _requested if _usable else torch.device("cpu")

<a name = "2"></a>
## 2. Creating a PyTorch Dataset

In [None]:
from torchvision.io import read_image
from torchvision import transforms as T
from torch.utils.data import Dataset
import math

In [None]:
class DogDataset(Dataset):
    """Image dataset that serves each picture unmodified and once per augmentation.

    With ``n`` rows in ``dataset``, indices ``0 .. n-1`` return the original
    images; every following block of ``n`` indices returns the same images
    passed through the next transform in ``self.augments``.
    """

    def __init__(self, dataset):
        """Store the image table and build the transforms.

        Args:
            dataset: DataFrame with a ``filename`` column (image path) and a
                ``code`` column (integer class label).
        """
        # Imported locally: only read_image is imported at file level.
        from torchvision.io import ImageReadMode

        self.dataset = dataset
        # Decode every file to 3 channels so grayscale/RGBA files still match
        # the 3-channel normalisation below.
        self.read_mode = ImageReadMode.RGB
        self.augments = [
            T.RandomHorizontalFlip(p=1),  # p=1: always flip
            T.RandomRotation(90),
            T.AutoAugment(),
        ]
        self.normalize = T.Compose([
            # integer pixel values -> float
            T.ConvertImageDtype(torch.float),
            # per-channel mean / std commonly used for ImageNet
            T.Normalize((.485, .456, .406), (.229, .224, .225)),
        ])

    def __len__(self):
        """Return the number of rows times (number of augmentations + 1)."""
        return self.dataset.shape[0] * (len(self.augments) + 1)

    def classes(self):
        """Return the unique class codes present in the ``code`` column."""
        return self.dataset["code"].unique()

    def __getitem__(self, idx):
        """Return ``(image, label, path)`` for dataset index ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        idx = int(idx)
        if idx < 0:
            idx += total  # sequence-style negative indexing
        if not 0 <= idx < total:
            raise IndexError(f"index {idx} is out of range for dataset of size {total}")

        # Which variant (0 = unmodified) and which row of the table.
        augment, row_idx = divmod(idx, self.dataset.shape[0])
        row = self.dataset.iloc[row_idx, :]

        img_path = row["filename"]
        label = row["code"]

        image = read_image(img_path, mode=self.read_mode)
        if augment > 0:
            # Augmentations run on the raw image, before normalisation.
            image = self.augments[augment - 1](image)
        image = self.normalize(image)

        return image, int(label), img_path

In [None]:
# Wrap the image table; len(data) counts every image once unmodified plus once per augmentation.
data = DogDataset(dataset)

<a name = "3"></a>
## 3. Splitting the Training and Test Data

In [None]:
# Split by *source image*, not by dataset index. DogDataset maps index
# `variant * n_images + image` to one variant of one image, so splitting the
# raw indices would put augmented copies of training images into the test set.
n_images = data.dataset.shape[0]
n_variants = len(data.augments) + 1  # unmodified image + one per augmentation

# Seeded generator so the same images land in the same split on every run.
split_rng = torch.Generator().manual_seed(1)
shuffled_images = torch.randperm(n_images, generator=split_rng).tolist()
n_train_images = int(0.8 * n_images)
train_images = shuffled_images[:n_train_images]
test_images = shuffled_images[n_train_images:]

# Training uses every variant of the training images ...
train_indices = [v * n_images + i for v in range(n_variants) for i in train_images]
# ... while the test set holds only unmodified, never-trained-on images, so
# the reported metrics do not depend on random augmentations.
test_indices = list(test_images)

train_size = len(train_indices)
test_size = len(test_indices)

train_data = torch.utils.data.Subset(data, train_indices)
test_data = torch.utils.data.Subset(data, test_indices)

In [None]:
from torch.utils.data import DataLoader  # serves the datasets in batches

BATCH_SIZE = 64  # number of images per batch
EPOCHS = 50  # number of passes over the training data
train = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is left
# unshuffled; results are then collected in a stable order.
test = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

<a name = "4"></a>
## 4. Building the Convolutional Neural Network

In [None]:
#creating a new class for the neural network

from torch import nn
class NeuralNetwork(nn.Module):
    """Small CNN classifier: three convolutions followed by two linear layers.

    The first linear layer expects 64 feature maps of 46 x 46, which is what
    the convolutional stack produces for e.g. 384 x 384 input images.
    """

    def __init__(self, classes):
        """Build the layers.

        Args:
            classes: sized collection of class codes; its length sets the
                number of outputs.
        """
        super().__init__()

        # Each position gets its OWN BatchNorm2d: a batch-norm layer holds
        # learnable scale/shift and running statistics for the activations at
        # one place in the network, so a single instance must not be reused.
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 4, stride=2),  # 4x4 window, moved 2 pixels at a time
            nn.BatchNorm2d(64),
            nn.ReLU(True),

            nn.MaxPool2d(2, 2),  # halve the spatial size of each of the 64 channels

            nn.Conv2d(64, 64, 2),
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 64, 2),
            nn.BatchNorm2d(64),
            nn.ReLU(True),

            nn.MaxPool2d(2, 2),
        )

        self.dense = nn.Sequential(
            nn.Linear(64 * 46 * 46, 64),
            nn.Linear(64, len(classes)),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images."""
        x = self.cnn(x)           # convolutional feature extraction
        x = torch.flatten(x, 1)   # one flat feature vector per image
        return self.dense(x)      # class scores

In [None]:
# Unique class codes found in the data. This rebinds `labels`, which earlier held the text labels.
labels = data.classes()
labels

In [None]:
# Build the model with one output per class code and move it to the selected device.

model = NeuralNetwork(labels).to(device)
model

<a name = "5"></a>
## 5. Training the Convolutional Neural Network

In [None]:
# Loss: cross-entropy between the model's class scores and the integer class codes.
loss_fn = nn.CrossEntropyLoss()

# Optimizer: plain SGD that updates all model parameters to reduce the loss.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

In [None]:
# Number of samples behind the training loader.
# NOTE(review): `size` is not referenced by any other cell visible here.
size = len(train.dataset)

In [None]:
# Train the network for EPOCHS passes over the training loader.

model.train()  # batch norm uses batch statistics and updates its running averages
for epoch in range(EPOCHS):
    running_loss = 0.0  # sum of per-sample losses in this epoch
    seen = 0            # number of samples processed in this epoch

    # `targets` (not `labels`) so the notebook-level `labels` is left alone.
    for images, targets, _ in train:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        predictions = model(images.float())
        batch_loss = loss_fn(predictions, targets)

        batch_loss.backward()  # back-propagate so the optimizer can adjust the weights
        optimizer.step()

        running_loss += batch_loss.item() * targets.size(0)
        seen += targets.size(0)

    # Report the mean loss over the whole epoch rather than the last batch
    # only; an empty loader yields NaN instead of a NameError.
    loss = running_loss / seen if seen else float("nan")
    print(f"loss: {loss:>7f} [{epoch}]")

In [None]:
# Saves the whole model object (not only its state_dict) to 'dog_model.pth'.
torch.save(model, 'dog_model.pth')

<a name = "6"> </a>
## 6. What are our generated predictions?

In [None]:
# Load the saved model and place its tensors on the active device, so a file
# saved on one kind of device (e.g. a GPU) can still be loaded on another.
# NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only load model files from a source you trust.
model = torch.load('../input/trained-dog-model/dog_model.pth', map_location=device)

In [None]:
# Run the model over the test set and collect the results batch by batch.
all_preds = []   # predicted class codes, one tensor per batch
all_labels = []  # true class codes, one tensor per batch
all_paths = []   # image paths, one sequence per batch

# Switch to evaluation mode: batch norm then uses its running statistics
# instead of per-batch statistics and stops updating them.
model.eval()
with torch.no_grad():  # no gradients are needed for inference
    for images, labels, img_paths in test:
        images = images.to(device)
        outputs = model(images.float())

        # Index of the highest score = predicted class for each image.
        preds = outputs.argmax(dim=1)

        all_labels.append(labels)
        all_preds.append(preds)
        all_paths.append(img_paths)

In [None]:
import numpy as np

# Stitch the per-batch results into flat NumPy arrays.
preds = np.concatenate([batch.cpu().numpy() for batch in all_preds])
labels = np.concatenate([batch.cpu().numpy() for batch in all_labels])
paths = np.concatenate(all_paths)

In [None]:
# Exact-match accuracy: share of images whose predicted code equals the true code.
np.sum(preds == labels) / labels.shape[0]

In [None]:
# Per-image results table: predicted code, true code, image path and a hit/miss flag.
predictions = pd.DataFrame({"pred": preds, "label": labels, "path": paths})
predictions["correct"] = predictions["pred"].eq(predictions["label"])

In [None]:
# Translate each predicted class code (0, 1, 2) back to its folder name.
predictions["prediction"] = predictions["pred"].map(lambda code: FOLDERS[int(code)])

In [None]:
# Translate each true class code back to its folder name.
predictions["actual"] = predictions["label"].map(lambda code: FOLDERS[int(code)])

In [None]:
# Share of correct predictions within each *predicted* class, i.e. how often
# the model is right when it outputs that class. Selecting the column before
# aggregating avoids applying a function to the whole grouped frame.
predictions.groupby("prediction")["correct"].mean()

In [None]:
# Independent copy of the first 30 result rows, used for display.
disp = predictions.head(30).copy()
disp

In [None]:
#show the image
def image_formatter(path):
    """Return an HTML ``<img>`` tag whose ``src`` attribute is ``path``.

    The path is HTML-escaped so that characters such as quotes, ``&`` or ``<``
    in a file name cannot break out of the attribute.
    """
    from html import escape  # local import: html is not imported at file level

    return f'<img src="{escape(str(path), quote=True)}">'

In [None]:
# Style the DataFrame so the 'path' column is rendered through image_formatter.
disp.style.format({'path': image_formatter})

***We could still improve the model's accuracy with some simple techniques, such as collecting more pictures, using a deeper and more complex neural network, augmenting the dataset further, and training for more epochs. However, my computer barely scraped by just training this network, so I'll stop here.***