In [9]:
import cv2 as cv
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
transform = transforms.Compose([
    transforms.Resize((224, 224)), # Resize images to 224x224
    transforms.ToTensor(), # Convert images to PyTorch tensors
])

# Question for Mateo: How do I know this will train for all categories?
dataset = datasets.ImageFolder(root='./train', transform=transform)

labels = dataset.targets

# Define the split ratios
train_ratio = 0.7
valid_ratio = 0.15
test_ratio = 0.15
# increase training ratio with more data

# Calculate the number of samples for each set
total_samples = len(dataset)
train_size = int(train_ratio * total_samples)
valid_size = int(valid_ratio * total_samples)
test_size = total_samples - train_size - valid_size

# Create indices for the splits
indices = torch.randperm(total_samples)
train_indices = indices[:train_size]
valid_indices = indices[train_size:train_size+valid_size]
test_indices = indices[train_size+valid_size:]

# Create data loaders for each split
train_loader = torch.utils.data.DataLoader(dataset, sampler=SubsetRandomSampler(train_indices))
valid_loader = torch.utils.data.DataLoader(dataset, sampler=SubsetRandomSampler(valid_indices))
test_loader = torch.utils.data.DataLoader(dataset, sampler=SubsetRandomSampler(test_indices))

In [10]:
class NNet(nn.Module):
    def __init__(self, learning_rate=0.001, batch_size=32):
        super(NNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3) # Training error somewhere here
        self.conv2 = nn.Conv2d(32, 32, 3)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(32)
        self.fc1 = nn.Linear(32*2*2, 64)
        self.fc2 = nn.Linear(64, 32)
        self.out = nn.Linear(32, 16)
        self.pool = nn.MaxPool2d(2, 2)
        self.learning_rate = learning_rate
        self.batch_size = batch_size
    
    # activation
    def forward(self, x):
        # RELU first convolution, pooling it
        x = F.relu(self.conv1(x))
        x = self.pool(x) # 2x2 kernel to "scan" image
        
        # second convolution
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        
        # hidden linear layer
        x = x.reshape(-1, 32*2*2)
        x = F.relu(self.fc1(x))

        x = F.relu(self.fc2(x))

        return F.softmax(self.out(x), dim=1)

In [7]:
# train the network
emotions = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']

cnn = NNet()

cross_entropy_loss = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(cnn.parameters(), lr=0.001, momentum=0.9)

In [8]:
for epoch in range(3):
    total_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        # zero parameter gradients
        optimizer.zero_grad()

        # forward & backward run, SGD
        outputs = cnn(inputs)
        loss = cross_entropy_loss(outputs, labels)
        loss.backward() # how do we get this?
        optimizer.step() # optimization step

        # display stats
        total_loss += loss.item()
        if i % 2000 == 1999: # print every 2K mini-batches
            print(f'[{epoch + 1}, {i+1:5d}] loss: {loss/2000:.3f}')
            loss = 0.0 # reset loss

print('Training done')



RuntimeError: Given groups=1, weight of size [32, 1, 3, 3], expected input[1, 3, 224, 224] to have 1 channels, but got 3 channels instead

In [3]:
# OpenCV Functions

# Show image
def show_img(img):
    cv.imshow(img, cv.imread(img))
    cv.waitKey(0)
    cv.destroyWindow(img)

# Grayscale
def show_grayscale(img):
    gray = cv.cvtColor(cv.imread(img), cv.COLOR_BGR2GRAY)
    cv.imshow(img, gray)
    cv.waitKey(0)
    cv.destroyWindow(img)

# Edge detection
def show_canny(img):
    canny = cv.Canny(cv.imread(img), 100, 200)
    cv.imshow(img, canny)
    cv.waitKey(0)
    cv.destroyWindow(img)