In [52]:
import cv2 as cv
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, utils
from torch.utils.data.sampler import SubsetRandomSampler

In [53]:
transform = transforms.Compose(
    [
        transforms.Resize((48, 48)),  # Resize images to 48x48
        transforms.Grayscale(num_output_channels=1),  # Convert images to grayscale
        transforms.ToTensor(),  # Convert images to PyTorch tensors
    ]
)

dataset = datasets.ImageFolder(root="./train", transform=transform)

labels = dataset.targets

# Define the split ratios
train_ratio = 0.7
valid_ratio = 0.15
test_ratio = 0.15
# increase training ratio with more data

# Calculate the number of samples for each set
total_samples = len(dataset)
train_size = int(train_ratio * total_samples)
valid_size = int(valid_ratio * total_samples)
test_size = total_samples - train_size - valid_size

# Create indices for the splits
indices = torch.randperm(total_samples)
train_indices = indices[:train_size]
valid_indices = indices[train_size : train_size + valid_size]
test_indices = indices[train_size + valid_size :]


# Create data loaders for each split
train_loader = torch.utils.data.DataLoader(
    dataset, sampler=SubsetRandomSampler(train_indices)
)
valid_loader = torch.utils.data.DataLoader(
    dataset, sampler=SubsetRandomSampler(valid_indices)
)
test_loader = torch.utils.data.DataLoader(
    dataset, sampler=SubsetRandomSampler(test_indices)
)

In [54]:
class NNet(nn.Module):
    def __init__(self, learning_rate=0.001, batch_size=32):
        super(NNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 48, kernel_size=3)
        self.conv2 = nn.Conv2d(48, 48, kernel_size=3)
        self.pool = nn.MaxPool2d(3, 3)
        self.fc1 = nn.Linear(48 * 4 * 4, 144) # Adjusted output size
        self.fc2 = nn.Linear(144, 48) # Corrected input size to match the output of self.fc1
        self.out = nn.Linear(48, 24)
        self.learning_rate = learning_rate
        self.batch_size = batch_size

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(-1, 48 * 4 * 4) # Corrected reshaping based on the calculated dimensions
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.softmax(self.out(x), dim=1)

In [55]:
# train the network
emotions = ["angry", "disgusted", "fearful", "happy", "neutral", "sad", "surprised"]

cnn = NNet()

cross_entropy_loss = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(cnn.parameters(), lr=0.001, momentum=0.9)

In [None]:
# training data
for epoch in range(2):
    total_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        print('Inputs: ', inputs)
        print('Labels: ', labels)

        # zero parameter gradients
        optimizer.zero_grad()

        # forward & backward run, SGD
        outputs = cnn(inputs)
        print('Outputs: ', outputs)
        loss = cross_entropy_loss(outputs, labels)
        loss.backward()  # how do we get this?
        optimizer.step()  # optimization step

        # display stats
        total_loss += loss.item()
        if i % 2000 == 1999:  # print every 2K mini-batches
            print(f"[{epoch + 1}, {i+1:5d}] loss: {loss/2000:.3f}")
            loss = 0.0  # reset loss

print("Training done")

In [57]:
PATH = './cifar_net.pth'
torch.save(cnn.state_dict(), PATH)

In [58]:
dataiter = iter(test_loader)
images, labels = next(dataiter)

print('GroundTruth: ', ' '.join(f'{emotions[j]:5s}' for j in range(len(emotions))))

GroundTruth:  angry disgusted fearful happy neutral sad   surprised


In [59]:
# Load trained neural network
net = NNet()
net.load_state_dict(torch.load(PATH))

outputs = net(images)

In [60]:
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join(f'{emotions[j]:5s}'
                              for j in range(len(emotions))))

Predicted:  angry disgusted fearful happy neutral sad   surprised


In [None]:
# test network accuracy
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

In [None]:
# validation

# Assuming validation_loader is your validation data loader
# and criterion is your loss function

def validate(model, validation_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    validation_loss = 0
    correct = 0

    with torch.no_grad():  # No need to track gradients in validation
        for data, target in validation_loader:
            output = model(data)
            validation_loss += criterion(output, target).item()  # Sum up batch loss
            pred = output.data.max(1, keepdim=True)[1]  # Get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).sum().item()

    validation_loss /= len(validation_loader.dataset)
    validation_accuracy = 100. * correct / len(validation_loader.dataset)

    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        validation_loss, correct, len(validation_loader.dataset), validation_accuracy))

# Call the function
validate(net, valid_loader, criterion)

In [None]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in emotions}
total_pred = {classname: 0 for classname in emotions}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[emotions[label]] += 1
            total_pred[emotions[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

In [None]:
capture = cv.VideoCapture()
success, image = capture.read()

pred = net(image)
print(pred)

import google.generativeai as genai
import json

genai.configure(api_key=json.load('info.json')['api_key'])

# Set up the model
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 0,
  "max_output_tokens": 8192,
}

safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
]

model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
                              generation_config=generation_config,
                              safety_settings=safety_settings,
                              system_instruction=f"Suggest how I can improve the mood of this person given that they are {pred}.")

convo = model.start_chat(history=[
])

convo.send_message("")
print(convo.last.text)

In [None]:
# adjust device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print(device)

cpu


In [None]:
# OpenCV Functions

# Show image
def show_img(img):
    cv.imshow(img, cv.imread(img))
    cv.waitKey(0)
    cv.destroyWindow(img)


# Grayscale
def show_grayscale(img):
    gray = cv.cvtColor(cv.imread(img), cv.COLOR_BGR2GRAY)
    cv.imshow(img, gray)
    cv.waitKey(0)
    cv.destroyWindow(img)


# Edge detection
def show_canny(img):
    canny = cv.Canny(cv.imread(img), 100, 200)
    cv.imshow(img, canny)
    cv.waitKey(0)
    cv.destroyWindow(img)   