<a href="https://colab.research.google.com/github/jdchen5/machinelearninglabs/blob/main/W22/requiredActivity22-3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [70]:

import argparse
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


from tqdm import tqdm
from __future__ import print_function
from torch.utils.data import DataLoader, random_split, Dataset, Subset
from torchvision.datasets.folder import default_loader
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder




In [24]:
import cv2
img = cv2.imread("some_image.pgm", cv2.IMREAD_COLOR)



In [104]:
# Custom loader using OpenCV
def img_loader(filename):
    return cv2.imread(filename, cv2.IMREAD_GRAYSCALE)

data = ImageFolder(root='/content/gdrive/My Drive/Pythoncode/W22/faces_4/', loader=img_loader, transform=transforms)
data.classes

['an2i',
 'at33',
 'boland',
 'bpm',
 'ch4f',
 'cheyer',
 'choon',
 'danieln',
 'glickman',
 'karyadi',
 'kawamura',
 'kk49',
 'megak',
 'mitchell',
 'night',
 'phoebe',
 'saavik',
 'steffi',
 'sz24',
 'tammo']

In [105]:
import collections

# Dictionary to hold expression counts for each class
class_expressions = collections.defaultdict(lambda: collections.defaultdict(int))

# Loop over all samples in the dataset
for image_path, class_index in data.samples:
    class_name = data.classes[class_index]
    filename = os.path.basename(image_path)
    # Split the filename and extract the expression
    parts = filename.split('_')
    if len(parts) >= 3:  # Check that there are enough parts to extract expression
        # Concatenate the parts to form the expression string
        expression = parts[1] + "_" + parts[2]  # The third part is assumed to be the expression
        class_expressions[class_name][expression] += 1

# Now 'class_expressions' contains the counts of each unique expression for each class
for class_name, expressions in class_expressions.items():
    print(f"Class '{class_name}' expressions and counts:")
    for expression, count in expressions.items():
        print(f"{expression}: {count}")
    print()

Class 'an2i' expressions and counts:
left_angry: 2
left_happy: 2
left_neutral: 2
left_sad: 2
right_angry: 2
right_happy: 2
right_neutral: 2
right_sad: 2
straight_angry: 2
straight_happy: 2
straight_neutral: 2
straight_sad: 2
up_angry: 2
up_happy: 2
up_neutral: 2
up_sad: 2

Class 'at33' expressions and counts:
left_angry: 2
left_happy: 2
left_neutral: 2
left_sad: 2
right_angry: 2
right_happy: 2
right_neutral: 2
right_sad: 2
straight_angry: 2
straight_happy: 2
straight_neutral: 2
straight_sad: 2
up_angry: 2
up_happy: 2
up_neutral: 2
up_sad: 2

Class 'boland' expressions and counts:
left_angry: 2
left_happy: 2
left_neutral: 2
left_sad: 2
right_angry: 2
right_happy: 2
right_neutral: 2
right_sad: 2
straight_angry: 2
straight_happy: 2
straight_neutral: 2
straight_sad: 2
up_angry: 2
up_happy: 2
up_neutral: 2
up_sad: 2

Class 'bpm' expressions and counts:
left_angry: 2
left_happy: 2
left_neutral: 2
left_sad: 2
right_angry: 2
right_happy: 2
right_neutral: 2
right_sad: 2
straight_angry: 2
straig

In [62]:
# LeNet-5 architecture adapted for face classification

# the Net class to handle a single task at a time
class Net(nn.Module):
    def __init__(self, num_classes=4):  # Adjusted for a default of 4 classes
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(16 * 8 * 8, 120)  # Adjusted based on pooling and padding
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 8 * 8)  # Adjusted based on pooling and padding
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x



In [87]:
class MitchellNet(nn.Module):
    def __init__(self):
        super(MitchellNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # Calculate the correct number of flattened features after the last pooling layer
        # Here, we assume the size of the feature map after the convolutions and pooling is 5x5
        # This number (16 * 5 * 5) must match the number of input features of self.fc1
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 4)  # Adjusted for 4 classes

    def forward(self, x):

        #print(f"MitchellNet Input batch size: {x.size(0)}")  # Should always be 64 based on your DataLoader

        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)  # Make sure this matches the output of conv2 layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

       # print(f"Input batch size: {x.size(0)}")  # Should be 64
       # print(f"Output batch size: {x.size(0)}")  # Should also be 64
        return x

In [74]:
class ExpressionNet(nn.Module):
    def __init__(self, num_classes=4):   # Default parameter adjusted for 4 classes
        super(ExpressionNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)  # Added padding to maintain size
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, padding=2)  # Added padding to maintain size
        # Adjusted based on pooling and padding, assuming input image size allows for this after conv and pool
        self.fc1 = nn.Linear(16 * 8 * 8, 120)  # Adjusted to correct size
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Ensure the flattening matches the output size of the final pool layer
        x = x.view(-1, 16 * 8 * 8)  # Adjust this based on your actual dimensions
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [73]:
def train(model, device, train_loader, optimizer):
    model.train()
    total_loss = 0
    for data, target in tqdm(train_loader, desc="Training"):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Training loss: {total_loss / len(train_loader)}")

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(test_loader, desc="Testing"):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)')

In [107]:
class CustomImageDataset(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.samples = []

        # Build a list of samples and labels by parsing filenames
        for filename in os.listdir(root):
            if filename.endswith('.pgm'):
                # Example filename: 'mitchell_left_angry_open.pgm'
                parts = filename.split('_')
                label = parts[2] if len(parts) > 2 else None  # Extract expression label
                self.samples.append((os.path.join(root, filename), label))

        # Map expressions to integer labels
        expressions = sorted(set(label for _, label in self.samples))
        self.class_to_idx = {expression: i for i, expression in enumerate(expressions)}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, expression = self.samples[idx]
        image = default_loader(path)  # Uses PIL to load the image
        label = self.class_to_idx[expression]  # Convert expression to a numerical label

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [108]:
def filter_for_mitchell(dataset, model, device):
    model.eval()
    indices = []
    predictions = []  # To store prediction results for debugging

    for i in range(len(dataset)):
        img, _ = dataset[i]
        img = img.unsqueeze(0).to(device)  # Add batch dimension
        with torch.no_grad():
            output = model(img)
        pred = output.argmax(dim=1)
        predictions.append(pred.item())  # Store prediction

        if pred.item() == 1:  # Assuming '1' indicates Mitchell
            indices.append(i)

    # Debugging: Print the distribution of predictions
    print("Prediction distribution:", {i: predictions.count(i) for i in set(predictions)})

    # Check if indices list is empty
    if not indices:
        print("No items classified as Mitchell. Please check model predictions or adjust criteria.")
        # Optional: Instead of returning None, consider returning an empty Subset if it suits your application better
        return Subset(dataset, [])  # This avoids TypeError when the calling function expects a dataset-like object
    else:
        print(f"Items classified as Mitchell: {len(indices)}")

    return Subset(dataset, indices)

In [106]:
from torch.utils.data import DataLoader, random_split, SubsetRandomSampler

def main(data_dir):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Transformations
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Initialize your custom dataset
    dataset = CustomImageDataset(root=data_dir, transform=transform)
    print(f"Total dataset size: {len(dataset)}")

    # Use the length of class_to_idx to determine the number of unique classes
    num_classes = len(dataset.class_to_idx)
    print(f"Number of unique classes: {num_classes}")

    # Splitting the dataset into train and test
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    print(f"Training dataset size: {len(train_dataset)}")
    print(f"Testing dataset size: {len(test_dataset)}")

    test_sample, test_label = test_dataset[0]  # Adjust index as necessary
    print(f"Sample shape: {test_sample.shape}, Label: {test_label}")


    # Create DataLoader instances
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True)
    #test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, drop_last=True)

    # Example of a diagnostic step, not a solution
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0, sampler=SubsetRandomSampler(range(17)))


    for data, target in test_loader:
        print(f"Batch test dataloder  shape: {data.shape}, Batch target shape: {target.shape}")
        break  # Just to test the first batch

    for data, target in test_loader:
        print(f"Direct Test DataLoader Check - Batch data shape: {data.shape}, Batch target shape: {target.shape}")
        break  # Just to confirm we can get at least one batch


    # Train MitchellNet
    mitchell_model = MitchellNet().to(device)
    optimizer_mitchell = optim.SGD(mitchell_model.parameters(), lr=0.01, momentum=0.9)
    for epoch in range(1, 11):
        train(mitchell_model, device, train_loader, optimizer_mitchell)
        test(mitchell_model, device, test_loader)

    # Filter dataset based on MitchellNet's predictions
    mitchell_filtered_dataset = filter_for_mitchell(dataset, mitchell_model, device)

    # Prepare DataLoader for filtered dataset
    filtered_loader = DataLoader(mitchell_filtered_dataset, batch_size=64, shuffle=True)

"""
    # Train ExpressionNet
    expression_model = ExpressionNet(num_classes=num_classes).to(device)
    optimizer_expression = optim.SGD(expression_model.parameters(), lr=0.01, momentum=0.9)
    for epoch in range(1, 11):
        train(expression_model, device, filtered_loader, optimizer_expression)
        # Note: You might want to adjust how you handle the test set here, depending on your needs

    # Save the model checkpoints
    torch.save(mitchell_model.state_dict(), "mitchell_model_checkpoint.pth")
    torch.save(expression_model.state_dict(), "expression_model_checkpoint.pth")
"""

if __name__ == '__main__':
    # Make sure this directory points to where your images are stored
    data_dir = '/content/gdrive/My Drive/Pythoncode/W22/faces_4/mitchell'
    main(data_dir)

Total dataset size: 28
Number of unique classes: 4
Training dataset size: 22
Testing dataset size: 6
Sample shape: torch.Size([1, 32, 32]), Label: 3


IndexError: list index out of range

In [65]:
def main(data_dir):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                    transforms.Resize((32, 32)),
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])

    dataset = CustomImageDataset(root=data_dir, transform=transform)
    num_classes = len(dataset.class_to_idx)

    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    model = Net(num_classes=num_classes).to(device)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)  # Decays the learning rate every 10 epochs

    for epoch in range(1, 11):
        print(f"Epoch {epoch}/{10}")
        train(model, device, train_loader, optimizer)
        test(model, device, test_loader)
        scheduler.step()  # Adjust the learning rate

    # Save the model checkpoints
    torch.save(model.state_dict(), "model_checkpoint.pth")

    print("Training completed")

if __name__ == '__main__':
    # Make sure this directory points to where your images are stored
    data_dir = '/content/gdrive/My Drive/Pythoncode/W22/faces/mitchell'
    main(data_dir)

Epoch 1/10


Training: 100%|██████████| 2/2 [00:01<00:00,  1.24it/s]


Training loss: 1.3744913339614868


Testing: 100%|██████████| 1/1 [00:00<00:00,  2.92it/s]


Test set: Average loss: 1.3883, Accuracy: 5/17 (29%)
Epoch 2/10


Training: 100%|██████████| 2/2 [00:01<00:00,  1.36it/s]


Training loss: 1.3768696784973145


Testing: 100%|██████████| 1/1 [00:00<00:00,  3.95it/s]


Test set: Average loss: 1.3907, Accuracy: 5/17 (29%)
Epoch 3/10


Training: 100%|██████████| 2/2 [00:00<00:00,  2.14it/s]


Training loss: 1.3826159238815308


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.07it/s]


Test set: Average loss: 1.3943, Accuracy: 5/17 (29%)
Epoch 4/10


Training: 100%|██████████| 2/2 [00:00<00:00,  2.19it/s]


Training loss: 1.3487340211868286


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.83it/s]


Test set: Average loss: 1.3980, Accuracy: 5/17 (29%)
Epoch 5/10


Training: 100%|██████████| 2/2 [00:00<00:00,  2.17it/s]


Training loss: 1.4275745749473572


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.47it/s]


Test set: Average loss: 1.4004, Accuracy: 5/17 (29%)
Epoch 6/10


Training: 100%|██████████| 2/2 [00:00<00:00,  2.14it/s]


Training loss: 1.3745765686035156


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.12it/s]


Test set: Average loss: 1.4012, Accuracy: 5/17 (29%)
Epoch 7/10


Training: 100%|██████████| 2/2 [00:00<00:00,  2.06it/s]


Training loss: 1.396196722984314


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.30it/s]


Test set: Average loss: 1.4024, Accuracy: 5/17 (29%)
Epoch 8/10


Training: 100%|██████████| 2/2 [00:00<00:00,  2.09it/s]


Training loss: 1.3951307535171509


Testing: 100%|██████████| 1/1 [00:00<00:00,  3.60it/s]


Test set: Average loss: 1.4042, Accuracy: 5/17 (29%)
Epoch 9/10


Training: 100%|██████████| 2/2 [00:01<00:00,  1.98it/s]


Training loss: 1.3787712454795837


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.39it/s]


Test set: Average loss: 1.4059, Accuracy: 5/17 (29%)
Epoch 10/10


Training: 100%|██████████| 2/2 [00:00<00:00,  2.08it/s]


Training loss: 1.3715495467185974


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.33it/s]

Test set: Average loss: 1.4069, Accuracy: 5/17 (29%)
Training completed



