<a href="https://colab.research.google.com/github/audachang/ML_collections/blob/main/face_gender_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the Colab runtime at the relative path 'gdrive';
# the data paths configured later in this notebook are rooted under it.
from google.colab import drive
drive.mount('gdrive')

Mounted at gdrive


In [2]:
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
import torchvision
import os
import pandas as pd
from PIL import Image
from torch.utils.data import (
    Dataset,
    DataLoader,
)  # Gives easier dataset managment and creates mini batches

import os
import pandas as pd


# Root folder on the mounted Drive that holds both the images and the annotation CSV
# (point this at wherever the downloaded data lives).
dpath = './gdrive/MyDrive/mycolab/data/'
# Folder with the face images, resolved relative to dpath.
imgdir = os.path.join(dpath, '346faces_sameBG_grey_adjBright')
# Annotation CSV, derived from dpath as well so that relocating the data
# only requires changing dpath in one place.
annote_file = os.path.join(dpath, '346faces_allData.csv')

### Checking the column names

In [3]:
# Load the annotation table and display its first row, which lists every
# column name together with a sample value.
tmp = pd.read_csv(annote_file)
tmp.iloc[0]

face                                             AUF_1
face_race                                           AU
face_gender                                          F
image_grey_adj             AUF_1_squared_adjBright.png
Sr_column_grey_adj                            0.480841
manual_oAI_raw                                1.881759
FaceMesh_oAI_raw                              0.763622
FaceMesh_oAI_normalized                       0.845129
prototypicality.N.54.                         4.203704
sensitivity.N.56.                             0.360025
criterion.N.56.                                    0.0
gray_att_upr                                  4.948718
gray_sym_upr                                  6.055556
Name: 0, dtype: object

## Reading structured data

In [4]:
class faceDataset(Dataset):
    """Image dataset backed by an annotation CSV.

    Each CSV row describes one image: the image file name is read from
    ``image_col`` and an integer class label is derived from ``tarlab``.

    Args:
        csv_file: Path to the annotation CSV (read with ``pd.read_csv``).
        root_dir: Directory that contains the image files.
        tarlab: Name of the CSV column that is converted into class labels.
        transform: Optional callable applied to the opened PIL image.
        image_col: Column holding the image file name. A ``str`` is used as
            a column name, anything else as a column position. Defaults to
            position 3, the column this class originally hard-coded.
    """

    def __init__(self, csv_file, root_dir, tarlab, transform=None, image_col=3):
        self.annotations = pd.read_csv(csv_file)

        # A dense descending rank numbers the distinct values 1..K. Subtract 1
        # so labels run 0..K-1, the range nn.CrossEntropyLoss expects for
        # class indices (a K-output classifier would reject label K).
        self.annotations['label'] = (
            self.annotations[tarlab].rank(method='dense', ascending=False).astype(int) - 1
        )
        self.root_dir = root_dir
        self.transform = transform
        self.image_col = image_col

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        if isinstance(self.image_col, str):
            file_name = self.annotations[self.image_col].iloc[index]
        else:
            file_name = self.annotations.iloc[index, self.image_col]

        img_path = os.path.join(self.root_dir, file_name)
        image = Image.open(img_path)

        # Positional lookup, matching the image lookup above, so image and
        # label always come from the same row whatever the DataFrame index is.
        y_label = torch.tensor(int(self.annotations['label'].iloc[index]))

        if self.transform:
            image = self.transform(image)

        return (image, y_label)

## Setting up training parameters

In [5]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Training configuration.
# NOTE(review): in_channel and num_classes are not referenced by any other
# cell visible in this notebook - confirm whether they are still needed.
in_channel = 3
num_classes = 2
learning_rate = 3e-4  # step size handed to the optimizer
batch_size = 32       # samples per mini-batch
num_epochs = 10       # full passes over the training data

## Loading dataset

In [6]:
# Build the dataset from the annotation CSV and the image folder. Class labels
# are derived from the 'face_gender' column and every image is converted to a
# tensor when it is fetched.
dataset = faceDataset(
    csv_file=annote_file,
    root_dir=imgdir,
    tarlab='face_gender',
    transform=transforms.ToTensor(),
)
# batch_size is deliberately not reassigned here: it is set together with the
# other hyperparameters, and a second copy would silently override edits there.

## Splitting test and training set

In [7]:
# Sizes of the test and training splits: 20% is held out for testing and the
# remainder is used for training.
test_size = int(len(dataset) * 0.2)
train_size = len(dataset) - test_size

# Split the dataset. A fixed-seed generator keeps the train/test membership
# identical on every run, so reported accuracies are comparable across runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Wrap each split in a DataLoader. Only the training batches are shuffled;
# evaluation order has no effect on accuracy.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

## Initialize model

In [8]:
# Pick the GPU when one is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load GoogLeNet with pretrained weights and move it onto the selected device.
# NOTE(review): num_classes is not applied here, so the model keeps the output
# layer it was pretrained with - confirm this is intended.
model = torchvision.models.googlenet(pretrained=True).to(device)

# Cross-entropy loss; Adam updates every model parameter at learning_rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


  0%|          | 0.00/49.7M [00:00<?, ?B/s]

## Model training

In [None]:
# Put mode-dependent layers (e.g. dropout, batch norm) into training mode
# explicitly, in case an earlier, interrupted evaluation left the model in
# eval mode.
model.train()

for epoch in range(num_epochs):
    losses = []  # per-batch losses, averaged for the epoch report below

    for data, targets in train_loader:
        # Move the batch to the same device as the model.
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Forward pass and loss.
        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())

        # Backward pass: clear stale gradients, backpropagate, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses)}")

Cost at epoch 0 is 4.094897482130262
Cost at epoch 1 is 0.39807577182849246


## Check accuracy on the training and test sets to see how good our model is


In [None]:
def check_accuracy(loader, model, device=None):
    """Print and return the classification accuracy of ``model`` over ``loader``.

    The model is switched to eval mode for the duration of the check and is
    put back into whichever mode it was in beforehand, even if evaluation
    raises.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module whose output holds one score per class along dim 1.
        device: Device each batch is moved to. When omitted, the device of
            the model's first parameter is used (CPU if it has none).

    Returns:
        Accuracy as a percentage (float); ``nan`` if the loader yields no
        samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                scores = model(x)
                # Index of the highest score per sample is the predicted class.
                _, predictions = scores.max(1)
                # .item() keeps the running count a plain int, not a tensor.
                num_correct += int((predictions == y).sum().item())
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)

    # Avoid dividing by zero when the loader is empty.
    if num_samples:
        accuracy = float(num_correct) / float(num_samples) * 100
    else:
        accuracy = float("nan")

    print(
        f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}"
    )
    return accuracy


# Report accuracy on the training split first, then on the held-out test split.
for banner, loader in (
    ("Checking accuracy on Training Set", train_loader),
    ("Checking accuracy on Test Set", test_loader),
):
    print(banner)
    check_accuracy(loader, model)


Checking accuracy on Training Set
Got 277 / 277 with accuracy 100.00
Checking accuracy on Test Set
Got 68 / 69 with accuracy 98.55
