# Bias Buccaneers Image Recognition Challenge: Quickstart

This notebook will introduce you to the data and describe a workflow to train and evaluate a baseline model on it.

## Initial Setup

We start with loading the required packages.

In [80]:
# !pip install tensorflow
import numpy as np
import pandas as pd
import json
import logging
import os
from pathlib import Path
from functools import lru_cache

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

import cv2
import imgaug.augmenters as iaa

# basicConfig() configures the root logger and returns None, so it must not be
# used as the logger object; fetch a named logger explicitly afterwards.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

## Load the Data

Make sure to download and uncompress the data (`data_bb1_img_recognition.zip`), and set `LOADPATH` in the cell below to the extracted `train` folder.

We first load the file containing the labels, then binarize each of the three label columns (skin tone, gender, and age) into a numpy array and collect the three arrays in a list.

In [3]:
# Locations of the training data and of the saved models.
LOADPATH = Path("../Downloads/data_bb1/train/")
assert LOADPATH.exists()
SAVEPATH = './models/'

# Read the label table and keep only the rows that carry a skin-tone label.
df = pd.read_csv(LOADPATH / 'labels.csv')
df_labeled = df.loc[df["skin_tone"].notna()]

# One binarizer per label column; Y collects one indicator matrix per column.
cat = ['skin_tone', 'gender', 'age']
lbs = [LabelBinarizer() for _ in cat]
Y = []
for binarizer, column in zip(lbs, cat):
    indicator = binarizer.fit_transform(df_labeled[column])
    # A single-column result is widened to two complementary columns so that
    # every entry of Y has one column per class.
    Y.append(
        np.hstack((1 - indicator, indicator)) if indicator.shape[1] == 1 else indicator
    )

In [9]:
# Class frequencies for each label column (computed on the full table `df`).
print(*(df[column].value_counts() for column in cat), sep="\n")

monk_3     1700
monk_4     1552
monk_5     1285
monk_2     1283
monk_7      694
monk_6      632
monk_1      515
monk_8      485
monk_9      289
monk_10     118
Name: skin_tone, dtype: int64
female    5106
male      3447
Name: gender, dtype: int64
18_30     4250
31_60     2126
0_17      1901
61_100     276
Name: age, dtype: int64


We then load the images under the training set and convert them to numpy arrays. This may take a while.

In [75]:
@lru_cache(maxsize=1000)
def load_rgb(path: Path, rgb=False) -> np.ndarray:
    """Read an image from disk with OpenCV, memoizing up to 1000 results.

    Args:
        path: Location of the image file.
        rgb: When False (the default) the array is returned exactly as
            ``cv2.imread`` produced it; when True it is passed through
            ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)`` first.

    Returns:
        The image as a numpy array.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.

    Note:
        Because of ``lru_cache``, repeated calls with the same arguments
        return the *same* array object; copy it before modifying in place.
    """
    img = cv2.imread(str(path))
    # cv2.imread signals failure by returning None instead of raising; fail
    # loudly here rather than letting None leak into later processing.
    if img is None:
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {path}")
    if rgb:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

def show_img(img):
    """Show `img` in an OpenCV window, wait for a key press, then close all windows."""
    window_title = 'image'
    cv2.imshow(window_title, img)
    cv2.waitKey(0)  # 0 = no timeout, block until a key is pressed
    cv2.destroyAllWindows()

# Load and display the image named in the row with index label 1.
sample_path = LOADPATH / df_labeled.at[1, "name"]
testimg = load_rgb(sample_path)
show_img(testimg)

In [30]:
# First augmentation pipeline: crop, horizontal flip, Gaussian blur.
basic_steps = [
    iaa.Crop(px=(1, 16), keep_size=False),
    iaa.Fliplr(0.5),
    iaa.GaussianBlur(sigma=(0, 2.0)),
]
seq = iaa.Sequential(basic_steps)

# Apply it once to the preview image.
aug_img = seq.augment_image(testimg)


In [89]:
def create_aug_names(names: pd.Series) -> pd.Series:
    """Derive file names for augmented copies of the given image names.

    Each result is a random integer drawn from [10000000, 99999999) followed
    by the original name with "TRAIN" replaced by "_AUG". The index of
    `names` is preserved.
    """
    prefix_numbers = np.random.randint(10000000, 99999999, size=len(names))
    prefixes = pd.Series(prefix_numbers, index=names.index).astype("string")
    suffixes = names.str.replace("TRAIN", "_AUG")
    return prefixes + suffixes

def get_imgs_to_sample(series: pd.Series) -> pd.Series:
    """Return, per distinct value, how many occurrences it is short of the most frequent value."""
    frequency = series.value_counts()
    # rsub(x) computes x - frequency, i.e. the gap to the largest class.
    return frequency.rsub(frequency.max())

# Shortfall of every age group relative to the largest age group.
age_counts = df_labeled["age"].value_counts()
img_to_sample = age_counts.rsub(age_counts.max())


def create_augs(df_sample: pd.DataFrame, img_to_sample: pd.Series, seq: iaa.Sequential, colname: str = "age") -> pd.DataFrame:
    """Write augmented copies of images to disk and return their label rows.

    For every (category, count) pair in `img_to_sample` with a positive count,
    `count` rows whose `colname` equals the category are drawn with
    replacement from `df_sample`, each image is augmented with `seq`, and the
    result is written under `LOADPATH` using a freshly generated name.

    Args:
        df_sample: Frame to draw rows from; must contain the columns "name"
            and `colname`.
        img_to_sample: Number of rows to draw per category value.
        seq: Augmentation pipeline applied to the loaded images.
        colname: Column of `df_sample` whose values are the categories.

    Returns:
        The sampled rows with an extra "aug_name" column holding the file
        name of each written image, or an empty frame if nothing was sampled.
    """
    sampled_frames = []
    for category, count in img_to_sample.items():
        logging.info(f"Sampling {count} images for {colname} {category}")
        if count <= 0:
            continue
        sampled = df_sample[df_sample[colname] == category].sample(count, replace=True)
        sampled = sampled.assign(aug_name=create_aug_names(sampled["name"]))
        # Keep one image per sampled ROW (a list, not a dict keyed by name):
        # sampling with replacement repeats names, and every row needs its own
        # augmented file. Copies keep the arrays cached by load_rgb untouched.
        images = [load_rgb(LOADPATH / name).copy() for name in sampled["name"]]
        augmented = seq.augment_images(images)
        # Pair each augmented image with its own row's aug_name by column name
        # rather than by a hard-coded column position.
        for aug_name, aug in zip(sampled["aug_name"], augmented):
            cv2.imwrite(str(LOADPATH / aug_name), aug)
        sampled_frames.append(sampled)
    if not sampled_frames:
        return pd.DataFrame()
    return pd.concat(sampled_frames)

#new_df = create_augs(df_labeled, img_to_sample)


In [110]:
# Taken from: https://www.kaggle.com/code/andreagarritano/simple-data-augmentation-with-imgaug/notebook
newseq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Crop(percent=(0, 0.1)),
    iaa.Sometimes(0.5,
        iaa.GaussianBlur(sigma=(0, 0.5))
    ),
    iaa.LinearContrast((0.75, 1.5)),
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
    iaa.Multiply((0.8, 1.2), per_channel=0.2),

], random_order=True)

# Preview the pipeline defined in this cell (`newseq`), not the earlier `seq`.
testy = newseq.augment_image(testimg)
show_img(testy)

Unnamed: 0,name,skin_tone,gender,age
1,TRAIN0001.png,monk_1,female,18_30
2,TRAIN0002.png,monk_6,male,0_17
4,TRAIN0004.png,monk_3,male,0_17
5,TRAIN0005.png,monk_2,male,0_17
6,TRAIN0006.png,monk_2,female,18_30
...,...,...,...,...
12276,TRAIN9993.png,monk_2,male,18_30
12278,TRAIN9995.png,monk_9,female,18_30
12279,TRAIN9996.png,monk_4,female,18_30
12281,TRAIN9998.png,monk_5,male,18_30


In [91]:
from typing import List
def upsample_imgs(df: pd.DataFrame, sample_cols: List[str], seq: iaa.Sequential) -> pd.DataFrame:
    """Create augmented images to even out class counts in several columns.

    Each column in `sample_cols` is handled independently: the shortfall of
    every value relative to that column's most frequent value is computed
    from `df`, and `create_augs` is asked to produce that many augmented rows.

    Args:
        df: Label frame to sample from.
        sample_cols: Columns of `df` to upsample.
        seq: Augmentation pipeline forwarded to `create_augs`.

    Returns:
        All rows returned by `create_augs`, stacked in column order; an empty
        frame if nothing was generated.
    """
    per_column_augs = []
    for col in sample_cols:
        logging.info(f"Upsampling {col}")
        counts = df[col].value_counts()
        img_to_sample = counts.max() - counts
        augs = create_augs(df, img_to_sample, seq=seq, colname=col)
        # Skip empty results so the final concat never mixes in empty frames.
        if not augs.empty:
            per_column_augs.append(augs)
    if not per_column_augs:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame per column.
    return pd.concat(per_column_augs)

# Upsample every column after the first one of the labelled frame.
aug_df = upsample_imgs(df_labeled, list(df_labeled.columns[1:]), seq=newseq)

2022-11-19 12:13:37,227 - INFO - Upsampling skin_tone
2022-11-19 12:13:37,231 - INFO - Sampling 0 images for skin_tone monk_3
2022-11-19 12:13:37,233 - INFO - Sampling 148 images for skin_tone monk_4
2022-11-19 12:13:39,538 - INFO - Sampling 415 images for skin_tone monk_5
2022-11-19 12:13:47,298 - INFO - Sampling 417 images for skin_tone monk_2
2022-11-19 12:13:56,334 - INFO - Sampling 1006 images for skin_tone monk_7
2022-11-19 12:14:13,485 - INFO - Sampling 1068 images for skin_tone monk_6
2022-11-19 12:14:34,640 - INFO - Sampling 1185 images for skin_tone monk_1
2022-11-19 12:14:56,576 - INFO - Sampling 1215 images for skin_tone monk_8
2022-11-19 12:15:18,870 - INFO - Sampling 1411 images for skin_tone monk_9
2022-11-19 12:15:29,875 - INFO - Sampling 1582 images for skin_tone monk_10
2022-11-19 12:15:35,872 - INFO - Upsampling gender
2022-11-19 12:15:35,879 - INFO - Sampling 0 images for gender female
2022-11-19 12:15:35,880 - INFO - Sampling 1659 images for gender male
2022-11-19 

In [92]:
# Class frequencies of the augmented rows, one block per label column.
print(*(aug_df[column].value_counts() for column in df_labeled.columns[1:]), sep="\n")

monk_6     2158
monk_7     1977
monk_3     1934
monk_5     1905
monk_2     1862
monk_8     1825
monk_10    1760
monk_9     1747
monk_1     1710
monk_4     1675
Name: skin_tone, dtype: int64
male      10115
female     8438
Name: gender, dtype: int64
0_17      4928
31_60     4699
18_30     4554
61_100    4372
Name: age, dtype: int64


## Specify the Model

We define a single model class that is able to train on the data in `X` and `Y` and predict outcomes for all three classes.

In [14]:
class PredictionModel():
    """Holds a train/validation split and one fitted model per label column.

    `X` is indexed by row; `Y` is a list with one indicator matrix per entry of
    `self.cat`; `idx` is a pair ``[train_rows, validation_rows]`` of row
    indices applied to both `X` and every matrix in `Y`.
    """

    def __init__(self, X, Y, idx):
        self.X = X
        self.Y = Y
        self.idx = idx
        self.cat = ['skin_tone','gender','age']
        n_labels = len(self.cat)
        train_rows, val_rows = idx[0], idx[1]
        self.trainX, self.testX = X[train_rows,:], X[val_rows,:]
        self.trainY = [Y[i][train_rows,:] for i in range(n_labels)]
        self.testY = [Y[i][val_rows,:] for i in range(n_labels)]
        self.loss = ['categorical_crossentropy' for _ in range(n_labels)]
        self.metrics = [['accuracy'] for _ in range(n_labels)]
        # One slot per label column; filled in by fit().
        self.models = [None]*n_labels

    # train a model specific for a certain class index in self.cat
    def fit(self, index, model, epochs=5, batch_size=32, save=False, save_location=None, verbose=1):
        """Finish, compile and train `model` for label column `self.cat[index]`.

        A softmax output layer sized to the number of classes of that label is
        appended to `model` (the passed object is modified), which is then
        compiled and trained on the training rows with the held-out rows as
        validation data. The trained model is stored in `self.models[index]`.

        Args:
            index: Position of the label column in `self.cat`.
            model: Model exposing `add`, `compile`, `fit` and `save`.
            epochs, batch_size, verbose: Forwarded to `model.fit`.
            save: When True, write the trained model to disk.
            save_location: Directory for the saved model; created if missing.
                Falls back to the notebook-level `SAVEPATH` when None.
        """
        if verbose: print("Training model for "+self.cat[index])
        # NOTE(review): `K` is presumably the Keras namespace (e.g.
        # tensorflow.keras); it is not imported in the visible cells - confirm.
        model.add(K.layers.Dense(self.trainY[index].shape[1], activation='softmax'))
        model.compile(loss=self.loss[index], optimizer='Adam', metrics=self.metrics[index])
        model.fit(
            self.trainX, self.trainY[index],
            validation_data=(self.testX,self.testY[index]),
            batch_size=batch_size, epochs=epochs, verbose=verbose
        )
        if save:
            if save_location is None:
                save_location = SAVEPATH
            # Check/create the directory that is actually written to.
            if not os.path.exists(save_location):
                print('save location '+str(save_location)+' did not exist. creating')
                os.makedirs(save_location)
            # Use the instance's own label names, not a notebook-level global.
            model_path = os.path.join(save_location, 'model_'+self.cat[index]+'.h5')
            print("saving model at "+model_path)
            model.save(model_path)
        self.models[index] = model

    def predict(self, newX):
        """Return a list with each fitted model's `predict(newX)`, in `self.cat` order.

        Raises:
            RuntimeError: If any label column has not been fitted yet.
        """
        missing = [self.cat[i] for i, model in enumerate(self.models) if model is None]
        if missing:
            raise RuntimeError("No trained model for: " + ", ".join(missing) + ". Call fit() first.")
        predictions = [model.predict(newX) for model in self.models]
        return predictions

## Initialize and Train a Model

We now train a `PredictionModel` to predict the likely skin tone, gender, and age of an input image. This baseline model is initialized with ImageNet weights and uses the ResNet50 architecture. We strongly recommend using a GPU to reduce training time.

In [15]:
# function to initialize a model
def initializeModel(n_frozen_layers=143, hidden_units=(256, 128, 64), dropout_rate=0.5):
    """Build the untrained network body: a ResNet50 backbone plus a dense head.

    The returned Sequential model has no output layer.

    Args:
        n_frozen_layers: Number of leading backbone layers whose `trainable`
            flag is set to False; later layers are left as created.
        hidden_units: Width of each Dense(relu) layer of the head, in order.
            Every Dense layer is followed by Dropout and BatchNormalization.
        dropout_rate: Rate used for every Dropout layer.

    Returns:
        The assembled model.

    NOTE(review): `ResNet50`, `K`, `length` and `width` are not defined in the
    visible cells; they are presumably the Keras ResNet50 constructor, the
    Keras namespace and the input image size - confirm and import/define them
    before this cell.
    """
    res_model = ResNet50(include_top=False, weights='imagenet', input_tensor=K.Input(shape=[length,width,3]))

    # Freeze the first `n_frozen_layers` layers of the backbone.
    for layer in res_model.layers[:n_frozen_layers]:
        layer.trainable = False

    model = K.models.Sequential()
    model.add(res_model)
    model.add(K.layers.Flatten())
    model.add(K.layers.BatchNormalization())
    for units in hidden_units:
        model.add(K.layers.Dense(units, activation='relu'))
        model.add(K.layers.Dropout(dropout_rate))
        model.add(K.layers.BatchNormalization())
    return model

# Seeded random split of the row indices: the first 70% go to training, the
# rest to validation.
# NOTE(review): `nn` and `X` are not defined in the visible cells - presumably
# the number of images and the preprocessed image array; confirm.
nntrain = int(0.7*nn)
np.random.seed(42)
indices = np.random.permutation(nn)
train_idx, test_idx = np.split(indices, [nntrain])
mymodel = PredictionModel(X, Y, [train_idx, test_idx])

# train model
for label_idx in range(len(mymodel.cat)):
    mymodel.fit(label_idx, initializeModel(), epochs=5, save=True, save_location=SAVEPATH)

Training model for skin_tone
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
save location ./models/ did not exist. creating
saving model at ./models/model_skin_tone.h5
Training model for gender
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
saving model at ./models/model_gender.h5
Training model for age
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
saving model at ./models/model_age.h5


## Evaluate the Model

We now evaluate the model on the test data. To do this, let's first load up that data and structure it similarly.

In [28]:
# load labels data
TESTPATH = './test/'
df_test = pd.read_csv(TESTPATH+'labels.csv')

# Convert labels to np array
print("Converting test labels to np array")
testY = []
for i in range(3):
    # Use transform(), not fit_transform(): the binarizers were fitted on the
    # training labels, and refitting them on the test labels could change
    # `classes_` and break the column -> label mapping used to decode the
    # models' predictions.
    lab = lbs[i].transform(df_test[cat[i]])
    if lab.shape[1]==1:
        testY.append(np.hstack((1-lab,lab)))
    else:
        testY.append(lab)

# load and convert images into np array
# NOTE(review): `image`, `length`, `width` and `K` are not defined in the
# visible cells - presumably Keras' image utilities, the input size and the
# Keras namespace; confirm.
print("Loading test images")
nt = df_test.shape[0]
all_imgs = [image.load_img(TESTPATH+name, target_size=(length,width)) for name in df_test['name']]

print("Converting test images to np array")
testX = np.empty([nt, length, width, 3], dtype=float)
for i, img in enumerate(all_imgs):
    testX[i,:] = image.img_to_array(img)
testX = K.applications.resnet50.preprocess_input(testX)

Converting test labels to np array
Loading test images
Converting test images to np array


We then obtain predicted labels for skin tone, gender, and age as a list of lists.

In [34]:
# Per label column: model outputs -> most likely column per row -> label.
pred = mymodel.predict(testX)
predY = [list(np.argmax(pred[k][:nt], axis=1)) for k in range(3)]
predLabels = [
    list(binarizer.classes_[np.asarray(col_ids, dtype=int)])
    for binarizer, col_ids in zip(lbs, predY)
]

Finally, we calculate the label-wise accuracy and disparity.

In [35]:
# calculate accuracy: one overall accuracy per label column
acc = {
    label: accuracy_score(df_test[label], predLabels[k])
    for k, label in enumerate(cat)
}

# calculate disparity
def disparity_score(ytrue, ypred):
    """Return the gap between the best and the worst per-class accuracy.

    For every class that occurs in `ytrue`, the per-class accuracy is the
    fraction of that class's samples whose prediction equals the class. Labels
    that appear only in `ypred` have no true samples and are ignored (they
    would otherwise contribute a 0/0 = NaN entry and make the result depend on
    label order).

    Args:
        ytrue: True labels (list, array or Series).
        ypred: Predicted labels, same length as `ytrue`.

    Returns:
        max(per-class accuracy) - min(per-class accuracy).

    Raises:
        ValueError: If the inputs are empty or differ in shape.
    """
    y_true = np.asarray(ytrue)
    y_pred = np.asarray(ypred)
    if y_true.shape != y_pred.shape:
        raise ValueError("ytrue and ypred must have the same shape")
    if y_true.size == 0:
        raise ValueError("disparity_score needs at least one sample")
    per_class_acc = [
        np.mean(y_pred[y_true == label] == label) for label in np.unique(y_true)
    ]
    return max(per_class_acc) - min(per_class_acc)

# One disparity value per label column.
disp = {
    label: disparity_score(df_test[label], predLabels[k])
    for k, label in enumerate(cat)
}

# Bundle both metric families; the last expression displays the result.
results = {'accuracy': acc, 'disparity': disp}
results

{'accuracy': {'skin_tone': 0.25766666666666665,
  'gender': 0.7966666666666666,
  'age': 0.5783333333333334},
 'disparity': {'skin_tone': 0.5514223194748359,
  'gender': 0.1493182689960596,
  'age': 0.7802908824936}}

## Score Model and Prepare Submission

Based on the above metrics, we now calculate the score used to evaluate your submission. This score will be displayed on your public leaderboard.

In [89]:
def getScore(results):
    """Combine per-label accuracy and disparity into one score.

    Each label contributes ``weight * accuracy * (1 - disparity ** power)``
    with (weight, power) of (2, 1) for gender, (4, 2) for age and (10, 5) for
    skin_tone; the contributions are added in that order.
    """
    accuracy, disparity = results['accuracy'], results['disparity']
    scheme = (('gender', 2, 1), ('age', 4, 2), ('skin_tone', 10, 5))
    return sum(
        weight * accuracy[label] * (1 - disparity[label] ** power)
        for label, weight, power in scheme
    )

title = '8-Bit Bias Bounty Baseline'

# Submission payload: name, leaderboard score and the underlying metrics.
submission = dict(
    submission_name=title,
    score=getScore(results),
    metrics=results,
)
submission

{'submission_name': '8-Bit Bias Bounty Baseline',
 'score': 4.705572543130653,
 'metrics': {'accuracy': {'skin_tone': 0.25766666666666665,
   'gender': 0.7966666666666666,
   'age': 0.5783333333333334},
  'disparity': {'skin_tone': 0.5514223194748359,
   'gender': 0.1493182689960596,
   'age': 0.7802908824936}}}

Finally, let's export this as a json file to upload as part of filling out your [submission form](https://docs.google.com/forms/d/e/1FAIpQLSfwqtVkJBVRP6TnFp7vHbbH8SlwKZJFIjvGQy7TyYFc8HR1hw/viewform).

In [6]:
# Serialize first, then write the text to baseline_score.json.
payload = json.dumps(submission, indent=4)
with open("baseline_score.json", "w") as out_file:
    out_file.write(payload)