In [1]:
'''
GOALS
1. To be able to load the saved weights. 
2. Run the gender dataset through the network.
3. Save the final feature representation for
   every input image. (128 dimensional vector)
4. Take the feature representation and the gender
   label and map it down to a 2-D space using t-SNE
5. Do this for every image in the dataset.
'''

'\nGOALS\n1. To be able to load the saved weights. \n2. Run the gender dataset through the network.\n3. Save the final feature representation for\n   every input image. (128 dimensional vector)\n4. Take the feature representation and the gender\n   label and map it down to a 2-D space using t-SNE\n5. Do this for every image in the dataset.\n'

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import shutil
from math import ceil
from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
%matplotlib inline
import csv

In [3]:
#os.environ["CUDA_VISIBLE_DEVICES"]="0"
# Start from an empty default graph so that re-running the notebook in the
# same kernel does not pile new ops/variables on top of the previous run's.
tf.reset_default_graph()

In [4]:
def FindAgeRange(csvfile):
    """Return the smallest and largest value of the 'age' column of a CSV.

    Args:
        csvfile: path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        A ``(min_age, max_age)`` tuple.
    """
    ages = pd.read_csv(csvfile)['age']
    youngest = ages.min()
    oldest = ages.max()
    return (youngest, oldest)

In [5]:
# CSV file describing the dataset that is pushed through the network; the
# cells below read its "name" and "gender" columns.
CSV = "gen_list_balanced.csv"

In [6]:
# Model Parameters
IMG_SIZE      = [96, 96]   # target size handed to tf.image.resize_images
BATCH_SIZE    = 32         # number of images per batch
LEARNING_RATE = 0.001      # NOTE(review): not referenced by any visible cell

# Output widths of the two classification heads ('gen' and 'age' weights).
NUM_CLASSES_GEN = 2
NUM_CLASSES_AGE = 14

In [7]:
def ReadImages(elem):
    """Replace the file name stored under ``elem["name"]`` with the image itself.

    The first entry of ``elem["name"]`` is read from disk, decoded as a
    3-channel JPEG, cast to uint8 and resized to ``IMG_SIZE``.

    Args:
        elem: dataset element (dict); ``elem["name"][0]`` is the image path.

    Returns:
        The same dict, with ``elem["name"]`` now holding the resized image
        tensor.
    """
    path = elem["name"][0]
    raw_bytes = tf.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    as_uint8 = tf.cast(decoded, tf.uint8)
    elem["name"] = tf.image.resize_images(as_uint8, IMG_SIZE)
    return elem

In [8]:
def AgeLabelFunc(elem):
    """Convert the raw age stored in ``elem["age"]`` into an age-bin index.

    The first entry of ``elem["age"]`` is shifted by ``AGE_MIN`` and
    floor-divided by ``BIN_SIZE``.

    NOTE(review): ``AGE_MIN`` and ``BIN_SIZE`` are read as globals but are not
    defined in any visible cell; they must exist before this function is used.

    Args:
        elem: dataset element (dict) with an "age" entry.

    Returns:
        The same dict, with ``elem["age"]`` replaced by the bin index.
    """
    elem["age"] = (elem["age"][0] - AGE_MIN) // BIN_SIZE
    return elem

In [9]:
def GenderLabelFunc(elem):
    """Unwrap the gender label: keep only the first entry of ``elem["gender"]``.

    Args:
        elem: dataset element (dict) with a "gender" entry that can be indexed.

    Returns:
        The same dict, with ``elem["gender"]`` replaced by its first entry.
    """
    first_label = elem["gender"][0]
    elem["gender"] = first_label
    return elem

In [10]:
def SetDatasetParams(dataset, size, batch_size=None):
    """Shuffle, repeat, batch and prefetch a dataset of single examples.

    The transformations are applied in the order shuffle -> repeat -> batch ->
    prefetch. Shuffling before batching permutes individual examples (rather
    than whole batches) and keeps the shuffle buffer at ``size`` examples
    instead of ``size`` batches. Prefetching last lets a finished batch be
    prepared while the previous one is being consumed.

    Args:
        dataset: a ``tf.data.Dataset`` (or any object exposing ``shuffle``,
            ``repeat``, ``batch`` and ``prefetch``) yielding single examples.
        size: shuffle buffer size, in examples.
        batch_size: examples per batch; defaults to the global ``BATCH_SIZE``.

    Returns:
        The transformed dataset.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    dataset = dataset.shuffle(size)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(1)
    return dataset

In [11]:
def CreateDataset(csvfile, label_func):
    """Build the batched input pipeline for the images listed in a CSV file.

    Each CSV row is read as a one-row batch, passed through ``label_func``
    and then through ``ReadImages``; the result is handed to
    ``SetDatasetParams`` for batching.

    Uses ``tf.data.experimental.make_csv_dataset`` in place of the deprecated
    ``tf.contrib.data.make_csv_dataset`` (same arguments).

    NOTE(review): per the TF docs, make_csv_dataset shuffles and repeats
    indefinitely by default -- confirm that is intended when the goal is to
    visit every image exactly once.

    Args:
        csvfile: path to the CSV file.
        label_func: function mapped over each element to prepare its label
            (e.g. ``GenderLabelFunc`` or ``AgeLabelFunc``).

    Returns:
        ``(dataset, size)`` where ``size`` is the number of rows in the CSV.
    """
    # Get the size of the dataset
    size = pd.read_csv(csvfile).shape[0]

    # batch_size=1: every element holds one CSV row, with each column wrapped
    # in a length-1 batch dimension (hence the [0] indexing in the map funcs).
    full_data = tf.data.experimental.make_csv_dataset(csvfile, batch_size=1)
    full_data = full_data.map(label_func, num_parallel_calls=100)
    full_data = full_data.map(ReadImages, num_parallel_calls=100)
    full_data = SetDatasetParams(full_data, size)

    return full_data, size

In [12]:
# Build the input pipeline for the gender CSV; `size` is the number of CSV rows.
dataset, size  = CreateDataset(CSV, GenderLabelFunc)

Instructions for updating:
Use `tf.data.experimental.make_csv_dataset(...)`.


In [13]:
def GetInitOp(dataset):
    """Create an iterator matching ``dataset``'s structure and its initializer.

    Args:
        dataset: the ``tf.data.Dataset`` to iterate over.

    Returns:
        ``(init_op, iterator)``: the op that binds the iterator to ``dataset``
        and the iterator itself.
    """
    iterator = tf.data.Iterator.from_structure(
        dataset.output_types,
        dataset.output_shapes,
    )
    return iterator.make_initializer(dataset), iterator

In [14]:
# Iterator over `dataset` plus the op that must be run to bind it to the data.
init_op_data, it_data = GetInitOp(dataset)

In [15]:
def GetNumBatchesPerEpoch(size):
    """Return how many full batches of ``BATCH_SIZE`` fit into ``size`` examples.

    A trailing partial batch is not counted.
    """
    full_batches, _leftover = divmod(size, BATCH_SIZE)
    return int(full_batches)

In [16]:
# Number of full batches in `size` rows (a remainder smaller than BATCH_SIZE is dropped).
num_batches = GetNumBatchesPerEpoch(size)

In [17]:
def GetAgeFromBin(bin_val):
    """Map an age-bin index back to the lowest age of that bin.

    NOTE(review): relies on the globals ``AGE_MIN`` and ``BIN_SIZE``, which are
    not defined in any visible cell.
    """
    offset = bin_val * BIN_SIZE
    return AGE_MIN + offset

In [18]:
def ShowImages(imgs, age, GetLabel):
    """Plot two randomly chosen images of one batch together with their labels.

    Must be called while a default session is active (e.g. inside
    ``with tf.Session() as sess:``).

    Args:
        imgs: tensor holding a batch of images.
        age: tensor holding the labels that belong to ``imgs``.
        GetLabel: function turning one label value into the text to display.

    Raises:
        ValueError: if there is no default session.
    """
    sess = tf.get_default_session()
    if sess is None:
        raise ValueError("ShowImages must be called inside an active tf.Session.")

    # Fetch images and labels in ONE run call. Evaluating the tensors
    # separately pulls a fresh batch from the iterator on every call, which
    # would pair each image with a label from a different batch.
    img_batch, lbl_batch = sess.run([imgs, age])

    high = img_batch.shape[0]
    print(high)

    # low=0 so the first element can be drawn and a batch of one still works.
    idx = np.random.randint(low=0, high=high, size=2)

    plt.figure(figsize=(8, 8))
    for position, i in zip((121, 122), idx):
        plt.subplot(position)
        plt.imshow(np.asarray(img_batch[i], dtype=np.uint8))
        plt.title("(Label: {lbl})".format(lbl=GetLabel(lbl_batch[i])))

In [19]:
def CheckAgeDataset(init_op, it):
    """Visual sanity check: show sample images with their age labels.

    Args:
        init_op: op that initializes the iterator ``it``.
        it: iterator whose elements have "name" (image) and "age" entries.
    """
    with tf.Session() as session:
        session.run(init_op)
        batch = it.get_next()
        images, ages = batch["name"], batch["age"]
        ShowImages(images, ages, GetAgeFromBin)

In [20]:
#CheckAgeDataset(init_op_data, it_data)

In [21]:
def GetGenFromBin(bin_val):
    """Translate a gender label into text: 1 is "male", anything else "female"."""
    return "male" if bin_val == 1 else "female"

In [22]:
def CheckGenDataset(init_op, it):
    """Visual sanity check: show sample images with their gender labels.

    Args:
        init_op: op that initializes the iterator ``it``.
        it: iterator whose elements have "name" (image) and "gender" entries.
    """
    with tf.Session() as session:
        session.run(init_op)
        batch = it.get_next()
        images, genders = batch["name"], batch["gender"]
        ShowImages(images, genders, GetGenFromBin)

In [23]:
#CheckGenDataset(init_op_data, it_data)

In [24]:
def conv2d(x, W, b, strides=1):
    """2-D convolution ('SAME' padding) followed by bias add and ReLU.

    Args:
        x: input tensor.
        W: convolution kernel.
        b: bias added to the convolution output.
        strides: stride used along both spatial dimensions.

    Returns:
        The activated feature map.
    """
    step = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=step, padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)

In [25]:
def maxpool2d(x, k=2):
    """Max-pool ``x`` with a k-by-k window and stride k ('SAME' padding)."""
    window = [1, k, k, 1]
    step = list(window)
    return tf.nn.max_pool(x, window, step, padding='SAME')

In [26]:
# Spatial side length of the feature map after the three k=2 'SAME' max-pool
# layers in GetFeatureVector: each pool maps a side of n to ceil(n / 2).
# The ceilings must be nested one per division; the earlier form
# ceil(ceil(ceil(VAL/2)/2))/2 ended with a plain division, which produced a
# float and a wrong size whenever VAL is not divisible by 8.
# Only IMG_SIZE[0] is used, so a square input is assumed.
VAL  = IMG_SIZE[0]
FLAT = ceil(ceil(ceil(VAL / 2) / 2) / 2)

In [27]:
def _xavier_variable(name, shape):
    """Create the graph variable ``name`` of the given shape, Xavier-initialized."""
    return tf.get_variable(
        name,
        shape=shape,
        initializer=tf.contrib.layers.xavier_initializer(),
    )


# Kernels of the three conv layers, the dense layer and the two output heads.
# The variable names (W0..W5) are what the checkpoint is matched against.
weights = {
    'wc1': _xavier_variable('W0', (3, 3, 3, 32)),
    'wc2': _xavier_variable('W1', (3, 3, 32, 64)),
    'wc3': _xavier_variable('W2', (3, 3, 64, 128)),
    'wd1': _xavier_variable('W3', (FLAT*FLAT*128, 128)),
    'age': _xavier_variable('W4', (128, NUM_CLASSES_AGE)),
    'gen': _xavier_variable('W5', (128, NUM_CLASSES_GEN)),
}

# Matching biases (B0..B5), one per layer above.
biases = {
    'bc1': _xavier_variable('B0', (32)),
    'bc2': _xavier_variable('B1', (64)),
    'bc3': _xavier_variable('B2', (128)),
    'bd1': _xavier_variable('B3', (128)),
    'age': _xavier_variable('B4', (NUM_CLASSES_AGE)),
    'gen': _xavier_variable('B5', (NUM_CLASSES_GEN)),
}

In [28]:
# Directory holding the saved model and the checkpoint prefix inside it.
load_dir = 'checkpoints-alpha-2-extreme-bias/'
load_path = os.path.join(load_dir, 'best_validation')

# Only warns; a missing directory surfaces later when the restore is attempted.
if not os.path.exists(load_dir):
    print("[INFO] Checkpoint directory doesn't exist.")

In [29]:
#checkpoint = tf.train.latest_checkpoint(load_dir)
#print_tensors_in_checkpoint_file(checkpoint, all_tensors=True, tensor_name='')

In [30]:
def GetFeatureVector(x, weights, biases):
    """Run ``x`` through the conv stack and the dense layer; return fc1.

    Three conv + max-pool stages ('wc1'..'wc3' / 'bc1'..'bc3') are followed by
    a flatten and the fully connected layer 'wd1' / 'bd1' with ReLU. The
    'age' and 'gen' heads are not applied.

    Args:
        x: batch of input images.
        weights: dict of kernel variables keyed 'wc1', 'wc2', 'wc3', 'wd1'.
        biases: dict of bias variables keyed 'bc1', 'bc2', 'bc3', 'bd1'.

    Returns:
        The ReLU-activated output of the dense layer.
    """
    features = x
    for kernel_key, bias_key in (('wc1', 'bc1'), ('wc2', 'bc2'), ('wc3', 'bc3')):
        features = conv2d(features, weights[kernel_key], biases[bias_key])
        features = maxpool2d(features, k=2)

    # Flatten to the input width expected by the dense kernel.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flattened = tf.reshape(features, [-1, flat_width])

    dense = tf.add(tf.matmul(flattened, weights['wd1']), biases['bd1'])
    return tf.nn.relu(dense)

In [31]:
# feature_vecs[i] shape will be (BATCH_SIZE, 128)
feature_vecs = []

# labels[i] will have shape (BATCH_SIZE,): one gender label per image of batch i
labels = []

In [32]:
# Running one batch from the dataset through the network.
# Getting the one dimensional feature vector out.

# Build the ops for one batch; nothing is executed here. `x` holds the images,
# `y` the gender labels and `feature_vec` the dense-layer output for `x`.
elem = it_data.get_next()
x = elem["name"]
y = elem["gender"]
feature_vec = GetFeatureVector(x, weights, biases)

In [33]:
# Running this op will initialize weights and biases
init_op = tf.global_variables_initializer()
saver = tf.train.Saver()

In [34]:
def GenerateFeatureVectors(sess):
    """Restore the checkpoint and collect feature vectors for ``num_batches`` batches.

    Results are stored in the module-level lists ``feature_vecs`` and
    ``labels`` (one entry per batch). Both lists are emptied first, so calling
    this function again, e.g. by re-running the cell, does not duplicate
    entries.

    Args:
        sess: the ``tf.Session`` to restore the variables into and run with.
    """
    print("[INFO] Generating feature vectors from the model.")

    # Drop results of any earlier run (in place, so other references stay valid).
    del feature_vecs[:]
    del labels[:]

    saver.restore(sess=sess, save_path=load_path)
    sess.run(init_op_data)

    for i in range(num_batches):
        # Fetch both tensors in one run call so features and labels come from
        # the same batch.
        vec, label = sess.run([feature_vec, y])
        feature_vecs.append(vec)
        labels.append(label)

        print("[INFO] Finished passing batch #{} ".format(i))

In [35]:
# Open a session, restore the checkpoint and fill feature_vecs / labels.
with tf.Session() as sess:
    GenerateFeatureVectors(sess)

[INFO] Generating feature vectors from the model.
INFO:tensorflow:Restoring parameters from checkpoints-alpha-2-extreme-bias/best_validation
[INFO] Finished passing batch #0 
[INFO] Finished passing batch #1 
[INFO] Finished passing batch #2 
[INFO] Finished passing batch #3 
[INFO] Finished passing batch #4 
[INFO] Finished passing batch #5 
[INFO] Finished passing batch #6 
[INFO] Finished passing batch #7 
[INFO] Finished passing batch #8 
[INFO] Finished passing batch #9 
[INFO] Finished passing batch #10 
[INFO] Finished passing batch #11 
[INFO] Finished passing batch #12 
[INFO] Finished passing batch #13 
[INFO] Finished passing batch #14 
[INFO] Finished passing batch #15 
[INFO] Finished passing batch #16 
[INFO] Finished passing batch #17 
[INFO] Finished passing batch #18 
[INFO] Finished passing batch #19 
[INFO] Finished passing batch #20 
[INFO] Finished passing batch #21 
[INFO] Finished passing batch #22 
[INFO] Finished passing batch #23 
[INFO] Finished passing batch

In [36]:
def GenerateTSV(vecs, file):
    """Write batched values to a tab-separated file, one item per line.

    Each batch is iterated directly, so batches of any length (including a
    partial last batch) are handled and no global batch size is needed.
    Items that are vectors become one row with one column per component;
    scalar items (int, float, ...) become a single-column row.

    Args:
        vecs: iterable of batches; each batch is an iterable of items
            (e.g. a numpy array of shape (batch, 128) or (batch,)).
        file: path of the TSV file to create (overwritten if it exists).
    """
    # newline='' is what the csv module requires so that the writer alone
    # controls line endings (avoids blank lines on Windows).
    with open(file, 'wt', newline='') as out_file:
        tsv_writer = csv.writer(out_file, delimiter='\t')

        for batch in vecs:
            for item in batch:
                # numpy arrays and numpy scalars convert to plain Python values.
                row = item.tolist() if hasattr(item, 'tolist') else item
                if not isinstance(row, (list, tuple)):
                    row = [row]
                tsv_writer.writerow(row)

In [37]:
# Feature vectors: one row per image, one column per feature component.
# NOTE(review): this name ends in "-.tsv" while the label file below does not;
# confirm the trailing hyphen is intended before anything depends on it.
file = 'vec-alpha-2-extreme-bias-balanced-.tsv'
GenerateTSV(feature_vecs, file)

# Labels: one row per image, in the same order as the feature vectors.
file = 'lbl-alpha-2-extreme-bias-balanced.tsv'
GenerateTSV(labels, file)