In [2]:
import sys
import os

# Make the local 'oracle' module importable by appending <cwd>/oracle to the module
# search path. The path is resolved against the current working directory, so the
# notebook has to be started from the directory that contains 'oracle/'.
sys.path.append(os.path.join(os.getcwd(), 'oracle'))
import oracle

In [3]:
# Fetch the train/test data from the oracle.
# NOTE(review): 23607 is an opaque argument here - presumably a student/roll number
# or seed that selects the attribute pair; TODO confirm and name it as a constant.
res = oracle.q1_fish_train_test_data(23607)
# res[0] holds the pair of attribute names used for this task.
print(res[0])

('Wearing_Lipstick', 'Big_Lips')


In [4]:
# Unpack the oracle result positionally:
# attribute names, training images/labels, then test images/labels.
attributes, train_img, train_labels, test_img, test_labels = (
    res[position] for position in range(5)
)

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
# Convert all four oracle outputs to NumPy arrays so that the boolean-mask indexing
# and reshape calls in the following cells work on them.
train_img, train_labels, test_img, test_labels = map(
    np.array, (train_img, train_labels, test_img, test_labels)
)

In [7]:
# Split the images by class label (0, 1, 2, 3) using boolean masks on the label arrays.
train_img_0, train_img_1, train_img_2, train_img_3 = (
    train_img[train_labels == label] for label in range(4)
)

test_img_0, test_img_1, test_img_2, test_img_3 = (
    test_img[test_labels == label] for label in range(4)
)

In [8]:
# Flatten every image into a single feature vector: each per-class array becomes
# 2-D with one row per image (first axis kept, all remaining axes merged).
train_img_0, train_img_1, train_img_2, train_img_3 = (
    imgs.reshape(imgs.shape[0], -1)
    for imgs in (train_img_0, train_img_1, train_img_2, train_img_3)
)

test_img_0, test_img_1, test_img_2, test_img_3 = (
    imgs.reshape(imgs.shape[0], -1)
    for imgs in (test_img_0, test_img_1, test_img_2, test_img_3)
)

In [9]:
# Mean feature vector of each class (averaged over the images of that class).
m0, m1, m2, m3 = (
    class_imgs.mean(axis=0)
    for class_imgs in (train_img_0, train_img_1, train_img_2, train_img_3)
)

# Covariance matrix of each class. np.cov treats rows as variables, hence the
# transpose so that features (not images) are the variables.
# Naming offset to be aware of: s1..s4 belong to classes 0..3 (i.e. s1 pairs with m0).
s1, s2, s3, s4 = (
    np.cov(class_imgs.T)
    for class_imgs in (train_img_0, train_img_1, train_img_2, train_img_3)
)

Our attributes are ('Wearing_Lipstick', 'Big_Lips'), in that order.
The four class labels 0, 1, 2, 3 therefore mean:
<br>
0: Not wearing lipstick, not having big lips
<br>
1: Not wearing lipstick, having big lips
<br>
2: Wearing lipstick, not having big lips
<br>
3: Wearing lipstick, having big lips
<br>
So we split the 4-class problem into two independent binary problems, one per attribute.
<br>
The new binary classes are:
<br>
a: big lips no/yes : 0&2/1&3 | a1 (no): 0&2, a2 (yes): 1&3
<br>
b: wearing lipstick no/yes : 0&1/2&3 | b1 (no): 0&1, b2 (yes): 2&3
<br> 

### Attribute 'a': Big Lips classification


In [15]:
# Fisher linear discriminant for the 'Big_Lips' attribute.
# Positive group ("yes") = classes 1 and 3, negative group ("no") = classes 0 and 2.
train_yes = np.vstack((train_img_1, train_img_3))
train_no = np.vstack((train_img_0, train_img_2))

# Group means and covariances (features are the variables, hence the transpose).
m_yes, m_no = train_yes.mean(axis=0), train_no.mean(axis=0)
s_yes, s_no = np.cov(train_yes.T), np.cov(train_no.T)

# Sum of the two group covariances, and its explicit inverse.
# np.linalg.inv raises LinAlgError if s_W is singular.
s_W = s_yes + s_no
s_W_inv = np.linalg.inv(s_W)

# Projection direction, and a bias that puts the decision threshold at the midpoint
# of the two projected group means.
w = s_W_inv.dot(m_yes - m_no)
b = -0.5 * w.dot(m_yes + m_no)

# Keep attribute-specific copies; `w` and `b` are reused by the other attribute's cell.
w_big_lips, b_big_lips = w, b

# Sanity check: the two group means should score symmetrically around zero.
for group_mean in (m_yes, m_no):
    print(w_big_lips.dot(group_mean) + b_big_lips)

0.34417260521691295
-0.3441725267878981


In [11]:
# Evaluate the 'Big_Lips' discriminant on the test set.
#
# This cell deliberately uses the attribute-specific parameters (w_big_lips,
# b_big_lips) instead of the shared globals `w` / `b`: those are overwritten by the
# 'Wearing_Lipstick' training cell, so reading them here would make the result depend
# on the order in which cells were executed.

# Flatten the test images: one row per image.
test_img_flattened = test_img.reshape(test_img.shape[0], -1)

# Signed discriminant score w.x + b for every test image at once.
scores = np.dot(test_img_flattened, w_big_lips) + b_big_lips

# 1.0 = predicted "big lips", 0.0 = predicted "no big lips" (float array, as before).
predicted_labels = (scores > 0).astype(float)

# Ground truth from the 4-class labels: classes 1 and 3 have big lips, 0 and 2 do not.
truth_yes = np.isin(test_labels, (1, 3))
truth_no = np.isin(test_labels, (0, 2))
hits = ((predicted_labels == 1) & truth_yes) | ((predicted_labels == 0) & truth_no)

correct = int(np.count_nonzero(hits))
accuracy = correct / test_img_flattened.shape[0]
big_lips_prediction = predicted_labels
print(accuracy)



0.607


### Attribute 'b': Wearing Lipstick classification

In [None]:
# Fisher linear discriminant for the 'Wearing_Lipstick' attribute.
# Negative group ("no") = classes 0 and 1, positive group ("yes") = classes 2 and 3.
train_no = np.vstack((train_img_0, train_img_1))
train_yes = np.vstack((train_img_2, train_img_3))

# Group means and covariances (features are the variables, hence the transpose).
m_yes, m_no = train_yes.mean(axis=0), train_no.mean(axis=0)
s_yes, s_no = np.cov(train_yes.T), np.cov(train_no.T)

# Sum of the two group covariances, and its explicit inverse.
# np.linalg.inv raises LinAlgError if s_W is singular.
s_W = s_yes + s_no
s_W_inv = np.linalg.inv(s_W)

# Projection direction, and a bias that puts the decision threshold at the midpoint
# of the two projected group means.
w = s_W_inv.dot(m_yes - m_no)
b = -0.5 * w.dot(m_yes + m_no)

# Keep attribute-specific copies; `w` and `b` are reused by the other attribute's cell.
w_lipstick, b_lipstick = w, b

# Sanity check: the two group means should score symmetrically around zero.
for group_mean in (m_yes, m_no):
    print(w_lipstick.dot(group_mean) + b_lipstick)

2.4104791214123003
-2.410480251311691


: 

In [13]:
# Flatten the test images
test_img_flattened = test_img.reshape(test_img.shape[0], -1)

predicted_labels = np.zeros(test_img_flattened.shape[0])
# We calculate the accuracy of the model
for i in range(test_img_flattened.shape[0]):
    if np.dot(w.T, test_img_flattened[i]) + b > 0:
        predicted_labels[i] = 1
    else:
        predicted_labels[i] = 0

correct = 0
for i in range(test_img_flattened.shape[0]):
    if predicted_labels[i] == 1 and (test_labels[i] == 2 or test_labels[i] == 3):
        correct += 1
    elif predicted_labels[i] == 0 and (test_labels[i] == 0 or test_labels[i] == 1):
        correct += 1
accuracy = correct / test_img_flattened.shape[0]
lipstick_prediction = predicted_labels
print(accuracy)


0.891


In [14]:
# Combine the two binary predictions into the original 4-class label.
# Label encoding (see the class table above): label = 2 * wears_lipstick + has_big_lips
#   0: no lipstick, no big lips    1: no lipstick, big lips
#   2: lipstick,    no big lips    3: lipstick,    big lips
# Any non-zero prediction counts as "yes", matching the original if/else branches.
wears_lipstick = np.asarray(lipstick_prediction) != 0
has_big_lips = np.asarray(big_lips_prediction) != 0

final_codes = 2 * wears_lipstick.astype(int) + has_big_lips.astype(int)
final_predictions = final_codes.tolist()  # kept as a plain list of ints, as before

# 4-class accuracy: both attributes must be predicted correctly for a hit.
correct = int(np.count_nonzero(final_codes == test_labels))
accuracy = correct / test_img.shape[0]
print(accuracy)


0.546
