# DataSet

In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
cat_image_path = "../data/cifar10_images/cat"
dog_image_path = "../data/cifar10_images/dog"

In [3]:
def load_image_from_path(path):
    images = []
    labels = []
    label = path.split('/')[-1]
    for file_name in os.listdir(path):
        image_path = os.path.join(path, file_name)
        image = cv2.imread(image_path)
        image = cv2.resize(image, (32, 32))
        image = image.reshape(-1)
        image = image / 255.0
        images.append(image)

        if label == 'cat':
            labels.append(0)
        elif label == 'dog':
            labels.append(1)
        else:
            raise ValueError(f"Unexpected label: {label}. Expected 'cat' or 'dog'.")

    return images, labels

In [4]:
cat_images, cat_labels = load_image_from_path(cat_image_path)
dog_images, dog_labels = load_image_from_path(dog_image_path)

print(f"data shape: {cat_images[0].shape}")
print(f"label shape: {cat_labels[0]}")
print(f"number of cat data: {len(cat_labels)}")
print(f"number of dog data: {len(dog_labels)}")

data shape: (3072,)
label shape: 0
number of cat data: 500
number of dog data: 500


In [5]:
images = np.array(cat_images + dog_images)
labels = np.array(cat_labels + dog_labels)

print(f"number of all data: {len(labels)}")

number of all data: 1000


In [6]:
train_x, test_x, train_y, test_y = train_test_split(images, labels, test_size=0.2, random_state=42)

print(f"number of train data: {len(train_y)}")
print(f"number of test data: {len(test_y)}")
print(f"check data set randomly: {test_y[:10]}")

number of train data: 800
number of test data: 200
check data set randomly: [1 1 1 1 0 1 1 1 1 0]


In [7]:
train_y = train_y.reshape((800, 1))
test_y = test_y.reshape((200, 1))

# HyperParameter

In [8]:
num_features = 3072
num_samples = 800
learning_rate = 0.0001
num_iterations = 1000

# Model

In [9]:
W = np.random.randn(num_features, 1)
b = np.random.randn(1)

# Loss

In [10]:
def binary_cross_entropy(y_true, y_pred):
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # Numerical stability
    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    
    return loss

# Non-linear function

In [11]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Train

In [12]:
for i in range(num_iterations):
    z = np.dot(train_x, W) + b
    y_hat = sigmoid(z)

    loss = binary_cross_entropy(train_y, y_hat)
    
    dW = -2 * np.dot(train_x.T, (train_y - y_hat)) / num_samples
    db = -2 * np.sum(train_y - y_hat) / num_samples
    
    W -= learning_rate * dW
    b -= learning_rate * db
    
    if i % 100 == 0:
        print(f"Iteration {i}, Loss: {loss}")

print(f"Final Loss: {loss}")

Iteration 0, Loss: 13.12426847601438
Iteration 100, Loss: 11.255564349799931
Iteration 200, Loss: 9.284834882718325
Iteration 300, Loss: 7.661026573233191
Iteration 400, Loss: 6.617800229866082
Iteration 500, Loss: 6.04894052472222
Iteration 600, Loss: 5.743913858662859
Iteration 700, Loss: 5.5751612928500185
Iteration 800, Loss: 5.470948937222914
Iteration 900, Loss: 5.394003558489992
Final Loss: 5.329005712792484


# Predict

In [13]:
z = np.dot(test_x, W) + b
test_y_hat = sigmoid(z)

test_y_pred = (test_y_hat > 0.5).astype(int)
accuracy = np.mean(test_y_pred == test_y)

print(f"Test Set Accuracy: {accuracy * 100:.2f}%")

Test Set Accuracy: 48.00%
