<a href="https://colab.research.google.com/github/doranosu/alex/blob/main/quasialex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

# Per-sample preprocessing: only the PIL-image -> tensor conversion is applied.
transform = transforms.Compose([transforms.ToTensor()])

train_dataset = torchvision.datasets.CIFAR10(
    root='./data/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data/', train=False, transform=transform, download=True)


def _dataset_to_arrays(dataset):
    """Collect every (image tensor, label) pair of *dataset* into two NumPy arrays."""
    images, labels = [], []
    for tensor, target in dataset:
        images.append(tensor.numpy())
        labels.append(target)
    return np.array(images), np.array(labels)


train_images, train_labels = _dataset_to_arrays(train_dataset)
test_images, test_labels = _dataset_to_arrays(test_dataset)

# Sanity check: report the shape of each materialised array.
for _array in (train_images, train_labels, test_images, test_labels):
    print(_array.shape)


100%|██████████| 170M/170M [00:10<00:00, 16.1MB/s]


(50000, 3, 32, 32)
(50000,)
(10000, 3, 32, 32)
(10000,)


In [9]:
import cv2

def conv2d_forward(X, W, b, stride=1, padding=0):
    """Compute a 2-D convolution (cross-correlation) forward pass.

    Args:
        X: input array of shape (N, C, H, W_in).
        W: filter bank of shape (F, C, HH, WW).
        b: per-filter bias, broadcastable to shape (F,).
        stride: step between neighbouring windows, in pixels.
        padding: number of zero pixels added on every spatial border.

    Returns:
        float64 array of shape (N, F, out_h, out_w), where
        out_h = (H + 2*padding - HH) // stride + 1 and likewise for out_w.

    Raises:
        ValueError: if the channel count of X and W differ.
    """
    N, C, H, W_in = X.shape
    F, C_w, HH, WW = W.shape
    # Without this check a kernel with C_w == 1 would silently broadcast
    # over the input channels and produce a plausible-looking wrong result.
    if C != C_w:
        raise ValueError(
            f"channel mismatch: input has {C} channels, filters expect {C_w}"
        )

    out_h = (H + 2*padding - HH)//stride + 1
    out_w = (W_in + 2*padding - WW)//stride + 1

    X_pad = np.pad(X, ((0,0),(0,0),(padding,padding),(padding,padding)), mode='constant')
    out = np.zeros((N, F, out_h, out_w))

    # Loop only over output positions; every sample and every filter is
    # handled at once by contracting the (C, HH, WW) axes of window and W.
    for i in range(out_h):
        h_start = i * stride
        for j in range(out_w):
            w_start = j * stride
            window = X_pad[:, :, h_start:h_start+HH, w_start:w_start+WW]
            out[:, :, i, j] = np.tensordot(window, W, axes=([1, 2, 3], [1, 2, 3])) + b
    return out

def relu_forward(X):
    """Apply the rectified linear unit element-wise: negative entries become 0."""
    floor = 0
    activated = np.maximum(floor, X)
    return activated

def maxpool_forward(X, size=2, stride=2):
    """Max-pool the two trailing (spatial) axes of a 4-D array.

    Args:
        X: array of shape (N, C, H, W_in).
        size: side length of the square pooling window.
        stride: step between neighbouring windows.

    Returns:
        float64 array of shape (N, C, out_h, out_w) holding the maximum of
        each window, with out_h = (H - size) // stride + 1 (same for out_w).
    """
    batch, channels, height, width = X.shape
    rows_out = (height - size) // stride + 1
    cols_out = (width - size) // stride + 1
    pooled = np.zeros((batch, channels, rows_out, cols_out))

    # One reduction per output position covers every sample and channel.
    for row in range(rows_out):
        top = row * stride
        for col in range(cols_out):
            left = col * stride
            patch = X[:, :, top:top + size, left:left + size]
            pooled[:, :, row, col] = patch.max(axis=(2, 3))
    return pooled

def fc_forward(X, W, b):
    """Fully connected (affine) layer: matrix product of X and W, plus bias b."""
    projected = X @ W
    return projected + b

def softmax_crossentropy_loss(logits, labels):
    """Mean softmax cross-entropy loss and its gradient w.r.t. the logits.

    Args:
        logits: array of shape (N, num_classes) with unnormalised scores.
        labels: integer class indices, one per row of logits.

    Returns:
        (loss, dlogits): the loss averaged over the N rows, and the gradient
        of that averaged loss with respect to logits (same shape as logits).
        Note that the second value is the gradient, not the probabilities.
    """
    # Subtracting the row maximum keeps exp() from overflowing.
    shifted_logits = logits - np.max(logits, axis=1, keepdims=True)

    # Work with log-probabilities directly (log-sum-exp). Taking
    # log(exp(x) / sum) instead yields log(0) = -inf, and therefore an
    # infinite loss, whenever the true-class probability underflows to 0.
    log_norm = np.log(np.sum(np.exp(shifted_logits), axis=1, keepdims=True))
    log_probs = shifted_logits - log_norm

    N = logits.shape[0]
    rows = np.arange(N)
    loss = -np.sum(log_probs[rows, labels]) / N

    # d(loss)/d(logits) = (softmax - one_hot(labels)) / N
    dlogits = np.exp(log_probs)
    dlogits[rows, labels] -= 1
    dlogits /= N

    return loss, dlogits



In [4]:
def alexnet_forward(X, params):
    """Run the AlexNet-style forward pass and return the class logits.

    Args:
        X: input batch; the shape notes below assume (N, 3, 32, 32).
        params: dict holding the weights 'W1'..'W8' and biases 'b1'..'b8'.

    Returns:
        The output of the last fully connected layer (no softmax applied).

    The shape notes assume the parameter shapes created by
    initialize_params() and a 32x32 input.
    """
    # Each conv stage: (weight key, bias key, padding, pool afterwards?).
    # All convolutions use stride 1 and are followed by a ReLU.
    conv_stages = (
        ('W1', 'b1', 1, True),    # -> (N, 64, 16, 16)
        ('W2', 'b2', 2, True),    # -> (N, 192, 8, 8)
        ('W3', 'b3', 1, False),   # -> (N, 384, 8, 8)
        ('W4', 'b4', 1, False),   # -> (N, 256, 8, 8)
        ('W5', 'b5', 1, True),    # -> (N, 256, 4, 4)
    )

    features = X
    for weight_key, bias_key, pad, pool_after in conv_stages:
        features = conv2d_forward(features, params[weight_key], params[bias_key],
                                  stride=1, padding=pad)
        features = relu_forward(features)
        if pool_after:
            features = maxpool_forward(features, size=2, stride=2)

    # Flatten every sample to a vector: (N, 256*4*4).
    flat = features.reshape(features.shape[0], -1)

    # Two hidden fully connected layers with ReLU, then the linear output layer.
    hidden = relu_forward(fc_forward(flat, params['W6'], params['b6']))
    hidden = relu_forward(fc_forward(hidden, params['W7'], params['b7']))
    logits = fc_forward(hidden, params['W8'], params['b8'])  # (N, 10)

    return logits


In [6]:
def initialize_params():
    """Create freshly initialised parameters for the 8-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    sqrt(2 / fan_in) (He initialisation); all biases start at zero.

    Returns:
        dict with keys 'W1'..'W8' and 'b1'..'b8'. W1-W5 are convolution
        filters of shape (out_channels, in_channels, k, k); W6-W8 are
        fully connected matrices of shape (in_dim, out_dim).
    """
    # (out_channels, in_channels, kernel_size) for the five conv layers.
    conv_layers = (
        (64, 3, 3),
        (192, 64, 5),
        (384, 192, 3),
        (256, 384, 3),
        (256, 256, 3),
    )
    # (in_dim, out_dim) for the three fully connected layers.
    fc_layers = (
        (256*4*4, 4096),
        (4096, 4096),
        (4096, 10),
    )

    params = {}
    layer = 0

    for out_ch, in_ch, k in conv_layers:
        layer += 1
        fan_in = in_ch * k * k
        params[f'W{layer}'] = np.random.randn(out_ch, in_ch, k, k) * np.sqrt(2. / fan_in)
        params[f'b{layer}'] = np.zeros(out_ch)

    for in_dim, out_dim in fc_layers:
        layer += 1
        params[f'W{layer}'] = np.random.randn(in_dim, out_dim) * np.sqrt(2. / in_dim)
        params[f'b{layer}'] = np.zeros(out_dim)

    return params


In [7]:
# Initialise the network parameters.
params = initialize_params()

# Build a small batch: just the first 32 training samples.
batch_X, batch_y = train_images[:32], train_labels[:32]  # (32,3,32,32), (32,)

# Forward pass to obtain the logits, then the softmax cross-entropy loss.
# NOTE: the second value returned by softmax_crossentropy_loss is the
# gradient w.r.t. the logits, even though it is bound to `probs` here.
logits = alexnet_forward(batch_X, params)
loss, probs = softmax_crossentropy_loss(logits, batch_y)

print(f"loss = {loss:.4f}")


loss = 2.6349


In [12]:
def fc_backward(dout, X, W, b):
    """Backward pass of the fully connected layer computed by fc_forward.

    Args:
        dout: upstream gradient, shape (N, output_dim).
        X: input seen during the forward pass, shape (N, input_dim).
        W: weight matrix, shape (input_dim, output_dim).
        b: bias, shape (output_dim,); accepted for symmetry, not read.

    Returns:
        (dX, dW, db): gradients with respect to the input X, the weights W
        and the bias b, in that order.
    """
    grad_bias = np.sum(dout, axis=0)   # bias is shared by every row
    grad_weight = X.T @ dout
    grad_input = dout @ W.T
    return grad_input, grad_weight, grad_bias
def relu_backward(dout, X):
    """Backward pass of ReLU.

    Args:
        dout: upstream gradient, shape (N, ...).
        X: input seen during the forward pass, same shape as dout.

    Returns:
        Gradient with respect to X: dout where X > 0, zero elsewhere.
    """
    active = X > 0
    return dout * active
def maxpool_backward(dout, X, size=2, stride=2):
    """Backward pass of max pooling.

    Each upstream gradient value is routed to the position of the maximum
    inside its pooling window; on ties the first maximum in row-major
    order receives it. Contributions from overlapping windows accumulate.

    Args:
        dout: upstream gradient, shape (N, C, out_h, out_w).
        X: input seen during the forward pass, shape (N, C, H, W).
        size: side length of the square pooling window.
        stride: step between neighbouring windows.

    Returns:
        dX: gradient with respect to X, same shape as X. It keeps X's dtype
        when that is a floating type; otherwise it is promoted with dout's
        dtype so fractional gradients are not truncated.
    """
    N, C, H, W = X.shape
    out_h, out_w = dout.shape[2], dout.shape[3]

    # np.zeros_like(X) would silently truncate gradients for integer X.
    if np.issubdtype(X.dtype, np.floating):
        grad_dtype = X.dtype
    else:
        grad_dtype = np.result_type(X.dtype, dout.dtype)
    dX = np.zeros(X.shape, dtype=grad_dtype)

    # Index grids selecting every (sample, channel) pair at once.
    n_idx = np.arange(N)[:, None]
    c_idx = np.arange(C)[None, :]

    for i in range(out_h):
        h_start = i * stride
        for j in range(out_w):
            w_start = j * stride
            window = X[:, :, h_start:h_start+size, w_start:w_start+size]
            win_h, win_w = window.shape[2], window.shape[3]
            # argmax returns the first maximum of the flattened window,
            # i.e. the first one in row-major order.
            flat_pos = window.reshape(N, C, win_h * win_w).argmax(axis=2)
            ii, jj = np.unravel_index(flat_pos, (win_h, win_w))
            # Within one (i, j) every (n, c) pair hits a distinct element,
            # so fancy-indexed += accumulates correctly.
            dX[n_idx, c_idx, h_start + ii, w_start + jj] += dout[:, :, i, j]
    return dX
