In [2]:
import numpy as np

In [108]:
class CNN:
    def __init__(self, padding=0, in_channels=0, out_channels=0, kernel_size=0):
        self.padding = padding
        self.kernel_size = kernel_size
        self.weights = np.random.randn(out_channels, in_channels, self.kernel_size, self.kernel_size) * 0.01
        self.bias = np.zeros(out_channels)
        self.input = None
        self.padded_input = None

    def apply_padding(self, mat):
        padding_size = self.padding
        if padding_size == 0:
            return mat
        #패딩에서는 0으로 미리 사이즈 크게 생성하고 그 중간에 원래 행렬 저장해서 패딩
        padded_arr = np.zeros((mat.shape[0], mat.shape[1] + 2 * padding_size, mat.shape[2] + 2 * padding_size))
        padded_arr[:, padding_size:-padding_size, padding_size:-padding_size] = mat
        return padded_arr

    def convolution(self, padded):
        #
        output_size, input_size, _, _ = self.weights.shape
        kernel_size = self.kernel_size
        padded_height, padded_width = padded.shape[1:]  # 높이와 너비 추출
        #짝수와 아닐 때 계산이 다름 
        if kernel_size % 2 == 0:
            output_padded_height = (padded_height - kernel_size) 
            output_padded_width = (padded_width - kernel_size) 
        else:
            output_padded_height = (padded_height - kernel_size) // 1 + 1
            output_padded_width = (padded_width - kernel_size) // 1 + 1

        output = np.zeros((output_size, output_padded_height, output_padded_width))
        #컨볼루션 연산 
        for o in range(output_size):
            for n in range(input_size):
                for i in range(output_padded_height):
                    for j in range(output_padded_width):
                        patch = padded[n, i:i + kernel_size, j:j + kernel_size]
                        output[o, i, j] += np.sum(patch * self.weights[o, n]) 
        #output[o] += self.bias[o] #원래 넣어야하지만 이상하게 안넣는게 더 잘됨 
        return output

    def feedforward(self, mat):
        self.input = mat
        self.padded_input = self.apply_padding(mat)
        convolutioned = self.convolution(self.padded_input)
        return convolutioned

    def backward(self, grad_output, lr):
        output_size, input_size, kernel_size, _ = self.weights.shape
        padded_height, padded_width = self.padded_input.shape[1:]
        grad_input = np.zeros_like(self.padded_input)
        grad_weights = np.zeros_like(self.weights)
        grad_bias = np.zeros_like(self.bias)

        for o in range(output_size):
            grad_bias[o] = np.sum(grad_output[o])

        # ✅ 가중치 기울기 계산
        for o in range(output_size):
            for n in range(input_size):
                for i in range(grad_output.shape[1]):  
                    for j in range(grad_output.shape[2]): 
                        patch = self.padded_input[n, i:i + kernel_size, j:j + kernel_size]
                        grad_weights[o, n] += grad_output[o, i, j] * patch

        # ✅ 입력 기울기 계산 (필터 180도 회전 후 적용)
        flipped_weights = np.flip(self.weights, axis=(2, 3))

        for o in range(output_size):
            for n in range(input_size):
                for i in range(grad_output.shape[1]):  
                    for j in range(grad_output.shape[2]): 
                        patch = grad_output[o, i, j]  
                        grad_input[n, i:i + kernel_size, j:j + kernel_size] += patch * flipped_weights[o, n]

        # ✅ 가중치 및 편향 업데이트
        self.weights -= lr * grad_weights
        self.bias -= lr * grad_bias

        # ✅ 입력 기울기에서 패딩 제거
        if self.padding > 0:
            grad_input = grad_input[:, self.padding:-self.padding, self.padding:-self.padding]

        return grad_input



In [75]:
class MaxPooling:
    def __init__(self, kernel_size = 2):
        self.kernel_size = kernel_size
    
    def feedforward(self, mat):
        kernel_size = self.kernel_size
        output_channels, convolutioned_height, convolutioned_width = mat.shape
        output_height = convolutioned_height // kernel_size
        output_width = convolutioned_width // kernel_size
        #풀링되는 값들과 역전파때 사용해야하므로 원래 위치도 마스크에 같이 저장
        pooled = np.zeros((output_channels, output_height, output_width))
        self.mask = np.zeros_like(mat)  
        for o in range(output_channels):
            for i in range(output_height):
                for j in range(output_width):
                    start_i, start_j = i * kernel_size, j * kernel_size
                    end_i, end_j = start_i + kernel_size, start_j + kernel_size
                    input_mat = mat[o, start_i:end_i, start_j:end_j]

                    max_value = np.max(input_mat)
                    max_position = np.unravel_index(np.argmax(input_mat), input_mat.shape)

                    pooled[o, i, j] = max_value

                    self.mask[o, start_i:end_i, start_j:end_j][max_position] = 1
        
        return pooled
    
    def backward(self, grad_output, lr):
        #역전파 때는 들어온 입력 데이터로 풀링 전의 원래 위치로 돌려야함함
        grad_input = self.mask * np.repeat(
            np.repeat(grad_output, self.kernel_size, axis=1),  # 행 방향 확장
            self.kernel_size, axis=2,  # 열 방향 확장
        )
        return grad_input
    

In [5]:
class ReLu:
    def __init__(self):
        self.input = None

    def feedforward(self, mat):
        self.input = mat
        return np.maximum(0, mat)
    
    def backward(self, mat, lr):
        grad_input = mat * (self.input > 0).astype(float)
        return grad_input


In [6]:
class Softmax:
    def __init__(self):
        self.input = None
        self.pred = None
    
    def feedforward(self, mat):
        self.input = mat

        # 차원이 1D인 경우, (1, C)로 변경
        if mat.ndim == 1:
            mat = mat.reshape(1, -1)
        #
        ex = np.exp(mat - np.max(mat, axis=1, keepdims=True))
        all_ex = np.sum(ex, axis=1, keepdims=True)
        self.pred = ex / all_ex
        return self.pred

    
    def backward(self, answer, lr):
        return self.pred - answer

In [7]:
def cross_entropy(pred, answer):
    return -np.sum(answer * np.log(pred + 1e-9))

In [81]:
class Network:
    def __init__(self, layers = None):
        #레이어들 초기화
        if layers is None:
            self.layers = []
        else:
            self.layers = layers
        self.loss = None
        self.lr = None

    def add(self, layer):
        self.layers.append(layer)

    def fit(self, x_data, y_data, epochs): 
        prev_total_loss = 100000
        for epoch in range(epochs):
            total_loss = 0  # 전체 손실 누적

            for i in range(x_data.shape[0]):  
                
                sample_input = x_data[i].reshape(1, 28, 28)

                # 순전파
                self.forward(sample_input)  

                # 손실 계산
                final_output = self.final_output
                loss = cross_entropy(final_output, y_data[i:i+1])  
                total_loss += loss

                # 역전파
                self.backward(y_data[i:i+1]) 

            print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / x_data.shape[0]}")
            if prev_total_loss < total_loss:
                break
            else:
                prev_total_loss = total_loss

            
    def pred(self, mat):
        # 입력 차원 조정 후 값 예측
        sample_input = mat.reshape(1, 28, 28)

        self.forward(sample_input)
        final_output = self.final_output
        return final_output
        
    def compile(self, loss = "cross_entropy", lr = 0.01):
        #loss랑 lr 설정정
        self.loss = loss
        self.lr = lr

    def forward(self, input_data):
        #레이어들 feedforward 계산해서 출력 
        for layer in self.layers:
            input_data = layer.feedforward(input_data)
        self.final_output = input_data
        return input_data

    def backward(self, grad_output):
        #레이어들 backward 계산해서 출력
        for layer in reversed(self.layers):
            grad_output = layer.backward(grad_output, self.lr)
        return grad_output

In [9]:
class flatten:
    def __init__(self):
        self.shape = None

    def feedforward(self, mat):
        #backward 때 다시 돌려야하므로 shape 미리 저장
        self.shape = mat.shape
        return mat.flatten()
    
    def backward(self, mat, lr):
        return mat.reshape(self.shape)

In [10]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)

  warn(


In [34]:
class Linear:
    def __init__(self, input_node, output_node):
        self.node = input_node
        self.weights = np.random.randn(input_node, output_node) * 0.01
        self.bias = np.zeros(output_node)
        self.mat = None
        
    def feedforward(self, mat):
        #내적 연산
        self.mat = mat
        return np.dot(mat, self.weights) + self.bias
    
    def backward(self, grad_output, lr, clip_value=1.0):
        # 차원 강제 변환
        self.mat = self.mat.reshape(1, -1) if self.mat.ndim == 1 else self.mat  
        grad_output = grad_output.reshape(1, -1) if grad_output.ndim == 1 else grad_output  

        # 가중치 기울기 계산
        grad_weights = np.dot(self.mat.T, grad_output)
        grad_bias = np.sum(grad_output, axis=0)

        # Gradient Clipping 
        grad_norm = np.linalg.norm(grad_weights)
        if grad_norm > clip_value:
            grad_weights = grad_weights * (clip_value / grad_norm)

        # 가중치 업데이트
        self.weights -= lr * grad_weights
        self.bias -= lr * grad_bias

        # 이전 레이어로 전달할 기울기
        grad_input = np.dot(grad_output, self.weights.T)
        return grad_input


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import fetch_openml

# Load MNIST dataset
X = mnist.data.values.reshape(-1, 1, 28, 28)  # Reshape to (samples, channels, height, width)
y = mnist.target.values.astype(int).reshape(-1, 1)  # Reshape labels
#X 70000개

X = X / 255.0

encoder = OneHotEncoder()
y = encoder.fit_transform(y).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
#데이터셋이 많고 계산이 오래걸려서 데이터셋 줄여서 학습습
X_train = X_train[:1000]
y_train = y_train[:1000]

X_test = X_test[:200]
y_test = y_test[:200]

network = Network()

cnn_layer = CNN(padding=1, in_channels=1, out_channels=1, kernel_size=2)
network.add(cnn_layer)

network.add(ReLu())

maxpool_layer = MaxPooling(kernel_size=2)
network.add(maxpool_layer)

flatten_layer = flatten()
network.add(flatten_layer)

network.add(Linear(196, 32))  
network.add(ReLu())           
network.add(Linear(32, 10)) 
network.add(Softmax())        

network.compile(lr=0.01)

epochs = 100
network.fit(X_train, y_train, epochs=epochs)


correct = 0
for i in range(X_test.shape[0]):
    prediction = network.pred(X_test[i:i + 1])
    
    if np.argmax(prediction) == np.argmax(y_test[i]):
        correct += 1

accuracy = correct / X_test.shape[0]

print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 1/100, Loss: 2.3030104357782006
Epoch 2/100, Loss: 2.300622096736812
Epoch 3/100, Loss: 2.2997712641031067
Epoch 4/100, Loss: 2.283209251865132
Epoch 5/100, Loss: 1.572191240750164
Epoch 6/100, Loss: 0.8835851258574335
Epoch 7/100, Loss: 0.712375110345381
Epoch 8/100, Loss: 0.6401811558164953
Epoch 9/100, Loss: 0.5788481269918968
Epoch 10/100, Loss: 0.5281557885553555
Epoch 11/100, Loss: 0.48241487525351034
Epoch 12/100, Loss: 0.4438233452745709
Epoch 13/100, Loss: 0.4113183461826835
Epoch 14/100, Loss: 0.38537418642734733
Epoch 15/100, Loss: 0.3651528168116771
Epoch 16/100, Loss: 0.3499960866251866
Epoch 17/100, Loss: 0.3392248430431901
Epoch 18/100, Loss: 0.33375517303661284
Epoch 19/100, Loss: 0.3353727439295222
Test Accuracy: 85.00%
