In [1]:
!pip install numpy pandas matplotlib
import numpy as np
import pandas as pd
import pickle as pk
import os


[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable


In [120]:
# Silence every NumPy floating-point warning (divide, overflow, underflow, invalid)
# for the whole kernel; e.g. an overflow inside np.exp will no longer be reported.
np.seterr(all='ignore')
def tanH(x):
    """Hyperbolic-tangent activation, applied element-wise to `x`."""
    activated = np.tanh(x)
    return activated

def deriv_tanH(fx):
    """Derivative of tanh expressed through its output: `fx` is tanh(x), result is 1 - fx^2."""
    squared_output = fx ** 2
    return 1 - squared_output

def sigmoid(x):
    """Logistic activation 1 / (1 + e^-x), applied element-wise to `x`."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def deriv_sigmoid(fx):
    """Derivative of the sigmoid expressed through its output: `fx` is sigmoid(x)."""
    complement = 1 - fx
    return fx * complement


def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and `Z`."""
    rectified = np.maximum(0, Z)
    return rectified


def deriv_ReLU(Z):
    """ReLU derivative as a boolean mask: True where `Z` is strictly positive."""
    positive_mask = Z > 0
    return positive_mask


# def softmax(Z):
#     Z = np.exp(Z)
#     return (Z / np.sum(Z))

# correct solution:
def softmax(x):
    """Compute softmax over axis 0, i.e. independently for each column of scores.

    The maximum is subtracted per column (not over the whole array) before
    exponentiating. Softmax is invariant to that shift, but using the global
    maximum lets a column whose scores are all far below it underflow to
    0 / 0 = NaN when several columns are passed at once.

    Args:
        x: array-like of scores; 1-D (one score vector) or 2-D with one
           score vector per column.

    Returns:
        ndarray of the same shape as `x` whose entries along axis 0 sum to 1.
    """
    x = np.asarray(x)
    # Shift each column by its own maximum so the largest exponent is exactly 0.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

def clip_gradients(gradient, maxValue):
    """Limit every entry of `gradient` to the closed interval [-maxValue, maxValue]."""
    lower_bound = -maxValue
    upper_bound = maxValue
    return np.clip(gradient, lower_bound, upper_bound)

def cross_entropy_loss(y_true, y_pred):
    """Summed cross-entropy between `y_true` and predicted probabilities `y_pred`.

    Predictions are clamped to [1e-15, 1 - 1e-15] first so that log() never
    receives exactly 0.
    """
    epsilon = 1e-15
    safe_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    weighted_log = y_true * np.log(safe_pred)
    return -np.sum(weighted_log)

def one_hot(Y, num_classes=10):
    """One-hot encode integer class labels, one column per label.

    Args:
        Y: integer label(s) - a scalar, a sequence, or an ndarray of any
           shape; it is flattened before encoding.
        num_classes: number of rows in the encoding (defaults to 10, the
           value that was previously hard-coded).

    Returns:
        Float ndarray of shape (num_classes, number_of_labels) with a single
        1 in each column, at the row given by that label.

    Raises:
        IndexError: if a label is >= num_classes or is not an integer type.
    """
    labels = np.asarray(Y).reshape(-1)
    encoded = np.zeros((num_classes, labels.size))
    # Skip the assignment for empty input: an empty list has a float dtype
    # and would be rejected as an index array.
    if labels.size:
        encoded[labels, np.arange(labels.size)] = 1
    return encoded


def get_predictions(A2):
    """Return, for each column of `A2`, the row index holding the largest value."""
    predicted_rows = np.argmax(A2, axis=0)
    return predicted_rows


def get_accuracy(predictions, Y):
    """Fraction of entries of `predictions` that equal the matching entry of `Y`."""
    correct = np.sum(predictions == Y)
    total = Y.size
    return correct / total

In [132]:
class NeuralNetwork:
    """Fully connected classifier: 784 inputs -> two ReLU hidden layers -> 10-way softmax.

    Weights and biases are stored as plain ndarrays with one row per neuron;
    activations are column vectors. Training is per-sample gradient descent
    with L2 regularisation on the weight matrices.
    """

    # Default hidden-layer width, used when `neurons` is not passed to __init__.
    # NOTE(review): 0 builds zero-width hidden layers, so the prediction then
    # depends only on the output bias - set this (or pass `neurons`) before use.
    NEURONS = 0
    # Bounds of the uniform distribution used for parameter initialisation.
    UNIFORM_LOW = 0
    UNIFORM_HIGH = 1

    def __init__(self, neurons=None):
        """Create the parameters with a fixed seed so initialisation is repeatable.

        Args:
            neurons: width of both hidden layers; defaults to the class
                attribute `NEURONS`.

        Note:
            This reseeds NumPy's *global* random generator with 1.
        """
        if neurons is None:
            neurons = self.NEURONS
        np.random.seed(1)
        # The draw order below determines the initial values; keep it stable.
        # Only the first weight matrix uses the smaller sqrt(2 / 784) upper bound.
        self.hidden_layer0 = np.random.uniform(self.UNIFORM_LOW, np.sqrt(2 / 784), (neurons, 784))
        self.hidden_layer0_bias = np.random.uniform(self.UNIFORM_LOW, self.UNIFORM_HIGH, (neurons, 1))
        self.hidden_layer1 = np.random.uniform(self.UNIFORM_LOW, self.UNIFORM_HIGH, (neurons, neurons))
        self.hidden_layer1_bias = np.random.uniform(self.UNIFORM_LOW, self.UNIFORM_HIGH, (neurons, 1))
        self.output_layer = np.random.uniform(self.UNIFORM_LOW, self.UNIFORM_HIGH, (10, neurons))
        self.output_layer_bias = np.random.uniform(self.UNIFORM_LOW, self.UNIFORM_HIGH, (10, 1))

    def forward_propagate(self, x_in):
        """Run one sample through the network.

        Args:
            x_in: array whose first dimension is the input length; it is
                reshaped to a column vector.

        Returns:
            Tuple (hidden_layer_0_out, hidden_layer_1_out, y_pred) of column
            vectors; `y_pred` is the softmax output.
        """
        x_in = np.reshape(x_in, (x_in.shape[0], 1))
        hidden_layer_0_out = ReLU(np.dot(self.hidden_layer0, x_in) + self.hidden_layer0_bias)
        hidden_layer_1_out = ReLU(np.dot(self.hidden_layer1, hidden_layer_0_out) + self.hidden_layer1_bias)
        y_pred = softmax(np.dot(self.output_layer, hidden_layer_1_out) + self.output_layer_bias)
        return hidden_layer_0_out, hidden_layer_1_out, y_pred

    def backward_propagate(self, learn_rate, hidden_layer_0_out, hidden_layer_1_out, y_pred, x_train, y_test, l2_lambda):
        """Apply one gradient-descent step to every weight matrix and bias vector.

        Args:
            learn_rate: step size.
            hidden_layer_0_out, hidden_layer_1_out, y_pred: activations
                returned by `forward_propagate` for `x_train`.
            x_train: the input sample that produced those activations.
            y_test: integer label(s) of the sample; `y_test.size` is used as
                the normalising count M.
            l2_lambda: L2 regularisation strength (weights only, not biases).
        """
        M = y_test.size
        x_train = np.reshape(x_train, (x_train.shape[0], 1))
        one_hot_y = one_hot(y_test)

        # Error terms, all computed from the pre-update weights.
        # y_pred - one_hot is the gradient of softmax combined with cross-entropy.
        delta_out = y_pred - one_hot_y
        delta_h1 = (self.output_layer.T @ delta_out) * deriv_ReLU(hidden_layer_1_out)
        delta_h0 = (self.hidden_layer1.T @ delta_h1) * deriv_ReLU(hidden_layer_0_out)

        # Weight gradients with the L2 penalty term.
        gradient_output_layer = (1 / M) * delta_out @ hidden_layer_1_out.T + (l2_lambda / M) * self.output_layer
        gradient_hidden_layer1 = (1 / M) * delta_h1 @ hidden_layer_0_out.T + (l2_lambda / M) * self.hidden_layer1
        gradient_hidden_layer0 = (1 / M) * delta_h0 @ x_train.T + (l2_lambda / M) * self.hidden_layer0

        # Bias gradients: the error term summed over the sample axis.
        # Previously the biases were never updated and kept their random initial values.
        gradient_output_bias = (1 / M) * np.sum(delta_out, axis=1, keepdims=True)
        gradient_hidden1_bias = (1 / M) * np.sum(delta_h1, axis=1, keepdims=True)
        gradient_hidden0_bias = (1 / M) * np.sum(delta_h0, axis=1, keepdims=True)

        self.output_layer -= learn_rate * gradient_output_layer
        self.hidden_layer1 -= learn_rate * gradient_hidden_layer1
        self.hidden_layer0 -= learn_rate * gradient_hidden_layer0
        self.output_layer_bias -= learn_rate * gradient_output_bias
        self.hidden_layer1_bias -= learn_rate * gradient_hidden1_bias
        self.hidden_layer0_bias -= learn_rate * gradient_hidden0_bias

    def _evaluate(self, x_tests, y_tests):
        """Return the accuracy on the given samples; raise ValueError if any output contains NaN."""
        predictions = []
        for x_test, _ in zip(x_tests, y_tests):
            _, _, y_out = self.forward_propagate(x_test)
            if np.isnan(y_out).any():
                raise ValueError("Nan in output\n" + str(y_out))
            predictions.append(get_predictions(y_out)[0])
        return get_accuracy(np.array(predictions), y_tests)

    def train(self, epochs, learn_rate, x_trains, y_trains, x_tests, y_tests, batch_size=10, l2_lambda=0.01):
        """Train for up to `epochs` passes, stopping early once test accuracy exceeds 90%.

        Weights are updated after every single sample. `batch_size` only
        sets how many samples are processed between two accuracy evaluations
        on (`x_tests`, `y_tests`).

        Raises:
            ValueError: if the network produces NaN during evaluation.
        """
        # range(1, epochs + 1) so that exactly `epochs` passes run
        # (the previous range(1, epochs) stopped one pass short).
        for epoch in range(1, epochs + 1):
            for start in range(0, x_trains.shape[0], batch_size):
                x_batch = x_trains[start:start + batch_size]
                y_batch = y_trains[start:start + batch_size]
                for x_train, y_train in zip(x_batch, y_batch):
                    h0, h1, y_out = self.forward_propagate(x_train)
                    self.backward_propagate(learn_rate, h0, h1, y_out, x_train, y_train, l2_lambda)
                accuracy = self._evaluate(x_tests, y_tests)
                print(f"\rEpoch: {epoch} (Accuracy: {100 * accuracy :0.2f}%)", end="")
                if accuracy > 0.9:
                    print()
                    # This method only returns; persisting the model is left to the caller.
                    print("Reached Accuracy, Saving Model!")
                    return

    def get_prediction_as_json(self, x_in):
        """Return the ten class outputs for `x_in` as a dict keyed by '0'..'9'.

        NOTE(review): each value is `out[x]`, a length-1 ndarray rather than a
        plain float - confirm callers convert it before serialising to JSON.
        """
        _, _, out = self.forward_propagate(x_in)
        return {str(x): out[x] for x in range(10)}

    def test_input(self, index, x_test, y_test):
        """Print the predicted and the actual label of sample `index`."""
        # forward_propagate reshapes its input to a column vector itself, so
        # the sample's shape is not modified in place here.
        _, _, out = self.forward_propagate(x_test[index])
        pred = get_predictions(out)
        print(f"Prediction : {pred[0]}\nActual : {y_test[index]}")

    @staticmethod
    def save_model(obj, name='mnist_model.pickle'):
        """Pickle `obj` to the file `name`."""
        with open(name, 'wb') as file:
            pk.dump(obj, file)
        print(f"Model {name} Saved!")

    @staticmethod
    def load_model(name='mnist_model.pickle'):
        """Unpickle and return the object stored in the file `name`.

        SECURITY: unpickling can execute arbitrary code - only load files
        that this notebook wrote itself.
        """
        with open(name, 'rb') as file:
            print(f"Model {name} loaded.")
            return pk.load(file)

In [105]:
# Build the training set: polluted rows followed by the original rows,
# shuffled in place as one integer matrix.
train_data = np.array(
    pd.concat([
        pd.read_csv('./polluted_data/train.csv'),
        pd.read_csv('./data/train.csv'),
    ])
)
m, n = train_data.shape
np.random.shuffle(train_data)

# Column 0 holds the label; columns 1..n-1 are the features, divided by 255.
train_data_dev = train_data.T
Y_trains = train_data_dev[0]
X_trains = train_data_dev[1:n].T / 255

In [9]:
# Build the evaluation set: original rows followed by the polluted rows,
# shuffled in place as one integer matrix.
test_data = np.array(
    pd.concat([
        pd.read_csv('./data/mnist_test.csv'),
        pd.read_csv('./polluted_data/mnist_test.csv'),
    ])
)
m1, n1 = test_data.shape
np.random.shuffle(test_data)

# Column 0 holds the label; columns 1..n1-1 are the features, divided by 255.
test_data_dev = test_data.T
Y_tests = test_data_dev[0]
X_tests = test_data_dev[1:n1].T / 255

array([9, 4, 3, ..., 0, 5, 7], dtype=int64)

In [89]:
# train model
# nn = NeuralNetwork()
# Index used by the training cell to build the checkpoint file name
# f"model_{interrupt_counter}.pickle"; re-running this cell resets it to 0.
interrupt_counter = 0

In [134]:
# Hyperparameters forwarded to NeuralNetwork.train by the training cell.
epoch_count = 10_000  # passed as `epochs`
learn_rate = 0.01  # gradient-descent step size
batch_size = 1000  # training samples processed between two accuracy evaluations
l2_lambda = 0.0003  # L2 regularisation strength applied to the weight matrices

In [135]:
# Resume from the checkpoint named after the current interrupt_counter if that
# file exists; otherwise start from a freshly initialised network.
model_name = f"model_{interrupt_counter}.pickle"
if os.path.exists(model_name):
    nn = NeuralNetwork.load_model(model_name)
else:
    nn = NeuralNetwork()
    interrupt_counter = 0
try:
    print(f"{epoch_count = }, {learn_rate = }, {batch_size = }, {l2_lambda = }")
    # Reshuffle the evaluation rows and rebuild labels / scaled features
    # (overwrites Y_tests and X_tests from the test-data cell).
    np.random.shuffle(test_data)
    test_data_dev = test_data.T
    Y_tests = test_data_dev[0]
    X_tests = test_data_dev[1:n1].T
    X_tests = X_tests / 255
    nn.train(epoch_count,learn_rate,X_trains,Y_trains,X_tests,Y_tests, batch_size=batch_size, l2_lambda=l2_lambda)
except KeyboardInterrupt as e:
    # Manual stop (kernel interrupt): bump the counter, then fall through to the save.
    # NOTE(review): model_name was computed before this increment, so the save
    # below writes the old name while the next run looks for the incremented one;
    # if that file is missing, the else-branch above starts a new network and
    # resets the counter. Confirm whether resuming after an interrupt is intended.
    interrupt_counter += 1
    print()
    print(f"Pausing training at {model_name}...")
except ValueError as e:
    # NeuralNetwork.train raises ValueError when an output contains NaN.
    interrupt_counter = 0
    print(e)

# Runs on every path, including after the ValueError branch.
# NOTE(review): this also persists a network that just produced NaN - confirm that is wanted.
nn.save_model(nn, name=model_name)

epoch_count = 10000, learn_rate = 0.01, batch_size = 1000, l2_lambda = 0.0003
Epoch: 4 (Accuracy: 11.35%)

In [122]:
# x = np.array([[0.35924991, 0.81438234, 0.80672   , 0.39580896, 0.78656139,
#         0.66606848, 0.85239075, 0.11444438, 0.18347001, 0.2011318 ,
#         0.66177095, 0.22103212, 0.13258092, 0.34119548, 0.95474173,
#         0.49565299, 0.82043591, 0.37857788, 0.77740504, 0.83533551,
#         0.74015923, 0.92627585, 0.35261986, 0.61174406, 0.36646459,
#         0.50213432, 0.28470452, 0.30681534, 0.40271346, 0.97423887],
#        [0.34673833, 0.27358905, 0.66215268, 0.12169938, 0.22637134,
#         0.447563  , 0.13716957, 0.77000734, 0.51997415, 0.27636401,
#         0.91485431, 0.35950252, 0.50103088, 0.96180632, 0.7488849 ,
#         0.36720474, 0.40227126, 0.90528075, 0.99283279, 0.43255661,
#         0.78447159, 0.23401017, 0.41827488, 0.48127486, 0.71334866,
#         0.82665229, 0.26703485, 0.65300824, 0.674449  , 0.83493142],
#        [0.64296124, 0.55791865, 0.28570211, 0.2497653 , 0.17247606,
#         0.2508637 , 0.56635266, 0.5870433 , 0.25727518, 0.42454387,
#         0.37056349, 0.47786774, 0.15269189, 0.75659268, 0.74777632,
#         0.21840999, 0.17226923, 0.86008133, 0.92777535, 0.87996945,
#         0.49223198, 0.74574783, 0.11275197, 0.64012699, 0.60324638,
#         0.49957939, 0.33833081, 0.10022464, 0.1223997 , 0.94474106],
#        [0.28782778, 0.8586247 , 0.67979745, 0.47983673, 0.80073389,
#         0.41487942, 0.91120461, 0.1071562 , 0.19339933, 0.18872722,
#         0.31377922, 0.53279637, 0.32433066, 0.77386264, 0.43183812,
#         0.78540316, 0.7724684 , 0.68649393, 0.38240758, 0.56852594,
#         0.17341751, 0.61519227, 0.73899849, 0.62932334, 0.37831265,
#         0.87657365, 0.5382955 , 0.22471949, 0.77927106, 0.85580268],
#        [0.41790902, 0.68800199, 0.2669509 , 0.12418732, 0.8143539 ,
#         0.76599369, 0.84421721, 0.23028548, 0.50565261, 0.8492062 ,
#         0.41829619, 0.26910249, 0.94715171, 0.66293384, 0.992613  ,
#         0.89074848, 0.80518375, 0.34092644, 0.70241424, 0.94341493,
#         0.86827217, 0.18766418, 0.41627882, 0.83143696, 0.85002916,
#         0.44142204, 0.31687153, 0.98426207, 0.98248553, 0.58102327],
#        [0.32873827, 0.80819802, 0.67257025, 0.50483565, 0.77911132,
#         0.13847376, 0.18012337, 0.30008198, 0.88825398, 0.10909787,
#         0.93985247, 0.21575092, 0.50281134, 0.49573601, 0.96646676,
#         0.55136538, 0.21602155, 0.74050855, 0.12592641, 0.27018857,
#         0.60667243, 0.63980893, 0.99530861, 0.95431141, 0.85896458,
#         0.50474138, 0.24178954, 0.26622655, 0.44721483, 0.89099666],
#        [0.50221851, 0.76042918, 0.94718833, 0.98290949, 0.29698004,
#         0.26824745, 0.23925624, 0.47358281, 0.76139402, 0.751783  ,
#         0.89066232, 0.782753  , 0.29097717, 0.56314722, 0.59360812,
#         0.39211098, 0.42855781, 0.4914807 , 0.85065083, 0.34340315,
#         0.6637582 , 0.22887733, 0.54287622, 0.98181773, 0.22060316,
#         0.91034899, 0.29022654, 0.55880341, 0.6883957 , 0.73006939],
#        [0.33724554, 0.35947668, 0.9232329 , 0.7432118 , 0.10301281,
#         0.95361329, 0.84814175, 0.29075088, 0.98515912, 0.12099808,
#         0.52068204, 0.60629196, 0.97919631, 0.3570158 , 0.51651664,
#         0.98478887, 0.46088296, 0.54486058, 0.9748424 , 0.25831619,
#         0.79121022, 0.87223058, 0.72701879, 0.13040422, 0.34531434,
#         0.37821878, 0.93368346, 0.32132991, 0.67915276, 0.54057071],
#        [0.82955186, 0.89540854, 0.42798417, 0.20672079, 0.72437821,
#         0.44983437, 0.3808123 , 0.82048906, 0.68336787, 0.73404236,
#         0.85517952, 0.44092122, 0.75493651, 0.37389442, 0.66409759,
#         0.20279993, 0.62146772, 0.70360758, 0.30399791, 0.23320118,
#         0.4063471 , 0.98878951, 0.15617456, 0.19488063, 0.99586538,
#         0.73634718, 0.69586969, 0.11609058, 0.67228301, 0.83697677],
#        [0.54854701, 0.57275361, 0.73781451, 0.42024723, 0.6129595 ,
#         0.14034209, 0.1022073 , 0.95643268, 0.17798475, 0.64084498,
#         0.62024994, 0.12709384, 0.13029494, 0.32105629, 0.84584728,
#         0.18047398, 0.10228426, 0.34622783, 0.19931071, 0.49276264,
#         0.97129163, 0.70320194, 0.77150675, 0.57799489, 0.70297992,
#         0.14799186, 0.22504387, 0.26713752, 0.86837752, 0.35223651]])

# y = np.array([[ 794.34835741],
#        [ 814.37301254],
#        [ 989.69508841],
#        [ 747.87486727],
#        [ 982.79903332],
#        [ 924.53502133],
#        [ 998.66777509],
#        [ 863.18601651],
#        [1002.02453053],
#        [ 819.11167586],
#        [ 975.83001157],
#        [ 926.99422914],
#        [1038.5269067 ],
#        [ 913.16274961],
#        [ 912.37774107],
#        [ 813.47935851],
#        [ 952.33753563],
#        [ 855.15980942],
#        [ 842.43387158],
#        [ 828.11920047],
#        [ 860.63658796],
#        [ 839.45020888],
#        [ 985.36269363],
#        [ 958.98995882],
#        [ 841.61818052],
#        [ 884.38365779],
#        [ 906.53093421],
#        [ 963.42998207],
#        [ 793.94157442],
#        [ 933.32668517]])

# # np.exp(993)
# print(softmax(np.dot(x, y)))

[[0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [None]:
import matplotlib.pyplot as plt
import cv2
import random

def rotate_image(image, angle):
    """Rotate a 2-D image by `angle` degrees about (cols/2, rows/2), keeping its size."""
    height, width = image.shape
    centre = (width / 2, height / 2)
    # Scale factor 1: pure rotation, no zoom.
    rotation_matrix = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(image, rotation_matrix, (width, height))

def add_noise(image, noise_level):
    """Add zero-mean Gaussian noise (std `noise_level`) and return a uint8 image.

    The noisy values are clamped to [0, 255] before the cast to uint8.
    """
    perturbation = np.random.normal(scale=noise_level, size=image.shape)
    clamped = np.clip(image + perturbation, 0, 255)
    return clamped.astype(np.uint8)

def scale_image(image, scale_factor, target_size=(28, 28)):
    """Resize `image` by `scale_factor`, then fit it centred into a `target_size` canvas.

    A result smaller than the canvas is surrounded by zeros; a larger one
    is cropped around its centre.
    """
    resized = cv2.resize(image, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
    src_rows, src_cols = resized.shape
    out_rows = target_size[0]
    out_cols = target_size[1]

    # Top-left corner on the canvas (0 when the resized image is at least as large).
    dest_top = max(0, (out_rows - src_rows) // 2)
    dest_left = max(0, (out_cols - src_cols) // 2)

    # Window of the resized image to copy (the full image when it is smaller).
    crop_top = max(0, (src_rows - out_rows) // 2)
    crop_bottom = min(src_rows, crop_top + out_rows)
    crop_left = max(0, (src_cols - out_cols) // 2)
    crop_right = min(src_cols, crop_left + out_cols)

    canvas = np.zeros(target_size, dtype=resized.dtype)
    window = resized[crop_top:crop_bottom, crop_left:crop_right]
    canvas[dest_top:dest_top + src_rows, dest_left:dest_left + src_cols] = window
    return canvas

def modify_image(image, rotation, noise, scale):
    """Apply rotation, then additive noise, then rescaling to `image`, in that order."""
    rotated = rotate_image(image, rotation)
    noisy = add_noise(rotated, noise)
    return scale_image(noisy, scale)

In [None]:
# Write a randomly rotated / noised / rescaled copy of every file in ./data
# to ./polluted_data under the same file name.
dataset_to_pollute = os.listdir('./data')
print(*dataset_to_pollute)
new_dataset_path = "./polluted_data/{file_name}"
# Create the output directory up front so to_csv cannot fail on a missing
# folder after a whole dataset has been processed.
os.makedirs(os.path.dirname(new_dataset_path), exist_ok=True)
for dataset in dataset_to_pollute:
    original = pd.read_csv(f'./data/{dataset}')
    print("starting on polluting", dataset, original.shape)
    # Collect rows in a list and build the frame once; growing a DataFrame
    # with `.loc[i] = row` makes every insertion progressively more expensive.
    polluted_rows = []
    for row in original.to_numpy():
        # Column 0 is the label, the remaining 784 values form a 28x28 image.
        label, x_in = row[0], row[1:]
        x_in = x_in.reshape(28, 28)
        rotation = random.randint(0, 180)  # Rotation angle in degrees
        noise = random.randint(0, 30)  # Noise level
        scale = random.random() + 0.5  # Scaling factor in [0.5, 1.5)
        x_in = modify_image(np.array(x_in, dtype=np.uint8), rotation, noise, scale)
        polluted_rows.append([label] + x_in.flatten().tolist())
    new_dataset = pd.DataFrame(polluted_rows, columns=list(original.columns))
    print("done on polluting", dataset)
    new_dataset.to_csv(new_dataset_path.format(file_name=dataset), index=False)

mnist_test.csv mnist_train.csv small_train.csv train.csv
starting on polluting mnist_test.csv (10000, 785)
done on polluting mnist_test.csv
starting on polluting mnist_train.csv (60000, 785)
done on polluting mnist_train.csv
starting on polluting small_train.csv (2, 785)
done on polluting small_train.csv
starting on polluting train.csv (42000, 785)
done on polluting train.csv
