### Dependencias

In [1]:
import cupy as cp
import numpy as np
import pandas as pd

### Función de activación

In [2]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    ``x`` is handed to ``cp.exp``; the result has the same shape as ``x``.
    """
    denominator = 1 + cp.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)`` element-wise.

    This is the derivative of the logistic function only when ``x`` is already
    a sigmoid *output* (an activation), not a pre-activation value.
    """
    complement = 1 - x
    return x * complement

def softmax(z):
    """Softmax along axis 1 of the output layer's pre-activation scores ``z``.

    The per-row maximum is subtracted before exponentiating so ``cp.exp`` never
    receives a large positive argument; the result is unchanged by that shift.
    Returns the class probabilities: same shape as ``z``, each row summing to 1.
    """
    shifted = z - cp.max(z, axis=1, keepdims=True)
    exp_scores = cp.exp(shifted)
    row_totals = cp.sum(exp_scores, axis=1, keepdims=True)
    return exp_scores / row_totals

# Measures the gap between the probability distribution produced by softmax
# and the true targets.
def cross_entropy_loss(predictions, targets):
    """Cross-entropy summed over all entries and divided by ``targets.shape[0]``.

    ``targets`` multiplies the log-probabilities element-wise, so it must be
    broadcastable against ``predictions``.
    """
    epsilon = 1e-9  # keeps log() finite when a predicted probability is exactly 0
    log_probs = cp.log(predictions + epsilon)
    batch_size = targets.shape[0]
    return -cp.sum(targets * log_probs) / batch_size

#### Encoder que cambia el formato de labels tal que coincida con las predicciones

In [3]:
def one_hot(y, num_classes):
    """Encode class labels as one-hot rows.

    Args:
        y: array of class indices of any shape (it is flattened). Float arrays
            holding whole numbers -- e.g. a label column sliced out of a float
            matrix -- are accepted and cast to integers.
        num_classes: number of columns in the result.

    Returns:
        Array of shape ``(y.size, num_classes)`` with exactly one 1 per row.

    Raises:
        ValueError: if a label is not a whole number or is outside
            ``[0, num_classes)``.
    """
    y = cp.asarray(y).reshape(-1)

    # Integer-array indexing rejects float indices, so cast explicitly and make
    # sure the cast did not change any value.
    labels = y.astype(cp.int64)
    if bool((labels != y).any()):
        raise ValueError("one_hot: labels must be whole numbers")

    # Validate the range up front: CuPy wraps out-of-bounds indices instead of
    # raising, which would silently assign samples to the wrong class.
    if labels.size and (int(labels.min()) < 0 or int(labels.max()) >= num_classes):
        raise ValueError(f"one_hot: labels must lie in [0, {num_classes})")

    one_hot_labels = cp.zeros((labels.shape[0], num_classes))
    one_hot_labels[cp.arange(labels.shape[0]), labels] = 1
    return one_hot_labels

### Clase MLP

In [4]:

class MultiLayerNetwork:
    """Fully connected network: sigmoid hidden layers and a softmax output.

    Trained with full-batch gradient descent on the cross-entropy loss.
    ``weights[i]`` has shape ``(layer_sizes[i], layer_sizes[i + 1])`` and
    ``biases[i]`` has shape ``(1, layer_sizes[i + 1])``.
    """

    def __init__(self, layer_sizes):
        """Create the parameters for the given layer widths.

        Args:
            layer_sizes: ``[input_size, hidden..., output_size]``.

        Raises:
            ValueError: if fewer than two sizes are given (no layer to build).
        """
        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes needs at least an input size and an output size")

        self.layer_sizes = layer_sizes
        self.weights = []
        self.biases = []

        # Gaussian weights scaled by sqrt(2 / fan_in); biases start at zero.
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            weight = cp.random.randn(fan_in, fan_out) * cp.sqrt(2. / fan_in)
            bias = cp.zeros((1, fan_out))
            self.weights.append(weight)
            self.biases.append(bias)

    def _propagate(self, inputs, record=None):
        """Run ``inputs`` through every layer and return the softmax output.

        When ``record`` is a list, each layer's output is appended to it.
        """
        a = inputs
        for weight, bias in zip(self.weights[:-1], self.biases[:-1]):
            a = sigmoid(cp.dot(a, weight) + bias)
            if record is not None:
                record.append(a)

        # Output layer uses softmax for multiclass classification.
        a = softmax(cp.dot(a, self.weights[-1]) + self.biases[-1])
        if record is not None:
            record.append(a)
        return a

    def forward(self, inputs):
        """Forward pass that stores every layer's output in ``self.activations``.

        ``self.activations[0]`` is ``inputs`` and ``self.activations[-1]`` is the
        returned softmax output; ``backward`` reads this list.
        """
        self.activations = [inputs]
        return self._propagate(inputs, self.activations)

    def predict(self, inputs):
        """Return class probabilities without touching ``self.activations``.

        ``inputs`` is converted to an array with at least two dimensions.
        """
        inputs = cp.array(inputs, ndmin=2)
        return self._propagate(inputs)

    def backward(self, targets, learning_rate):
        """One gradient-descent step using the activations of the last ``forward``.

        Args:
            targets: one-hot targets with the same shape as the network output.
            learning_rate: step size applied to every weight and bias.
        """
        m = targets.shape[0]  # number of training examples

        # For softmax + cross-entropy the output-layer error is simply
        # (prediction - target).
        error = self.activations[-1] - targets

        for i in reversed(range(len(self.weights))):
            delta = error
            grad_weights = cp.dot(self.activations[i].T, delta) / m
            grad_biases = cp.sum(delta, axis=0, keepdims=True) / m

            if i != 0:
                # Propagate the error to the previous layer. This must use
                # weights[i] *before* the update below.
                error = cp.dot(delta, self.weights[i].T) * sigmoid_derivative(self.activations[i])

            self.weights[i] -= learning_rate * grad_weights
            self.biases[i] -= learning_rate * grad_biases

    def train(self, inputs, targets, epochs, learning_rate):
        """Full-batch training loop.

        Args:
            inputs: training samples, one per row.
            targets: integer class labels (one-hot encoded internally using the
                width of the output layer).
            epochs: number of forward/backward passes over ``inputs``.
            learning_rate: step size passed to ``backward``.

        Returns:
            List with the cross-entropy loss measured at each epoch (before that
            epoch's update).
        """
        # The number of classes is the output layer width, so networks with an
        # output size other than 10 train correctly.
        targets = one_hot(targets, self.layer_sizes[-1])

        errors = []

        for epoch in range(epochs):
            predictions = self.forward(inputs)
            error = cross_entropy_loss(predictions, targets)
            self.backward(targets, learning_rate)
            errors.append(error)

            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Error: {error}')

        return errors

### Data Prep

In [14]:
# Load the data (pandas is used because it reads the CSV considerably faster than numpy).
train_data = pd.read_csv(r'train_data.csv').to_numpy()

# Split into features and labels.
train_samples = train_data.shape[0]
features = train_data[:, 1:-1]  # every column except the first and the last
labels = train_data[:, -1].reshape(-1, 1)  # last column, as an (n_samples, 1) column

# NOTE(review): the features are used unscaled here; if they are raw 0-255
# pixel values the sigmoid layers will saturate -- consider dividing by 255.
X_train = cp.array(features)
y_train = cp.array(labels).flatten()

print(train_data.shape, features.shape, labels.shape, sep='\n')

(51000, 3074)
(51000, 3072)
(51000, 1)


### Train

In [7]:
num_classes = 10
input_size = 3072
hidden_layers = [1024, 512]  # hidden layer sizes
output_size = 10
layer_sizes = [input_size] + hidden_layers + [output_size]

model = MultiLayerNetwork(layer_sizes)
epochs = 300
learning_rate = 0.05

errors = model.train(X_train, y_train, epochs, learning_rate)

# Evaluate. This is measured on the training set itself, so it is reported as
# training accuracy (it says nothing about generalization).
predictions = model.predict(X_train)
accuracy = cp.mean(predictions.argmax(axis=1) == y_train)
print(f'Train Accuracy: {accuracy}')

Epoch 0, Error: 2.434616586756469
Epoch 10, Error: 2.2016131371311425
Epoch 20, Error: 2.147303683400798
Epoch 30, Error: 2.1059907710491026
Epoch 40, Error: 2.0716307448243194
Epoch 50, Error: 2.042959699512731
Epoch 60, Error: 2.0180059806836033
Epoch 70, Error: 1.9996770994787252
Epoch 80, Error: 1.9876027903696372
Epoch 90, Error: 1.9668331717220198
Epoch 100, Error: 1.9494130841504729
Epoch 110, Error: 1.9598391616711845
Epoch 120, Error: 1.924952577757793
Epoch 130, Error: 1.9148793024377004
Epoch 140, Error: 1.9235766829625383
Epoch 150, Error: 1.9021999998211392
Epoch 160, Error: 1.8969955182916938
Epoch 170, Error: 1.8845436886405007
Epoch 180, Error: 1.8870631390831094
Epoch 190, Error: 1.8956960514070356
Epoch 200, Error: 1.8682549251757685
Epoch 210, Error: 1.8677767123827986
Epoch 220, Error: 1.839600233228658
Epoch 230, Error: 1.8578504513418803
Epoch 240, Error: 1.841699166172649
Epoch 250, Error: 1.8250604872433152
Epoch 260, Error: 1.8407677740399162
Epoch 270, Error: 

## Búsqueda de parámetros

#### Grid Search para ajustar la complejidad de 1 capa oculta

In [13]:

epochs = 200
lr = 0.05

# Try a single hidden layer whose width grows by powers of ten: 1, 10, ..., 10**4.
# (The pre-loop layer_sizes assignment was removed: it was overwritten before
# use and depended on a hidden_layers value left over from another cell.)
for exponent in range(5):  # change 5 to try more/fewer powers of 10
    val = 10 ** exponent
    hidden_layers = [val]  # hidden layer sizes
    print(f"layer size: {val}")
    layer_sizes = [input_size] + hidden_layers + [output_size]
    print(layer_sizes)

    model = MultiLayerNetwork(layer_sizes)
    errors = model.train(X_train, y_train, epochs, lr)

    # Evaluate on the training set (hence "Train", not "Test").
    predictions = model.predict(X_train)
    accuracy = cp.mean(predictions.argmax(axis=1) == y_train)
    print(f'Train Accuracy: {accuracy}')
    print("")

layer size: 1
[3072, 1, 10]
Epoch 0, Error: 2.302621414426933
Epoch 10, Error: 2.3025529936149853
Epoch 20, Error: 2.302498165861214
Epoch 30, Error: 2.302448581355747
Epoch 40, Error: 2.3024037301658726
Epoch 50, Error: 2.3023631576199195
Epoch 60, Error: 2.302326454222536
Epoch 70, Error: 2.302293250202761
Epoch 80, Error: 2.3022632113558354
Epoch 90, Error: 2.302236035506209
Epoch 100, Error: 2.302211449387694
Epoch 110, Error: 2.302189205853871
Epoch 120, Error: 2.30216908136942
Epoch 130, Error: 2.3021508737480465
Epoch 140, Error: 2.3021344001100186
Epoch 150, Error: 2.302119495036605
Epoch 160, Error: 2.3021060089017698
Epoch 170, Error: 2.3020938063636947
Epoch 180, Error: 2.302082765000622
Epoch 190, Error: 2.3020727740770814
Test Accuracy: 0.1069423929098966

layer size: 10
[3072, 10, 10]
Epoch 0, Error: 2.579213115582387
Epoch 10, Error: 2.3516751832167127
Epoch 20, Error: 2.333705394466311
Epoch 30, Error: 2.322258709739571
Epoch 40, Error: 2.314787074289977
Epoch 50, Error

KeyboardInterrupt: 

### Grid Search para ajustar el número de capas ocultas

In [18]:

num_classes = 10
input_size = 3072
hidden_layer_size = 256
output_size = 10
epochs = 100
lr = 0.0001

# Try networks with 1 to 10 hidden layers, all of width hidden_layer_size.
for depth in range(1, 11):  # change 11 to try a different maximum depth
    hidden_layers = [hidden_layer_size] * depth  # hidden layer sizes
    print(f"hidden layers: {hidden_layers}")
    layer_sizes = [input_size] + hidden_layers + [output_size]

    model = MultiLayerNetwork(layer_sizes)
    errors = model.train(X_train, y_train, epochs, lr)

    # Evaluate on the training set (hence "Train", not "Test").
    predictions = model.predict(X_train)
    accuracy = cp.mean(predictions.argmax(axis=1) == y_train)
    print(f'Train Accuracy: {accuracy}')
    print("")

hidden layers: [256]
Epoch 0, Error: 105.86025886841283
Test Accuracy: 0.09698996655518395

hidden layers: [256, 256]
Epoch 0, Error: 108.85456644116445
Test Accuracy: 0.14046822742474915

hidden layers: [256, 256, 256]
Epoch 0, Error: 105.7057006341719
Test Accuracy: 0.14046822742474915

hidden layers: [256, 256, 256, 256]
Epoch 0, Error: 110.30727016766471
Test Accuracy: 0.14046822742474915

hidden layers: [256, 256, 256, 256, 256]
Epoch 0, Error: 125.7849884262953
Test Accuracy: 0.14046822742474915

hidden layers: [256, 256, 256, 256, 256, 256]
Epoch 0, Error: 111.10845764177878
Test Accuracy: 0.14046822742474915

hidden layers: [256, 256, 256, 256, 256, 256, 256]
Epoch 0, Error: 106.9112295635407
Test Accuracy: 0.14046822742474915

hidden layers: [256, 256, 256, 256, 256, 256, 256, 256]
Epoch 0, Error: 107.25215446902249
Test Accuracy: 0.14046822742474915

hidden layers: [256, 256, 256, 256, 256, 256, 256, 256, 256]
Epoch 0, Error: 105.26643352242563
Test Accuracy: 0.14046822742474

### Modelo entrenado con parámetros encontrados

## Regularizaciones:

#### Dropout


In [8]:

class MultiLayerNetwork_Dropout:
    """Sigmoid/softmax multilayer network with inverted dropout on hidden layers.

    During training each hidden activation is zeroed with probability
    ``dropout_rate`` and the survivors are scaled by ``1 / (1 - dropout_rate)``,
    so no rescaling is needed at prediction time.
    """

    def __init__(self, layer_sizes, dropout_rate=0.5):
        """Create the parameters for the given layer widths.

        Args:
            layer_sizes: ``[input_size, hidden..., output_size]``.
            dropout_rate: probability of dropping a hidden unit, in ``[0, 1)``.

        Raises:
            ValueError: if fewer than two layer sizes are given, or if
                ``dropout_rate`` is outside ``[0, 1)`` (a rate of 1 would divide
                by zero when rescaling the kept units).
        """
        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes needs at least an input size and an output size")
        if not 0.0 <= dropout_rate < 1.0:
            raise ValueError("dropout_rate must be in [0, 1)")

        self.layer_sizes = layer_sizes
        self.dropout_rate = dropout_rate
        self.weights = []
        self.biases = []
        self.dropout_masks = []

        # Gaussian weights scaled by sqrt(2 / fan_in); biases start at zero.
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            weight = cp.random.randn(fan_in, fan_out) * cp.sqrt(2. / fan_in)
            bias = cp.zeros((1, fan_out))
            self.weights.append(weight)
            self.biases.append(bias)
            # One mask slot per layer; only hidden-layer slots are ever filled.
            self.dropout_masks.append(None)

    def forward(self, inputs, training=True):
        """Forward pass; stores each layer's output in ``self.activations``.

        Args:
            inputs: samples, one per row.
            training: when True, apply a fresh dropout mask to every hidden
                layer and remember it for ``backward``; when False, no dropout
                is applied and any previously stored masks are cleared.

        Returns:
            The softmax output of the last layer.
        """
        self.activations = [inputs]
        a = inputs

        for i in range(len(self.weights) - 1):
            z = cp.dot(a, self.weights[i]) + self.biases[i]
            a = sigmoid(z)

            if training:
                # Inverted dropout: surviving units are scaled up by 1 / (1 - rate).
                dropout_mask = (cp.random.rand(*a.shape) > self.dropout_rate) / (1.0 - self.dropout_rate)
                a *= dropout_mask
                self.dropout_masks[i] = dropout_mask
            else:
                # Clear the slot so a stale mask from an earlier training pass
                # can never be applied to these activations in backward().
                self.dropout_masks[i] = None

            self.activations.append(a)

        # Output layer uses softmax for multiclass classification.
        z = cp.dot(a, self.weights[-1]) + self.biases[-1]
        a = softmax(z)
        self.activations.append(a)
        return a

    def predict(self, inputs):
        """Return class probabilities with dropout disabled.

        Runs ``forward(training=False)``, so ``self.activations`` is overwritten.
        """
        inputs = cp.array(inputs, ndmin=2)
        return self.forward(inputs, training=False)

    def backward(self, targets, learning_rate):
        """One gradient-descent step using the state left by the last ``forward``.

        Args:
            targets: one-hot targets with the same shape as the network output.
            learning_rate: step size applied to every weight and bias.
        """
        m = targets.shape[0]  # number of training examples

        # For softmax + cross-entropy the output-layer error is simply
        # (prediction - target).
        error = self.activations[-1] - targets

        for i in reversed(range(len(self.weights))):
            delta = error
            grad_weights = cp.dot(self.activations[i].T, delta) / m
            grad_biases = cp.sum(delta, axis=0, keepdims=True) / m

            if i != 0:
                mask = self.dropout_masks[i - 1]
                hidden = self.activations[i]
                if mask is not None:
                    # activations[i] holds s * mask with s the sigmoid output and
                    # mask in {0, 1 / (1 - rate)}. The sigmoid derivative must be
                    # taken at s, so undo the scaling first. Dropped units come
                    # out as 0 here, which is harmless because the mask zeroes
                    # their gradient below anyway.
                    hidden = hidden * (1.0 - self.dropout_rate)

                # Propagate the error to the previous layer. This must use
                # weights[i] *before* the update below.
                error = cp.dot(delta, self.weights[i].T) * sigmoid_derivative(hidden)
                if mask is not None:
                    # Gradient of the dropout step itself (d(s * mask) / ds = mask).
                    error *= mask

            self.weights[i] -= learning_rate * grad_weights
            self.biases[i] -= learning_rate * grad_biases

    def train(self, inputs, targets, epochs, learning_rate):
        """Full-batch training loop with dropout enabled.

        Args:
            inputs: training samples, one per row.
            targets: integer class labels (one-hot encoded internally using the
                width of the output layer).
            epochs: number of forward/backward passes over ``inputs``.
            learning_rate: step size passed to ``backward``.

        Returns:
            List with the cross-entropy loss measured at each epoch. The loss is
            computed on the dropout-perturbed forward pass.
        """
        # The number of classes is the output layer width, so networks with an
        # output size other than 10 train correctly.
        targets = one_hot(targets, self.layer_sizes[-1])

        errors = []

        for epoch in range(epochs):
            predictions = self.forward(inputs, training=True)
            error = cross_entropy_loss(predictions, targets)
            self.backward(targets, learning_rate)
            errors.append(error)

            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Error: {error}')

        return errors

In [9]:
num_classes = 10
input_size = 3072
hidden_layers = [1024, 512]  # hidden layer sizes
output_size = 10
layer_sizes = [input_size] + hidden_layers + [output_size]
epochs = 300
learning_rate = 0.05
dropout_rate = 0.2


model = MultiLayerNetwork_Dropout(layer_sizes, dropout_rate)
errors = model.train(X_train, y_train, epochs, learning_rate)

# Evaluate. This is measured on the training set itself, so it is reported as
# training accuracy (it says nothing about generalization).
predictions = model.predict(X_train)
accuracy = cp.mean(predictions.argmax(axis=1) == y_train)
print(f'Train Accuracy: {accuracy}')
print("")

Epoch 0, Error: 2.6304590288090957
Epoch 10, Error: 2.416289035108642
Epoch 20, Error: 2.435132986179687
Epoch 30, Error: 2.4479345915304282
Epoch 40, Error: 2.451278092300832
Epoch 50, Error: 2.44242108255252
Epoch 60, Error: 2.432301446343906
Epoch 70, Error: 2.4304058688526666
Epoch 80, Error: 2.422964107740824
Epoch 90, Error: 2.4141150109396317
Epoch 100, Error: 2.4118779038003746
Epoch 110, Error: 2.401839477399119
Epoch 120, Error: 2.3984448654187545
Epoch 130, Error: 2.394719815923066
Epoch 140, Error: 2.387000660320394
Epoch 150, Error: 2.3858565064480373
Epoch 160, Error: 2.3812473908587264
Epoch 170, Error: 2.3785743566890107
Epoch 180, Error: 2.378455048475347
Epoch 190, Error: 2.378545869353678
Epoch 200, Error: 2.374787308657348
Epoch 210, Error: 2.3716144230227494
Epoch 220, Error: 2.3708065203260587
Epoch 230, Error: 2.3644337455754894
Epoch 240, Error: 2.3631216344800503
Epoch 250, Error: 2.36319663858445
Epoch 260, Error: 2.3599197629709803
Epoch 270, Error: 2.3570970

### Feature engineering

#### Porcentaje para cada canal RGB

In [10]:
# Pixel columns only (drops the first and the last column). A new name is used
# instead of overwriting train_data, so re-running this cell gives the same
# result rather than stripping two more columns each time.
pixel_data = train_data[:, 1:-1]

# Every third column belongs to the same channel, starting at offset 0, 1 and 2.
# Strided slicing replaces the per-pixel Python loop; the results are 2-D NumPy
# arrays (one row per image) instead of lists of lists.
# NOTE(review): assumes the pixels are stored interleaved (R, G, B, R, G, B, ...)
# rather than as three consecutive planes -- confirm against the dataset.
red_pixels = pixel_data[:, 0::3]
green_pixels = pixel_data[:, 1::3]
blue_pixels = pixel_data[:, 2::3]


#### Más datos

In [11]:
# Convert each channel to an array once, so the reductions below all work on
# the same 2-D data (one row per image).
channels = [np.asarray(px) for px in (red_pixels, green_pixels, blue_pixels)]

# mean intensity per channel
mean_red, mean_green, mean_blue = (np.mean(c, axis=1) for c in channels)

# variance
var_red, var_green, var_blue = (np.var(c, axis=1) for c in channels)

# standard deviation
std_red, std_green, std_blue = (np.std(c, axis=1) for c in channels)

# contrast, measured as peak-to-peak (max - min)
contrast_red, contrast_green, contrast_blue = (np.ptp(c, axis=1) for c in channels)

# One row per image, twelve statistics per row.
channel_data = np.column_stack([
    mean_red, mean_green, mean_blue,
    var_red, var_green, var_blue,
    std_red, std_green, std_blue,
    contrast_red, contrast_green, contrast_blue,
])

print(channel_data.shape)



(51000, 12)


In [None]:
# Move the per-image channel statistics to the GPU and append them to X_train.
channel_data_train_samples = channel_data.shape[0]
features = cp.array(channel_data)  # Features for training

print(channel_data.shape)
print(features.shape )
print(labels.shape)

# Append only once: if the trailing columns of X_train already are these
# statistics (the cell was run before), leave X_train alone instead of adding
# another copy of them.
n_base_columns = X_train.shape[1] - features.shape[1]
already_appended = n_base_columns > 0 and bool(cp.array_equal(X_train[:, n_base_columns:], features))
if not already_appended:
    X_train = cp.concatenate((X_train, features), axis=1)

### Train modelo con dataset de parámetros por imagen

In [16]:
num_classes = 10
# Take the input width from the data (pixel columns plus the appended channel
# statistics) instead of hard-coding it, so it cannot drift out of sync.
input_size = X_train.shape[1]
hidden_layers = [1024, 512]  # hidden layer sizes
output_size = 10
layer_sizes = [input_size] + hidden_layers + [output_size]
epochs = 1000
learning_rate = 0.1

model = MultiLayerNetwork(layer_sizes)
errors = model.train(X_train, y_train, epochs, learning_rate)

# Evaluate. This is measured on the training set itself, so it is reported as
# training accuracy (it says nothing about generalization).
predictions = model.predict(X_train)

accuracy = cp.mean(predictions.argmax(axis=1) == y_train)
print(f'Train Accuracy: {accuracy}')

Epoch 0, Error: 2.5113275348122652
Epoch 10, Error: 2.2445904367446388
Epoch 20, Error: 2.238371108577897
Epoch 30, Error: 2.186311058398496
Epoch 40, Error: 2.1144387106637783
Epoch 50, Error: 2.079136299688558
Epoch 60, Error: 2.046947599714221
Epoch 70, Error: 2.0257240532966465
Epoch 80, Error: 2.018456819466521
Epoch 90, Error: 1.9744938873579285
Epoch 100, Error: 1.9919819534151637
Epoch 110, Error: 1.990961507554027
Epoch 120, Error: 1.9689962318405776
Epoch 130, Error: 1.973475511774524
Epoch 140, Error: 1.9230950615772477
Epoch 150, Error: 1.9318461595505794
Epoch 160, Error: 1.8970906832134782
Epoch 170, Error: 1.9211776316373792
Epoch 180, Error: 1.861067385326333
Epoch 190, Error: 1.8655231958553895
Epoch 200, Error: 1.9612651063469637
Epoch 210, Error: 1.8733193397794161
Epoch 220, Error: 1.881340530620989
Epoch 230, Error: 1.8517431658204806
Epoch 240, Error: 1.880459759175634
Epoch 250, Error: 1.8315925189345055
Epoch 260, Error: 1.8183835988981012
Epoch 270, Error: 1.82

## UTIL


### Data prep pero cortando 20% para test

In [None]:
# Load the data (pandas is used because it reads the CSV considerably faster than numpy).
# NOTE(review): absolute, machine-specific path -- consider a path relative to
# the notebook so the cell runs elsewhere.
train_data = pd.read_csv(r'C:/Users/kueru/Documents/VSCode/semestre_9/Deep_Learning/T2/train_data_2.csv')
train_data = train_data.to_numpy()

# Split into features and labels.
train_samples = train_data.shape[0]
features = cp.array(train_data[:, :-1])  # every column except the last
labels = cp.array(train_data[:, -1]).reshape(-1, 1)  # last column, as an (n_samples, 1) column

print(train_data.shape)
print(features.shape )
print(labels.shape)


indices = cp.arange(train_samples)

# Hold out the first 20% of the rows for testing.
# NOTE(review): the rows are not shuffled before splitting; if the CSV is
# ordered (e.g. by class) the two sets will not be comparable -- confirm.
test_size = 0.2
test_samples = int(test_size * train_samples)
train_indices, test_indices = indices[test_samples:], indices[:test_samples]

# Index-array selection already returns fresh arrays, so no extra cp.array
# copies are made.
X_train, X_test = features[train_indices], features[test_indices]

# Labels are kept 1-D: comparing an (n, 1) label column with
# predictions.argmax(axis=1) of shape (n,) would broadcast to an (n, n) matrix
# and give a meaningless accuracy.
y_train, y_test = labels[train_indices].flatten(), labels[test_indices].flatten()

In [28]:
# Load the data (pandas is used because it reads the CSV considerably faster than numpy).
# NOTE(review): absolute, machine-specific path -- consider a path relative to
# the notebook so the cell runs elsewhere.
train_data = pd.read_csv(r'C:/Users/kueru/Documents/VSCode/semestre_9/Deep_Learning/T2/train_data.csv')
train_data = train_data.to_numpy()

# Split into features and labels.
train_samples = train_data.shape[0]
features = train_data[:, 1:-1]  # every column except the first and the last
labels = train_data[:, -1]      # last column

# Scale pixel values by 1/255.
features = features / 255.0

features = cp.array(features)
labels = cp.array(labels).reshape(-1, 1)  # (n_samples, 1) column

print(train_data.shape)
print(features.shape )
print(labels.shape)

X_train = features
# Define the labels here as well, so the cell does not depend on a y_train left
# in the kernel by another cell. Kept 1-D for the accuracy comparison below.
y_train = labels.flatten()

num_classes = 10
input_size = 3072
hidden_layers = [100, 100]  # hidden layer sizes
output_size = 10
layer_sizes = [input_size] + hidden_layers + [output_size]

model = MultiLayerNetwork(layer_sizes)
epochs = 1
learning_rate = 0.001

# train() takes (inputs, targets, epochs, learning_rate). The extra positional
# argument 2.5 that used to be passed here raised a TypeError and was removed.
# NOTE(review): if 2.5 was a hyper-parameter of an earlier version of the class,
# it has to be reintroduced there first.
errors = model.train(X_train, y_train, epochs, learning_rate)
print(errors)

# Evaluate. This is measured on the training set itself, so it is reported as
# training accuracy (it says nothing about generalization).
predictions = model.predict(X_train)
accuracy = cp.mean(predictions.argmax(axis=1) == y_train)
print(f'Train Accuracy: {accuracy}')

KeyboardInterrupt: 