# Implementación completa del problema de modelaje estadístico

1. Escriba una clase iterable que cree los datos y los entregue en lotes de tamaño k (por defecto 32). cada vez que se le soliciten los datos, debe entregar un lote de datos de forma aleatoria.

## Carga de librerías

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import Adam

## Creación de datos

In [2]:
# Número de datos
N = 500

# Generar un conjunto de datos de muestra
np.random.seed(0)
x = np.random.randn(N, 2)
y = np.random.randint(0, 2, (N, 1))


In [3]:
#w = tf.Variable(np.random.randn(2, 1))
w = tf.Variable([[-2.0], [2.0]])
#b = tf.Variable([0.])
b = tf.Variable([0.])

## Visualización de datos

In [4]:
# Separar los puntos de datos en dos grupos basados en su clase
class_0 = x[y.ravel() == 0]
class_1 = x[y.ravel() == 1]
class_1 += 2.5

# Recodifica la matriz de datos
x[y.ravel() == 1] += 2.5

# convierte x en un tensor constante
x = tf.constant(x)
y = tf.constant(y)

# ajusta los tipos de datos para evitar problemas de cálculo
w = tf.cast(w, tf.float32)
b = tf.cast(b, tf.float32)
x = tf.cast(x, tf.float32)
y = tf.cast(y, tf.float32)


In [5]:
class DataIterator:
    def __init__(self, x, y, batch_size=32):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.indices = np.arange(len(x))
        np.random.shuffle(self.indices)
        self.current_index = 0
    
    def __iter__(self):
        return self
    
    def __next__(self):
        if self.current_index >= len(self.x):
            raise StopIteration
        
        batch_indices = self.indices[self.current_index:self.current_index+self.batch_size]
        batch_x = self.x[batch_indices]
        batch_y = self.y[batch_indices]
        
        self.current_index += self.batch_size
        
        return batch_x, batch_y

In [6]:
class Trainer:
    def __init__(self, loss_fn, prediction_fn, data_generator):
        self.loss_fn = loss_fn
        self.prediction_fn = prediction_fn
        self.data_generator = data_generator
    
    def train_step(self, x, y):
        with tf.GradientTape() as tape:
            y_pred = self.prediction_fn(x)
            loss = self.loss_fn(y, y_pred)
        grads = tape.gradient(loss, [w, b])
        optimizer.apply_gradients(zip(grads, [w, b]))
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(y_pred), y), tf.float32))
        return loss, accuracy
    
    def fit(self, epochs):
        losses = []
        accuracies = []
        for epoch in range(epochs):
            avg_loss = 0.
            avg_accuracy = 0.
            for batch_x, batch_y in self.data_generator:
                loss, accuracy = self.train_step(batch_x, batch_y)
                avg_loss += loss / len(self.data_generator)
                avg_accuracy += accuracy / len(self.data_generator)
            losses.append(avg_loss)
            accuracies.append(avg_accuracy)
            print("Epoch:", epoch, "loss:", avg_loss.numpy(), "accuracy:", avg_accuracy.numpy())
        return losses, accuracies
    
    def get_parameters(self):
        return w.numpy(), b.numpy()
    
    def evaluate(self, x_test, y_test):
        y_pred = self.prediction_fn(x_test)
        loss = self.loss_fn(y_test, y_pred)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(y_pred), y_test), tf.float32))
        print("Evaluation - loss:", loss.numpy(), "accuracy:", accuracy.numpy())
        return loss, accuracy
    
    def predict(self, x):
        return self.prediction_fn(x)
    
    def plot_metrics(self, losses, accuracies):
        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        plt.plot(losses)
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Training Loss')
        
        plt.subplot(1, 2, 2)
        plt.plot(accuracies)
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title('Training Accuracy')
        
        plt.tight_layout()
        plt.show()

In [9]:
#optimizer = Adam()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

# función predictora
def pred(x):
    y_hat = 1 / (1 + tf.exp(-tf.add(tf.matmul(x,w),b)))
    return y_hat

# pérdida
def loss_fn(y,y_pred):
    loss = -tf.reduce_mean(y * tf.math.log(y_pred) + (1 - y) * tf.math.log(1 - y_pred))
    return loss

In [10]:
# Crear instancias de las clases DataGenerator y Trainer
data_generator = DataIterator(x, y, batch_size=32)
trainer = Trainer(loss_fn, pred, data_generator)

# Entrenar el modelo durante varias épocas
epochs = 10
losses, accuracies = trainer.fit(epochs)

# Obtener los parámetros del modelo entrenado
trained_w, trained_b = trainer.get_parameters()

# Evaluar el modelo en un conjunto de datos de prueba
x_test = np.random.randn(100, 2)
y_test = np.random.randint(0, 2, (100, 1))
loss, accuracy = trainer.evaluate(x_test, y_test)

# Realizar predicciones utilizando el modelo entrenado
x_pred = np.random.randn(10, 2)
predictions = trainer.predict(x_pred)

# Graficar las métricas de entrenamiento (pérdida y precisión)
trainer.plot_metrics(losses, accuracies)

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got array([195,  38,  87, 268,  81, 379, 486, 178, 418, 493, 114,  90, 152,
       146, 405, 191, 453, 108, 480, 222, 465,  52,  59,  86, 351, 291,
        10,  14, 369,  88, 371, 122])