In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#convertimos valores en un rango de 0,1
def sigmoid(z):

    return 1 / (1 + np.exp(-z))

In [4]:
#funcion de perdida para la regresion logistica
def cross_entropy_loss(y, y_pred):
    """
    Calcular la pérdida utilizando la función de entropía cruzada.
    """
    m = len(y)
    loss = - (1 / m) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
    return loss


In [6]:
# Clase para regresión logística
class LogisticRegression:
    def __init__(self, input_size):
        self.input_size = input_size
        self.weights = np.zeros((input_size, 1))  # Inicializar pesos a cero
        self.bias = 0  # Inicializar sesgo a cero

    def forward(self, X):
        """
        Calcular la salida del modelo (hipótesis) para las entradas dadas.
        """
        z = np.dot(X, self.weights) + self.bias
        return sigmoid(z)

    def backward(self, X, y, y_pred, learning_rate):
        """
        Realizar retropropagación para actualizar los parámetros del modelo.
        """
        m = len(y)
        dw = np.dot(X.T, (y_pred - y)) / m  # Gradiente respecto a los pesos
        db = np.mean(y_pred - y)  # Gradiente respecto al sesgo

        # Actualizar parámetros
        self.weights -= learning_rate * dw
        self.bias -= learning_rate * db

    def train(self, X, y, num_epochs, learning_rate):
        """
        Entrenar el modelo utilizando descenso de gradiente estocástico.
        """
        for epoch in range(num_epochs):
            # Calcular la salida del modelo
            y_pred = self.forward(X)

            # Calcular la pérdida
            loss = cross_entropy_loss(y, y_pred)

            # Realizar retropropagación para actualizar parámetros
            self.backward(X, y, y_pred, learning_rate)

            # Imprimir la pérdida en cada época
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X):
        """
        Realizar predicciones utilizando el modelo entrenado.
        """
        # Calcular las predicciones
        y_pred = self.forward(X)

        # Redondear las predicciones a 0 o 1
        y_pred = np.round(y_pred)

        return y_pred

In [7]:


# Cargar los datos
full_data = pd.read_csv('/content/drive/MyDrive/laboratorios_IA/primerParcial/primer ejercicio/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/laboratorios_IA/primerParcial/primer ejercicio/test.csv')


In [8]:
print(full_data.info())
#print(test_df.columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72901 entries, 0 to 72900
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               72901 non-null  int64  
 1   CustomerId       72901 non-null  int64  
 2   Surname          72901 non-null  object 
 3   CreditScore      72901 non-null  int64  
 4   Geography        72901 non-null  object 
 5   Gender           72901 non-null  object 
 6   Age              72901 non-null  float64
 7   Tenure           72901 non-null  int64  
 8   Balance          72901 non-null  float64
 9   NumOfProducts    72901 non-null  int64  
 10  HasCrCard        72901 non-null  float64
 11  IsActiveMember   72901 non-null  float64
 12  EstimatedSalary  72901 non-null  float64
 13  Exited           72901 non-null  int64  
dtypes: float64(5), int64(6), object(3)
memory usage: 7.8+ MB
None


In [9]:
# Seleccionar características y etiquetas
X_train_sample = full_data.drop(columns=['id', 'CustomerId', 'Surname', 'Exited'])
y_train_sample = full_data['Exited']

In [10]:
# Codificar variables categóricas utilizando One-Hot Encoding
X_train_encoded = pd.get_dummies(X_train_sample)

In [11]:
# Normalizar las características
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)

In [12]:
# Añadir una columna de unos para el sesgo
X_train_scaled_with_bias = np.hstack((np.ones((X_train_scaled.shape[0], 1)), X_train_scaled))


In [14]:
# Definir hiperparámetros
input_size = X_train_scaled.shape[1]  # Número de características
learning_rate = 0.01
num_epochs = 100

In [15]:
X_train = X_train_encoded.astype(float)

In [None]:
# Inicializar y entrenar el modelo
input_size = X_train_scaled_with_bias.shape[1]  # Obtener el tamaño de las características
print(X_train_scaled_with_bias.shape)  # Verificar la forma de los datos
model = LogisticRegression(input_size)
model.train(X_train_scaled_with_bias, y_train_sample, num_epochs, learning_rate)

(72901, 14)


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression

# Cargar el conjunto de datos
df = pd.read_csv("/content/drive/MyDrive/laboratorios_IA/primerParcial/primer ejercicio/train.csv")

# Preprocesamiento de datos
label_encoder = LabelEncoder()
df['Geography'] = label_encoder.fit_transform(df['Geography'])
df['Gender'] = label_encoder.fit_transform(df['Gender'])

X = df.drop(columns=['id', 'CustomerId', 'Surname', 'Exited'])
y = df['Exited']

# Dividir el conjunto de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenamiento de múltiples clasificadores binarios
classifiers = {}
for class_label in np.unique(y_train):
    y_train_class = (y_train == class_label).astype(int)
    classifier = LogisticRegression()
    classifier.fit(X_train, y_train_class)
    classifiers[class_label] = classifier

# Predicción
y_pred = np.zeros_like(y_test)
for class_label, classifier in classifiers.items():
    y_pred_class = classifier.predict(X_test)
    y_pred[y_pred_class == 1] = class_label

# Calcular precisión
accuracy = np.mean(y_pred == y_test)
print("Precisión del conjunto de prueba:", accuracy)


Precisión del conjunto de prueba: 0.7915094986626432


In [4]:
precision_porcentaje = accuracy * 100
print("Precisión del conjunto de prueba:", precision_porcentaje, "%")


Precisión del conjunto de prueba: 79.15094986626431 %
