In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# Load the Palmer Penguins dataset
penguins = pd.read_csv('penguins.csv')

# Recode the 'sex' column to binary labels 0 and 1
penguins['Sex'] = penguins['Sex'].replace(to_replace=['MALE', 'FEMALE'], value=[0, 1])

# Split the dataset into training and testing sets
X = penguins.iloc[:, [0, 1, 2]].to_numpy()  # select the first 3 columns as features
y = penguins.iloc[:, 3].to_numpy()         # select the 'sex' column as the target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define the sigmoid function
def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

# Define the gradient ascent algorithm
def grad_ascent(data, labels, alpha=0.001, max_iter=500):
    data_matrix = np.mat(data)
    label_matrix = np.mat(labels).transpose()
    m, n = np.shape(data_matrix)
    weights = np.ones((n, 1))
    for i in range(max_iter):
        h = sigmoid(data_matrix * weights)
        error = label_matrix - h
        weights = weights + alpha * data_matrix.transpose() * error
    return weights

# Define the predict function
def predict(X, weights):
    return np.where(sigmoid(X @ weights) > 0.5, 1, 0)

# Normalize the training set using min-max normalization
def normalize(X):
    X_norm = X.copy()
    num_features = X.shape[1]
    for i in range(num_features):
        min_val = np.min(X[:, i])
        max_val = np.max(X[:, i])
        X_norm[:, i] = (X[:, i] - min_val) / (max_val - min_val)
    return X_norm

# Normalize the training and testing sets using min-max normalization
X_train_norm, X_train_ranges, X_train_minvals = autoNorm(X_train)
X_test_norm = (X_test - X_train_minvals) / X_train_ranges

# Train the logistic regression model using gradient ascent
X_train_norm = np.hstack((np.ones((X_train_norm.shape[0], 1)), X_train_norm))  # add a column of 1s for the bias term
weights = grad_ascent(X_train_norm, y_train)

# Evaluate the model's performance on the testing set
X_test_norm = np.hstack((np.ones((X_test_norm.shape[0], 1)), X_test_norm))  # add a column of 1s for the bias term
y_pred = predict(X_test_norm, weights)
conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)

# Plot the decision boundary
plt.scatter(X_train_norm[:, 1], X_train_norm[:, 2], c=y_train)
x1 = np.min(X_train_norm[:, 1]) - 0.1
x2 = np.max(X_train_norm[:, 1]) + 0.1
y1 = (-weights[0] - weights[1] * x1) / weights[2]


NameError: name 'autoNorm' is not defined