In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris  # placeholder (we'll use penguins below)
import tensorflow as tf
import seaborn as sns

# -------------------------
# Load penguins dataset
# -------------------------
penguins = sns.load_dataset("penguins").dropna()

# One-vs-rest classification for Adelie
penguins["is_adelie"] = (penguins["species"] == "Adelie").astype(int)

# Features and target
X = penguins[["bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"]]
y = penguins["is_adelie"]

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------
# Scale data
# -------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------------------------
# Logistic Regression baseline
# -------------------------
lr = LogisticRegression(penalty='none', solver='lbfgs', max_iter=1000)
lr.fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)
y_prob_lr = lr.predict_proba(X_test_scaled)[:, 1]

print("=== Scikit-learn Logistic Regression ===")
for feature, coef in zip(X.columns, lr.coef_[0]):
    print(f"{feature}: {coef:.6f}")
print(f"Intercept: {lr.intercept_[0]:.6f}")
print(f"Training accuracy: {accuracy_score(y_train, lr.predict(X_train_scaled)):.4f}")
print(f"Test accuracy: {accuracy_score(y_test, y_pred_lr):.4f}\n")

# -------------------------
# TensorFlow Neural Network
# -------------------------
tf.random.set_seed(42)

# Single dense layer = logistic regression
tf_model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(1, activation='sigmoid', name="output")
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
tf_model.compile(optimizer=optimizer, loss='binary_crossentropy')
tf_model.fit(X_train_scaled, y_train, epochs=2000, verbose=0)


history = tf_model.fit(X_train_scaled, y_train, epochs=100, verbose=0)

# Evaluate
train_loss, train_acc = tf_model.evaluate(X_train_scaled, y_train, verbose=0)
test_loss, test_acc = tf_model.evaluate(X_test_scaled, y_test, verbose=0)

# Get learned weights
weights, bias = tf_model.get_layer("output").get_weights()
weights = weights.flatten()

print("=== TensorFlow Neural Network (1-layer sigmoid) ===")
for feature, w in zip(X.columns, weights):
    print(f"{feature}: {w:.6f}")
print(f"Bias (intercept): {bias[0]:.6f}")
print(f"Training accuracy: {train_acc:.4f}")
print(f"Test accuracy: {test_acc:.4f}")

# -------------------------
# Compare predictions
# -------------------------
y_prob_tf = tf_model.predict(X_test_scaled).flatten()

comparison = pd.DataFrame({
    "Prob_sklearn": y_prob_lr,
    "Prob_TF": y_prob_tf
})
comparison["Diff"] = np.abs(comparison["Prob_sklearn"] - comparison["Prob_TF"])
print("\nMax difference in predicted probabilities:", comparison["Diff"].max())
print("\nFirst 10 probability comparisons:")
print(comparison.head(10))


InvalidParameterError: The 'penalty' parameter of LogisticRegression must be a str among {'l1', 'l2', 'elasticnet'} or None. Got 'none' instead.

In [15]:
logits_lr = np.dot(X_test, lr_model.coef_[0]) + lr_model.intercept_[0]
logits_tf = np.dot(X_test, weights.flatten()) + bias[0]

print(np.corrcoef(logits_lr, logits_tf)[0,1])

-0.6250888748134507
