In [19]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [20]:
# Load the penguins dataset
penguins = sns.load_dataset('penguins')
# Drop rows with missing values
penguins.dropna(inplace=True)
# Features
X = penguins[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
# Target: 1 if Adelie, 0 otherwise
y = (penguins['species'] == 'Adelie').astype(int)

In [21]:
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [22]:
model = LogisticRegression()
model.fit(X, y)

In [23]:
intercept = model.intercept_[0]
coefficients = model.coef_[0]

In [24]:
# Print model coefficients
print("Model Coefficients:")
for feature, coef in zip(X.columns, model.coef_[0]):
    print(f"{feature}: {coef:.4f}")
print(f"Intercept: {model.intercept_[0]:.4f}\n")

Model Coefficients:
bill_length_mm: -4.1752
bill_depth_mm: 2.1214
flipper_length_mm: -0.5548
body_mass_g: 0.6047
Intercept: -1.0475



In [25]:
def manual_prob(row):
    linear_sum = intercept + np.dot(coefficients, row)
    return 1 / (1 + np.exp(-linear_sum))

In [26]:
y_prob_manual = X.apply(manual_prob, axis=1)

In [27]:
 model.predict_proba(X)

array([[8.18135332e-03, 9.91818647e-01],
       [5.07552452e-02, 9.49244755e-01],
       [1.00303416e-01, 8.99696584e-01],
       [1.38764304e-03, 9.98612357e-01],
       [1.90252985e-03, 9.98097470e-01],
       [2.01225409e-02, 9.79877459e-01],
       [2.92102633e-03, 9.97078974e-01],
       [1.64054313e-01, 8.35945687e-01],
       [5.42701549e-04, 9.99457298e-01],
       [2.38729902e-05, 9.99976127e-01],
       [3.90321932e-03, 9.96096781e-01],
       [9.50332863e-03, 9.90496671e-01],
       [1.35792655e-02, 9.86420734e-01],
       [4.85834014e-04, 9.99514166e-01],
       [8.58182830e-02, 9.14181717e-01],
       [4.61230369e-03, 9.95387696e-01],
       [3.03355776e-03, 9.96966442e-01],
       [5.50370217e-04, 9.99449630e-01],
       [7.92055237e-03, 9.92079448e-01],
       [2.97141528e-02, 9.70285847e-01],
       [4.44214835e-04, 9.99555785e-01],
       [3.51224684e-02, 9.64877532e-01],
       [9.86256598e-02, 9.01374340e-01],
       [4.01826178e-03, 9.95981738e-01],
       [1.578078

In [28]:
# Create results DataFrame
results = X.copy()
results['Actual'] = y.values
results['Predicted'] = model.predict(X)
results['Probability_Adelie'] = model.predict_proba(X)[:,1]
results['Probability_Adelie_Manual'] = y_prob_manual

# Show first few rows
print(results.head(10))

   bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  Actual  \
0       -0.896042       0.780732          -1.426752    -0.568475       1   
1       -0.822788       0.119584          -1.069474    -0.506286       1   
2       -0.676280       0.424729          -0.426373    -1.190361       1   
3       -1.335566       1.085877          -0.569284    -0.941606       1   
4       -0.859415       1.747026          -0.783651    -0.692852       1   
5       -0.932669       0.323014          -1.426752    -0.723946       1   
6       -0.877728       1.238450          -0.426373     0.582014       1   
7       -0.529772       0.221299          -1.355296    -1.252549       1   
8       -0.987609       2.052171          -0.712196    -0.506286       1   
9       -1.720150       2.001313          -0.212006     0.239977       1   

   Predicted  Probability_Adelie  Probability_Adelie_Manual  
0          1            0.991819                   0.991819  
1          1            0.949245       

In [29]:
model.score(X, y)

0.984984984984985