In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
# Read the 'penguins' dataset through seaborn's loader and keep it as `df`
df = sns.load_dataset(name='penguins')

In [None]:
# Keep only fully populated rows: a row is discarded as soon as any of its
# columns holds a missing value.
df = df.dropna(axis=0, how='any')

In [None]:
# Encode the target variable (species) as integer class ids
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['species_encoded'] = label_encoder.fit_transform(df['species'])

# Convert 'sex' to numerical values (Male: 1, Female: 0).
# Done before X is bound so the feature handed on is the numeric column,
# not the raw 'Male'/'Female' strings.
df['sex_encoded'] = df['sex'].map({'Male': 1, 'Female': 0})

# Feature and target
X = df['sex_encoded']  # Example feature (numeric encoding of 'sex')
y = df['species_encoded']  # Target variable


## Gini Impurity

In [None]:
# Function to calculate Gini Impurity
def gini_impurity(labels):
    """Return the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    samples that carry class ``k``. It is 0.0 for a pure node and grows as
    the classes become more evenly mixed.

    Parameters
    ----------
    labels : array-like
        Class labels of the samples in the node. Any values that
        ``np.unique`` can count are accepted (non-negative ints, negative
        ints, strings, ...).

    Returns
    -------
    float
        The Gini impurity; 0.0 when ``labels`` is empty.
    """
    labels = np.asarray(labels)
    if labels.size == 0:
        # With no samples the sum of squared proportions is 0, so the bare
        # formula would report 1.0; an empty node is treated as pure instead.
        return 0.0

    # Count each distinct label; unlike np.bincount this does not require
    # the labels to be non-negative integers.
    _, counts = np.unique(labels, return_counts=True)
    proportions = counts / counts.sum()
    return float(1.0 - np.sum(proportions ** 2))

## Information Gain

In [None]:
# Function to calculate Information Gain
def information_gain(parent, left, right):
    """Return the information gain of splitting ``parent`` into two children.

    The gain is the Shannon entropy (base 2) of ``parent`` minus the
    size-weighted average entropy of ``left`` and ``right``, where each
    child is weighted by ``len(child) / len(parent)``.

    Parameters
    ----------
    parent : array-like
        Class labels of all samples before the split.
    left, right : array-like
        Class labels of the samples sent to each side of the split.

    Returns
    -------
    float
        The entropy reduction achieved by the split; 0.0 when ``parent``
        is empty.
    """
    def entropy(labels):
        """Shannon entropy in bits of ``labels``; 0.0 for an empty input."""
        labels = np.asarray(labels)
        if labels.size == 0:
            return 0.0
        # np.unique only reports labels that actually occur, so every
        # proportion is strictly positive and log2 is always defined.
        _, counts = np.unique(labels, return_counts=True)
        proportions = counts / counts.sum()
        return float(-np.sum(proportions * np.log2(proportions)))

    n = len(parent)
    if n == 0:
        # Nothing to split: avoid dividing by zero in the child weights.
        return 0.0

    n_left, n_right = len(left), len(right)
    weighted_entropy = (n_left / n) * entropy(left) + (n_right / n) * entropy(right)
    return entropy(parent) - weighted_entropy

### Use Case

In [None]:
# Partition the species labels by the binary 'sex_encoded' column:
# rows where it equals 0 go left, rows where it equals 1 go right.
left_split = df.loc[df['sex_encoded'].eq(0), 'species_encoded']
right_split = df.loc[df['sex_encoded'].eq(1), 'species_encoded']

#### Gini Impurities

In [None]:
# Gini impurity of the unsplit labels, then of each side of the split,
# evaluated in that order.
parent_gini, left_gini, right_gini = (
    gini_impurity(node_labels)
    for node_labels in (df['species_encoded'], left_split, right_split)
)

In [None]:
# Show the three impurity scores, one labelled line each
for caption, score in (
    ("Parent Gini Impurity:", parent_gini),
    ("Left Split Gini Impurity:", left_gini),
    ("Right Split Gini Impurity:", right_gini),
):
    print(caption, score)

#### Information Gain

In [None]:
# Entropy reduction obtained by splitting the full label column into the
# left/right partitions built earlier.
parent = df.loc[:, 'species_encoded']
info_gain = information_gain(parent=parent, left=left_split, right=right_split)

In [None]:
# Report the information gain computed for the split on 'sex'
print("Information Gain for 'sex' split:", info_gain)

Parent Gini Impurity: 0.6383680978275572
Left Split Gini Impurity: 0.638236914600551
Right Split Gini Impurity: 0.6383928571428572
Information Gain for 'sex' split: 0.00010530129858543624
