# 🧠 Feature Engineering with Pandas and Scikit-learn
This notebook provides comprehensive examples of feature engineering techniques for machine learning workflows.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Synthetic six-row customer table used by every later cell.
# `age`, `income` and `gender` each contain exactly one missing entry
# (np.nan / None); `signup_date` is kept as ISO-formatted strings here.
data = dict(
    age=[25, 30, 35, 40, np.nan, 50],
    income=[50000, 60000, 70000, 80000, 90000, None],
    gender=['Male', 'Female', 'Female', 'Male', 'Other', None],
    signup_date=[
        '2022-01-01', '2022-02-15', '2022-03-20',
        '2022-04-10', '2022-05-05', '2022-06-01',
    ],
    country=['UK', 'US', 'UK', 'US', 'FR', 'DE'],
)
df = pd.DataFrame(data)
df

## 🔍 Missing Value Handling

In [None]:
# Impute every gap in a single pass and bind the result back to `df`.
#
# The per-column form `df[col].fillna(..., inplace=True)` mutates a temporary
# column object rather than the frame itself: pandas 2.x emits a FutureWarning
# for it, and with Copy-on-Write enabled (the default from pandas 3.0) the
# frame would be left with its missing values untouched.
fill_values = {
    'age': df['age'].mean(),           # numeric -> mean of the observed ages
    'income': df['income'].median(),   # numeric -> median of the observed incomes
    # categorical -> most frequent label; mode() may return several values on a
    # tie, and [0] takes the first of them
    'gender': df['gender'].mode()[0],
}
df = df.fillna(fill_values)
df

## 🧮 Categorical Encoding

In [None]:
# Ordinal codes for `gender`: casting to `category` numbers the distinct
# labels in sorted order (a missing label would be coded as -1).
gender_codes = df['gender'].astype('category').cat.codes

# Append the codes, then one-hot encode `country`; get_dummies drops the
# original `country` column and adds one `country_<value>` column per value.
df = (
    df.assign(gender_encoded=gender_codes)
      .pipe(pd.get_dummies, columns=['country'], prefix='country')
)
df

## 🗓 Date Feature Extraction

In [None]:
# Parse the sign-up strings once, then derive the calendar features from the
# parsed series.
signup_ts = pd.to_datetime(df['signup_date'])

df['signup_date'] = signup_ts
df['signup_month'] = signup_ts.dt.month          # 1 = January ... 12 = December
df['signup_dayofweek'] = signup_ts.dt.dayofweek  # 0 = Monday ... 6 = Sunday
df

## 📊 Binning

In [None]:
# Bucket `age` into three labelled bands. pd.cut uses right-inclusive
# intervals by default, i.e. (0, 30], (30, 40], (40, 60]; ages outside
# that range come back as NaN.
age_bin_edges = [0, 30, 40, 60]
age_bin_labels = ['Young', 'Mid-age', 'Senior']

df['age_bin'] = pd.cut(df['age'], bins=age_bin_edges, labels=age_bin_labels)
df

## ➕ Interaction Features

In [None]:
# Ratio feature: income divided element-wise by age, row by row.
df['income_per_age'] = df['income'].div(df['age'])
df

## 🧮 Polynomial Features

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Columns to expand; the same list is reused to name the generated features.
poly_input_cols = ['age', 'income']

# degree=2 without the bias column yields the two inputs plus their squares
# and their pairwise product.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly.fit_transform(df[poly_input_cols])

poly_feature_names = poly.get_feature_names_out(input_features=poly_input_cols)
poly_df = pd.DataFrame(poly_features, columns=poly_feature_names)
poly_df.head()

## 📈 Group-wise Aggregations

In [None]:
# Broadcast each gender group's mean income back onto every row of that
# group; `transform` returns a result aligned with the original index.
income_by_gender = df.groupby('gender')['income']
df['gender_group_mean_income'] = income_by_gender.transform('mean')
df

## 🔻 Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# PCA picks the directions of largest variance, so it is sensitive to the
# units of each column. `poly_features` mixes age (tens) with income squared
# (around 1e9); without scaling, the components would essentially reproduce
# the largest-magnitude column. Standardise every column to zero mean and
# unit variance first so each feature contributes on an equal footing.
poly_scaled = StandardScaler().fit_transform(poly_features)

# Project the standardised features onto their first two principal components.
pca = PCA(n_components=2)
reduced = pca.fit_transform(poly_scaled)
reduced_df = pd.DataFrame(reduced, columns=['PC1', 'PC2'])
reduced_df.head()