    Task: Complete Pipeline for a Dataset
1. Objective: Build a complex pipeline with multiple transformations.
2. Steps:
    - Load a sample dataset.
    - Define a transformation pipeline with both imputation and scaling.

In [1]:
# Write your code from here
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

# Toy dataset for the demonstration. `None` marks a missing entry, which pandas
# stores as NaN in the two numeric columns.
data = dict(
    Age=[25, 30, 35, None, 40, None, 45],
    Salary=[50000, 60000, 70000, 80000, None, 90000, 100000],
    Name=['John', 'Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank'],
)
df = pd.DataFrame(data=data)

# Column groups: the numeric ones are imputed and scaled; 'Name' is not numeric
# and is only carried along.
numeric_columns, non_numeric_columns = ['Age', 'Salary'], ['Name']

# Numeric preprocessing: fill missing values with the column mean, then
# standardize each column.
numeric_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

# Route only the numeric columns through the pipeline above; every other
# column is passed through untouched.
preprocessor = ColumnTransformer(
    [('num', numeric_pipeline, numeric_columns)],
    remainder='passthrough',
)

# Fit the preprocessor on the data and transform it in one step.
df_transformed = preprocessor.fit_transform(df)

# ColumnTransformer places the transformed numeric columns first and appends the
# passthrough (remainder) columns after them in their original order. Deriving
# the passthrough labels from `df` keeps the column names correct even when the
# frame has more non-numeric columns than a hand-maintained list mentions.
passthrough_columns = [col for col in df.columns if col not in numeric_columns]

# Because a string column is passed through, the transformed array has object
# dtype. Rebuild the DataFrame on the original index and cast the numeric
# columns back to float so they behave as numbers downstream.
df_transformed_df = pd.DataFrame(
    df_transformed,
    columns=numeric_columns + passthrough_columns,
    index=df.index,
).astype({col: 'float64' for col in numeric_columns})

# Sanity check: imputation must leave no missing values in the numeric columns.
assert not df_transformed_df[numeric_columns].isna().any().any()

# Output the original and transformed data
print("Original Data:")
print(df)
print("\nTransformed Data:")
print(df_transformed_df)


Original Data:
    Age    Salary     Name
0  25.0   50000.0     John
1  30.0   60000.0    Alice
2  35.0   70000.0      Bob
3   NaN   80000.0  Charlie
4  40.0       NaN    David
5   NaN   90000.0      Eve
6  45.0  100000.0    Frank

Transformed Data:
       Age    Salary     Name
0 -1.67332 -1.581139     John
1 -0.83666 -0.948683    Alice
2      0.0 -0.316228      Bob
3      0.0  0.316228  Charlie
4  0.83666       0.0    David
5      0.0  0.948683      Eve
6  1.67332  1.581139    Frank


In [None]:
# Task: Imputation Function








# Scaling Function









# Combined Transformation Function







