<a href="https://colab.research.google.com/github/aadi-kanwar/MLOps/blob/main/Sem_6_Exp_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Toy dataset: five rows with two numeric columns ('age', 'income'),
# one string-valued categorical column ('gender') and a 0/1 label column.
data = dict(
    age=[25, 32, 47, 51, 62],
    income=[50000, 64000, 120000, 85000, 98000],
    gender=['male', 'female', 'female', 'male', 'female'],
    purchased=[0, 1, 1, 0, 1],  # label the features are meant to predict
)

# Keyword order above fixes the column order of the resulting frame.
df = pd.DataFrame(data)

# Separate the label column from the predictor columns.
# `drop` returns a new frame, so `df` itself is left untouched.
y = df['purchased']
X = df.drop(columns=['purchased'])

# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Define preprocessing steps
# Numerical features: standardization (zero mean, unit variance, with the
#   statistics learned from whatever data the transformer is fitted on)
# Categorical features: one-hot encoding
numerical_features = ['age', 'income']
categorical_features = ['gender']

numerical_transformer = StandardScaler()
# handle_unknown='ignore': with a random split of a tiny dataset a category
# can end up only in the test rows. The default ('error') would make
# transform() raise ValueError in that case; 'ignore' encodes such a value
# as an all-zero row instead, so the pipeline keeps working.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Combine transformations using ColumnTransformer: each entry is
# (name, transformer, columns it is applied to). The outputs are concatenated
# in the order listed here: scaled numeric columns first, one-hot columns after.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

# Wrap the preprocessor in a single-step Pipeline so that further steps
# (e.g. an estimator) can be appended later without changing the calls below.
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Learn the preprocessing parameters from the training rows only and
# transform those rows in the same call.
X_train_transformed = pipeline.fit_transform(X_train)

# Re-use the parameters learned on the training rows for the test rows;
# the pipeline is NOT refitted here.
X_test_transformed = pipeline.transform(X_test)

# Display results
# The transformed output may be a sparse matrix (which exposes .toarray())
# or a plain dense array; densify when needed so both print the same way.
for heading, matrix in (
    ("Transformed Training Data:\n", X_train_transformed),
    ("\nTransformed Test Data:\n", X_test_transformed),
):
    dense = matrix.toarray() if hasattr(matrix, 'toarray') else matrix
    print(heading, dense)


Transformed Training Data:
 [[ 0.52489066  1.22474487  1.          0.        ]
 [-1.39970842 -1.22474487  0.          1.        ]
 [ 0.87481777  0.          0.          1.        ]]

Transformed Test Data:
 [[-0.78733599 -0.73484692  1.          0.        ]
 [ 1.83711731  0.45490524  1.          0.        ]]
