## Custom Transformers

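A custom transformer, `CustomScaler`, is defined for z-score scaling (standardizing) of data. Its `fit` method returns `self`, and its `transform` method applies the z-score scaling formula $z = (x - \mu) / \sigma$. Note that for training and test data to be scaled consistently, $\mu$ and $\sigma$ should be the statistics of the training data rather than of whatever data is passed to `transform`.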

In [1]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class CustomScaler(BaseEstimator, TransformerMixin):
    """Standardize each feature to zero mean and unit variance (z-score).

    The per-feature mean and standard deviation are learned in ``fit`` and
    reused in ``transform``, so training and test data are scaled with the
    same statistics.

    Attributes
    ----------
    mean_ : ndarray
        Per-feature mean of the data passed to ``fit``.
    scale_ : ndarray
        Per-feature (population) standard deviation of the data passed to
        ``fit``; zero deviations are replaced by 1.
    """

    def fit(self, X, y=None):
        """Learn the per-feature mean and standard deviation of ``X``.

        Parameters
        ----------
        X : array-like
            Training data; statistics are taken along axis 0.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        data = np.asarray(X, dtype=float)
        self.mean_ = data.mean(axis=0)
        std = data.std(axis=0)
        # A constant feature has zero spread; dividing by 1 instead keeps
        # the scaled column finite (all zeros) rather than NaN/inf.
        self.scale_ = np.where(std == 0, 1.0, std)
        return self

    def transform(self, X, y=None):
        """Scale ``X`` with the statistics learned in ``fit``.

        Parameters
        ----------
        X : array-like
            Data with the same feature layout as the data given to ``fit``.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        Scaled data, ``(X - mean_) / scale_``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        # Imported locally so this class does not depend on imports made
        # elsewhere in the file.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, ("mean_", "scale_"))
        return (X - self.mean_) / self.scale_

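Similarly, `OutlierRemoval` is a transformer that removes outliers based on the interquartile range (IQR), and `FeatureInteraction` creates interaction terms between features. These are examples of custom transformations that might be useful in the pre-processing steps of a machine learning pipeline. Note that `OutlierRemoval` drops rows in `transform` without filtering the labels `y`, so it is best applied to the data before it enters a supervised `Pipeline`.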

In [2]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class OutlierRemoval(BaseEstimator, TransformerMixin):
    """Drop rows containing a value outside the IQR fences of any feature.

    For each feature the fences are ``Q1 - factor * IQR`` and
    ``Q3 + factor * IQR``, where ``Q1``/``Q3`` are the 25th/75th percentiles
    of the data seen in ``fit`` and ``IQR = Q3 - Q1``.

    Parameters
    ----------
    factor : float, default=1.5
        Multiplier applied to the IQR when building the fences.

    Notes
    -----
    ``transform`` changes the number of rows and does not filter ``y``.
    """

    def __init__(self, factor=1.5):
        self.factor = factor

    def fit(self, X, y=None):
        """Learn the per-feature quartiles, median and IQR of ``X``.

        Returns
        -------
        self
        """
        # The median is not used for the fences; it is still stored because
        # it has always been a public attribute of the fitted transformer.
        self.median = np.median(X, axis=0)
        self.q1 = np.percentile(X, 25, axis=0)
        self.q3 = np.percentile(X, 75, axis=0)
        self.iqr = self.q3 - self.q1
        return self

    def transform(self, X, y=None):
        """Return the rows of ``X`` whose every feature lies within the fences.

        ``X`` must be two-dimensional (rows are samples) and support boolean
        row indexing.
        """
        # Anchor the fences on the quartiles, not on the median: the
        # conventional IQR rule measures the distance from Q1 and Q3.
        lower_bound = self.q1 - self.factor * self.iqr
        upper_bound = self.q3 + self.factor * self.iqr
        within_fences = (X >= lower_bound) & (X <= upper_bound)
        return X[within_fences.all(axis=1)]


## Custom Classifiers

Here two custom classifiers are defined - `MajorityClassClassifier`, which always predicts the majority class from the training data, and `ThresholdClassifier`, which makes predictions based on a threshold on a particular feature.

In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin
from scipy.stats import mode

class MajorityClassClassifier(BaseEstimator, ClassifierMixin):
    """Baseline classifier that always predicts the most frequent training label.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    majority_class : scalar
        The most frequent label; ties are resolved in favour of the smallest
        label.
    """

    def fit(self, X, y):
        """Record the most frequent label in ``y``.

        ``X`` is accepted for API compatibility and is not inspected.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        # np.unique is used instead of ``scipy.stats.mode(y)[0][0]``: newer
        # SciPy releases return scalars from ``mode`` for 1-D input, so the
        # double indexing fails there.
        labels, counts = np.unique(np.asarray(y), return_counts=True)
        if labels.size == 0:
            raise ValueError("y must contain at least one label")
        self.classes_ = labels
        # argmax returns the first maximum and ``labels`` is sorted, so a tie
        # goes to the smallest label.
        self.majority_class = labels[np.argmax(counts)]
        return self

    def predict(self, X):
        """Return an array holding the majority label once per row of ``X``."""
        return np.full(len(X), self.majority_class)

In [4]:
class ThresholdClassifier(BaseEstimator, ClassifierMixin):
    """Rule-based classifier that thresholds a single feature column.

    Parameters
    ----------
    threshold : float, default=0.5
        Rows whose selected feature is strictly greater than this value are
        labelled 1; all other rows are labelled 0.
    feature_index : int, default=0
        Column of ``X`` that is compared against ``threshold``.
    """

    def __init__(self, threshold=0.5, feature_index=0):
        self.feature_index = feature_index
        self.threshold = threshold

    def fit(self, X, y):
        """Do nothing and return ``self``; the rule has no learned state."""
        return self

    def predict(self, X):
        """Return 1 where the selected feature exceeds the threshold, else 0.

        ``X`` is indexed as ``X[:, feature_index]``.
        """
        selected_feature = X[:, self.feature_index]
        exceeds_threshold = selected_feature > self.threshold
        return exceeds_threshold.astype(int)

In [5]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels

class CustomRandomClassifier(BaseEstimator, ClassifierMixin):
    """Classifier that ignores the features and predicts labels at random.

    Parameters
    ----------
    p : float, default=0.5
        Probability of predicting the second (larger) label; the first label
        is predicted with probability ``1 - p``.
    random_state : int, RandomState instance or None, default=None
        Seed or generator for the random draws. ``None`` uses NumPy's global
        random state.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    """

    def __init__(self, p=0.5, random_state=None):
        self.p = p
        self.random_state = random_state

    def fit(self, X, y):
        """Validate the inputs and remember the labels present in ``y``."""
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)

        return self

    def predict(self, X):
        """Draw one random label per row of ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called.
        ValueError
            If ``p`` is not within ``[0, 1]``.
        """
        # Imported locally: the file-level imports do not provide it.
        from sklearn.utils import check_random_state

        # Check that fit has been called
        check_is_fitted(self)
        # Input validation
        X = check_array(X)

        if not 0.0 <= self.p <= 1.0:
            raise ValueError(f"p must be within [0, 1], got {self.p!r}")

        # Predict the labels actually seen in training. ``p`` only describes
        # a two-way choice, so for any other number of classes fall back to
        # the historical 0/1 output instead of failing.
        if len(self.classes_) == 2:
            labels = self.classes_
        else:
            labels = np.array([0, 1])

        rng = check_random_state(self.random_state)
        return rng.choice(labels, size=len(X), p=[1 - self.p, self.p])

Additional classifiers are also defined: `CustomRandomClassifier`, which predicts classes at random; `HybridClassifier`, which averages the predictions of a Random Forest and an XGBoost model; and `SklearnTorchHybrid`, which averages the predictions of a Random Forest and a simple PyTorch neural network.

In [6]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

class HybridClassifier(BaseEstimator, ClassifierMixin):
    """Soft-voting ensemble of a scikit-learn classifier and an XGBoost classifier.

    Parameters
    ----------
    sklearn_classifier : estimator or None, default=None
        Classifier exposing ``fit`` and ``predict_proba``. ``None`` means a
        fresh ``RandomForestClassifier()``.
    xgb_classifier : estimator or None, default=None
        Classifier exposing ``fit`` and ``predict_proba``. ``None`` means a
        fresh ``xgb.XGBClassifier()``.

    Attributes
    ----------
    sklearn_classifier_, xgb_classifier_ : estimators
        Fitted copies of the two models; the objects passed to the
        constructor are left untouched.
    classes_ : ndarray
        Class labels, taken from the fitted scikit-learn model.
    """

    # The defaults are None rather than estimator instances: a default
    # instance would be created once and shared by every HybridClassifier.
    def __init__(self, sklearn_classifier=None, xgb_classifier=None):
        self.sklearn_classifier = sklearn_classifier
        self.xgb_classifier = xgb_classifier

    def fit(self, X, y):
        """Fit fresh copies of both models on ``(X, y)`` and return ``self``."""
        # Imported locally: the file-level imports do not provide it.
        from sklearn.base import clone

        if self.sklearn_classifier is None:
            self.sklearn_classifier_ = RandomForestClassifier()
        else:
            self.sklearn_classifier_ = clone(self.sklearn_classifier)

        if self.xgb_classifier is None:
            self.xgb_classifier_ = xgb.XGBClassifier()
        else:
            self.xgb_classifier_ = clone(self.xgb_classifier)

        self.sklearn_classifier_.fit(X, y)
        self.xgb_classifier_.fit(X, y)
        self.classes_ = self.sklearn_classifier_.classes_
        return self

    def predict(self, X):
        """Predict the class with the highest average probability.

        Averaging probabilities (rather than averaging and rounding predicted
        label ids) cannot produce a class that neither model supports and
        does not systematically resolve disagreements towards label 0.
        Assumes both models order their ``predict_proba`` columns like
        ``classes_``.
        """
        sklearn_proba = self.sklearn_classifier_.predict_proba(X)
        xgb_proba = self.xgb_classifier_.predict_proba(X)
        mean_proba = (sklearn_proba + xgb_proba) / 2
        return self.classes_[np.argmax(mean_proba, axis=1)]


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import StandardScaler

class SimpleNN(nn.Module):
    """Two-layer fully connected network with a sigmoid output.

    Parameters
    ----------
    input_dim : int, default=4
        Number of input features.
    hidden_dim : int, default=64
        Width of the hidden layer.
    """

    def __init__(self, input_dim=4, hidden_dim=64):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, 1)`` values in (0, 1)."""
        hidden = torch.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(hidden))

class SklearnTorchHybrid(BaseEstimator, ClassifierMixin):
    """Binary classifier combining a scikit-learn model with a small PyTorch net.

    Parameters
    ----------
    sklearn_classifier : estimator or None, default=None
        Classifier exposing ``fit`` and ``predict``. ``None`` means a fresh
        ``RandomForestClassifier()``.
    epochs : int, default=10
        Number of full-batch gradient steps used to train the network.
    lr : float, default=0.001
        Learning rate of the Adam optimizer.

    Attributes
    ----------
    sklearn_classifier_ : estimator
        Fitted copy of ``sklearn_classifier``; the object passed to the
        constructor is left untouched.
    scaler_ : StandardScaler
        Scaler fitted on the training data and reused at prediction time.
    classes_ : ndarray
        Class labels, taken from the fitted scikit-learn model.

    Notes
    -----
    Labels must be 0/1 (the network is trained with ``BCELoss``), and the
    network is built with ``SimpleNN()``, whose default input width is used.
    """

    # The default is None rather than an estimator instance: a default
    # instance would be created once and shared by every SklearnTorchHybrid.
    def __init__(self, sklearn_classifier=None, epochs=10, lr=0.001):
        self.sklearn_classifier = sklearn_classifier
        self.epochs = epochs
        self.lr = lr
        self.torch_model = SimpleNN()
        self.criterion = nn.BCELoss()
        self.optimizer = optim.Adam(self.torch_model.parameters(), lr=lr)

    def fit(self, X, y):
        """Fit the scikit-learn model, the scaler and the network on ``(X, y)``."""
        # Imported locally: the file-level imports do not provide it.
        from sklearn.base import clone

        if self.sklearn_classifier is None:
            self.sklearn_classifier_ = RandomForestClassifier()
        else:
            self.sklearn_classifier_ = clone(self.sklearn_classifier)
        self.sklearn_classifier_.fit(X, y)
        self.classes_ = self.sklearn_classifier_.classes_

        # Keep the scaler: predict must apply the *training* statistics.
        self.scaler_ = StandardScaler().fit(X)
        X_tensor = torch.tensor(self.scaler_.transform(X), dtype=torch.float32)
        y_tensor = torch.tensor(np.asarray(y), dtype=torch.float32).view(-1, 1)

        # Start from a fresh network and optimizer so that repeated calls to
        # fit do not continue from earlier weights and so that a changed
        # ``lr`` (e.g. via set_params) takes effect.
        self.torch_model = SimpleNN()
        self.optimizer = optim.Adam(self.torch_model.parameters(), lr=self.lr)

        self.torch_model.train()
        for _ in range(self.epochs):
            self.optimizer.zero_grad()
            outputs = self.torch_model(X_tensor)
            loss = self.criterion(outputs, y_tensor)
            loss.backward()
            self.optimizer.step()

        return self

    def predict(self, X):
        """Average the two models' 0/1 predictions and round to a label.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called.
        """
        # Imported locally: the file-level imports do not provide it.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, ("sklearn_classifier_", "scaler_"))

        sklearn_preds = self.sklearn_classifier_.predict(X)

        # Reuse the scaler fitted in ``fit``; refitting on the prediction
        # data would feed the network inputs on a different scale than the
        # one it was trained on.
        X_scaled = self.scaler_.transform(X)
        X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

        self.torch_model.eval()
        with torch.no_grad():
            torch_preds = (self.torch_model(X_tensor) > 0.5).numpy().astype(int).flatten()

        final_preds = (sklearn_preds + torch_preds) / 2  # Averaging predictions
        # NumPy rounds 0.5 to the nearest even integer, so when the two
        # models disagree the result is 0.
        return final_preds.round().astype(int)

## Custom Pipeline

A scikit-learn pipeline is created combining the CustomScaler transformer and HybridClassifier. This pipeline is then fitted on the training data and evaluated on the test data, showcasing how custom transformers and classifiers can be integrated into scikit-learn pipelines for streamlined model training and evaluation.

In [8]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris dataset: feature matrix X and species labels y.
iris = load_iris()
X = iris.data
y = iris.target

# Reduce the task to binary classification: 1 for setosa (label 0), 0 otherwise.
y_binary = (y == 0).astype(int)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    random_state=42,
    test_size=0.2,
)

In [9]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

# CustomScaler and HybridClassifier must already be defined (see the cells above).

pipeline = Pipeline(
    steps=[
        ('scaler', CustomScaler()),
        (
            'classifier',
            HybridClassifier(
                sklearn_classifier=RandomForestClassifier(random_state=42),
                xgb_classifier=xgb.XGBClassifier(
                    use_label_encoder=False,
                    eval_metric='logloss',
                    random_state=42,
                ),
            ),
        ),
    ]
)

# Train on the training split, then report per-class metrics on the held-out split.
predictions = pipeline.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

