<a href="https://colab.research.google.com/github/isaacbull/MLcourse1/blob/main/transformclasse_skpipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Building custom transformers using OOP.

In [None]:
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

class CustomTransformer(BaseEstimator, TransformerMixin):
    """Shared parent for hand-written transformers.

    Supplies a no-op ``fit`` so that subclasses which learn nothing from the
    data only have to provide ``transform``.
    """

    def __init__(self):
        # The base class has no hyper-parameters to store.
        return None

    def fit(self, X, y=None):
        """Learn nothing from ``X``/``y`` and return this instance."""
        return self

    def transform(self, X):
        """Placeholder that subclasses must override.

        Raises:
            NotImplementedError: always, when called on a class that did not
                override this method.
        """
        message = "Subclass must implement transform method"
        raise NotImplementedError(message)

class Imputer(CustomTransformer):
    """Replace missing values with a statistic learned during ``fit``.

    Args:
        strategy: ``'mean'`` or ``'median'``; selects whether ``X.mean()`` or
            ``X.median()`` supplies the fill values.
    """

    def __init__(self, strategy='mean'):
        self.strategy = strategy

    def fit(self, X, y=None):
        """Compute and store the fill values from ``X``.

        Args:
            X: object exposing ``mean()`` / ``median()`` (the example in this
                file passes a pandas DataFrame).
            y: ignored; accepted so the signature matches other estimators.

        Returns:
            This instance, with ``fill_values`` set.

        Raises:
            ValueError: if ``strategy`` is neither ``'mean'`` nor ``'median'``.
        """
        chosen = self.strategy
        if chosen not in ('mean', 'median'):
            raise ValueError("Invalid strategy")
        self.fill_values = X.mean() if chosen == 'mean' else X.median()
        return self

    def transform(self, X):
        """Return ``X.fillna(...)`` using the values stored by ``fit``."""
        filled = X.fillna(self.fill_values)
        return filled

class FeatureScaler(CustomTransformer):
    """Scales numerical features.

    Two methods are supported:

    * ``'standardization'``: ``(X - mean) / std``
    * ``'normalization'``: ``(X - min) / (max - min)``

    The statistics come from ``X.mean()``, ``X.std()``, ``X.min()`` and
    ``X.max()`` of the data passed to ``fit``.
    NOTE(review): for pandas input ``X.std()`` is the sample standard
    deviation, so results can differ slightly from scikit-learn's
    ``StandardScaler`` -- confirm this is intended.

    A feature with zero spread (``std == 0`` or ``max == min``) is divided by
    1 instead of 0, so it is mapped to 0 rather than NaN/inf.

    Args:
        method: ``'standardization'`` (default) or ``'normalization'``.
    """

    def __init__(self, method='standardization'):
        self.method = method

    def fit(self, X, y=None):
        """Learn the statistics required by the selected method.

        Args:
            X: data exposing ``mean``/``std``/``min``/``max`` (the example in
                this file passes a pandas DataFrame).
            y: ignored; accepted so the signature matches other estimators.

        Returns:
            This instance, with ``mean``/``std`` or ``min``/``max`` set.

        Raises:
            ValueError: if ``method`` is not one of the supported names.
        """
        if self.method == 'standardization':
            self.mean = X.mean()
            self.std = X.std()
        elif self.method == 'normalization':
            self.min = X.min()
            self.max = X.max()
        else:
            raise ValueError("Invalid method")
        return self

    def transform(self, X):
        """Scale ``X`` with the statistics stored by ``fit``.

        Returns:
            The scaled data, same container type as ``X``.

        Raises:
            ValueError: if ``method`` is not one of the supported names (for
                example because it was changed after ``fit``); previously
                this case silently returned ``None``.
        """
        if self.method == 'standardization':
            return (X - self.mean) / self._safe_divisor(self.std)
        if self.method == 'normalization':
            return (X - self.min) / self._safe_divisor(self.max - self.min)
        raise ValueError("Invalid method")

    @staticmethod
    def _safe_divisor(scale):
        """Return ``scale`` with exact zeros replaced by 1.

        ``scale == 0`` is True only for zero entries, and True counts as 1 in
        arithmetic, so adding the mask bumps zeros to 1 and leaves every
        other entry (including NaN) untouched. The same expression works for
        a scalar and for a pandas Series.
        """
        return scale + (scale == 0)

# Example usage: impute the missing value, then standardize both columns.
# Column 'A' contains one missing entry (None); column 'B' is complete.
data = pd.DataFrame({'A': [1, 2, None, 4], 'B': [5, 6, 7, 8]})

imputer = Imputer(strategy='mean')
scaler = FeatureScaler(method='standardization')

# fit_transform is inherited via TransformerMixin; each step is fitted on the
# output of the previous one.
data_imputed = imputer.fit_transform(data)
data_scaled = scaler.fit_transform(data_imputed)

print(data_scaled)


          A         B
0 -1.069045 -1.161895
1 -0.267261 -0.387298
2  0.000000  0.387298
3  1.336306  1.161895


In [None]:
# Same data as the previous cell, this time scaled with min-max normalization.
# `imputer` and `data` are the objects created in the previous cell.
scaler = FeatureScaler(method='normalization')

data_imputed = imputer.fit_transform(data)
data_scaled = scaler.fit_transform(data_imputed)
print(data_scaled)


          A         B
0  0.000000  0.000000
1  0.333333  0.333333
2  0.444444  0.666667
3  1.000000  1.000000


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

# Create sample data: 100 rows, 2 uniform random features, and a target that
# is a linear combination of them plus standard-normal noise.
np.random.seed(42)  # fixed seed so the generated numbers are reproducible
X = np.random.rand(100, 2)
y = 2 * X[:, 0] + 3 * X[:, 1] + np.random.randn(100)

# Convert to pandas DataFrame for potential future use
# (the cells below keep working with the NumPy arrays X and y).
data = pd.DataFrame(X, columns=['feature1', 'feature2'])
data['target'] = y


In [None]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


### Creating an object to train machine learning models

In [None]:
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

class BaseModel(BaseEstimator, RegressorMixin):
    """Abstract parent for regression models.

    Subclasses provide ``fit`` and ``predict``; ``evaluate`` is implemented
    here on top of ``predict``.
    """

    def __init__(self):
        # The base class has no hyper-parameters to store.
        return None

    def fit(self, X, y):
        """Train the model; must be overridden.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError("Subclass must implement fit method")

    def predict(self, X):
        """Return predictions for ``X``; must be overridden.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError("Subclass must implement predict method")

    def evaluate(self, X, y):
        """Return the mean squared error of ``predict(X)`` against ``y``."""
        return mean_squared_error(y, self.predict(X))

class LinearRegressionModel(BaseModel):
    """Regression model that delegates to a wrapped ``LinearRegression``.

    The wrapped estimator is created in ``__init__`` and kept in ``model``.
    """

    def __init__(self):
        self.model = LinearRegression()

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X``/``y`` and return this wrapper."""
        estimator = self.model
        estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

# Example usage: fit on the training split, then report the mean squared
# error on the held-out test split.
model = LinearRegressionModel()
model.fit(X_train, y_train)
mse = model.evaluate(X_test, y_test)
print(mse)


0.7144483936493808


In [None]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
# Build a synthetic classification problem and split it. This rebinds X, y
# and the train/test variables used by the regression cells above.
X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=0)
# Two-step pipeline: standardize the features, then fit a support vector
# classifier.
pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])
# The pipeline can be used as any other estimator
# and avoids leaking the test set into the train set
# NOTE: the score returned by the next line is neither assigned nor printed.
pipe.fit(X_train, y_train).score(X_test, y_test)
# An estimator's parameter can be set using '__' syntax
# (here: parameter C of the step named 'svc'); the pipeline is then refitted
# and scored again.
pipe.set_params(svc__C=10).fit(X_train, y_train).score(X_test, y_test)

0.76

### Dynamic programming

In [None]:
import pdb

from functools import lru_cache


# The stray pdb.set_trace() that used to sit here was removed: it dropped
# every run of this cell into the interactive debugger before the function
# was even defined.
@lru_cache(maxsize=None)
def fib_recursive(n):
    """Return the n-th Fibonacci number (F(0) = 0, F(1) = 1).

    The recursion is memoised with ``lru_cache``, so each distinct ``n`` is
    computed once. Without the cache the same sub-problems are recomputed
    and the number of calls grows exponentially with ``n``.

    Args:
        n: index of the Fibonacci number. Any ``n <= 1`` (including negative
            values) is returned unchanged, as in the original implementation.

    Returns:
        The n-th Fibonacci number.
    """
    if n <= 1:
        return n
    return fib_recursive(n - 1) + fib_recursive(n - 2)


print(fib_recursive(10))  # Output: 55



sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/lib/python3.10/bdb.py", line 336, in set_trace
    sys.settrace(self.trace_dispatch)



--Return--
None
> [0;32m<ipython-input-3-f8514371b23a>[0m(3)[0;36m<cell line: 3>[0;34m()[0m
[0;32m      1 [0;31m[0;32mimport[0m [0mpdb[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      2 [0;31m[0;34m[0m[0m
[0m[0;32m----> 3 [0;31m[0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      4 [0;31m[0;32mdef[0m [0mfib_recursive[0m[0;34m([0m[0mn[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      5 [0;31m    [0;32mif[0m [0mn[0m [0;34m<=[0m [0;36m1[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m
[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

> [0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py[0m(3464)[0;36mrun_ast_nodes[0;34m()[0m
[0;32m   3462 [0;31m                    [0mto_run[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0;34m(


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/lib/python3.10/bdb.py", line 361, in set_quit
    sys.settrace(None)




