<a href="https://colab.research.google.com/github/jarekwan/jarwan_projekt/blob/main/start.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

# Titanic dataset CSV hosted on GitHub.
titanic_url = (
    "https://raw.githubusercontent.com/amueller/scipy-2017-sklearn/"
    "091d371/notebooks/datasets/titanic3.csv"
)
data = pd.read_csv(titanic_url)

# Numeric columns: fill missing values with the median, then standardize.
num_features = ["age", "fare"]
num_pipe = Pipeline(
    steps=[
        ("imputation", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode (first level dropped, unknown categories ignored).
cat_features = ["sex", "embarked"]
cat_pipe = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(drop="first", handle_unknown="ignore")),
    ]
)

# Route each group of columns through its own sub-pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ("numeric", num_pipe, num_features),
        ("categorical", cat_pipe, cat_features),
    ]
)

# Fit the preprocessor on its own, using only the columns it consumes.
preprocessor.fit(data[[*num_features, *cat_features]])

# Full model: preprocessing followed by a logistic-regression classifier.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("model", LogisticRegression()),
    ]
)

In [None]:
# Bare expression: as the last statement of the cell, the notebook renders
# the pipeline object's representation.
pipeline

In [None]:
# Target is the "survived" column; every other column is offered as a feature.
y = data["survived"]
X = data.drop(columns=["survived"])

# Train preprocessing and model together on the full dataset.
pipeline.fit(X, y)

In [None]:
# Predict with the pipeline on X (the feature table built in the cell above).
pipeline.predict(X)

Gotowe transformatory:
* https://scikit-learn.org/stable/api/sklearn.preprocessing.html
* https://feature-engine.trainindata.com/en/1.8.x/user_guide/wrappers/Wrapper.html

Nie są to jedyne źródła.

In [None]:
import numpy as np
# Small 3x2 integer matrix used as example input.
A = np.array([[1, 2], [2, 3], [3, 7]])

In [2]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted


class MyStandardScaler(BaseEstimator, TransformerMixin):
    """Standardize columns: subtract the mean and optionally divide by the std.

    Parameters
    ----------
    with_std : bool, default=True
        If True, centered columns are also divided by their standard
        deviation. If False, the data is only centered.

    Attributes
    ----------
    means_ : ndarray
        Per-column mean computed in ``fit``.
    stds_ : ndarray or None
        Per-column standard deviation computed in ``fit`` (NumPy default,
        ``ddof=0``), or None when ``with_std`` is False.
    n_features_in_ : int
        Number of columns seen in ``fit``.
    feature_names_in_ : ndarray
        Column labels of the DataFrame seen in ``fit``, or generated names
        ``x0, x1, ...`` for other inputs.
    """

    def __init__(self, with_std=True):
        self.with_std = with_std

    def fit(self, X, y=None):
        """Learn per-column mean (and std) from a 2-D array-like or DataFrame.

        ``y`` is accepted for API compatibility and ignored. Returns ``self``.
        """
        if hasattr(X, "columns"):  # DataFrame: remember its column labels
            self.feature_names_in_ = X.columns.to_numpy()
            X = X.to_numpy()
        else:
            # Accept anything NumPy can turn into a 2-D array (e.g. nested lists).
            X = np.asarray(X)
            self.feature_names_in_ = np.array([f"x{i}" for i in range(X.shape[1])])

        self.means_ = X.mean(axis=0)
        self.stds_ = X.std(axis=0) if self.with_std else None
        self.n_features_in_ = X.shape[1]
        return self

    def transform(self, X):
        """Return ``X`` centered (and scaled when ``with_std``) as a NumPy array."""
        check_is_fitted(self, attributes=["means_"])
        X = X.to_numpy() if hasattr(X, "to_numpy") else np.asarray(X)
        X = X - self.means_
        if self.with_std:
            # A constant column has std 0; dividing by it would produce NaN/inf.
            # Divide by 1 instead so such a column simply becomes all zeros.
            X = X / np.where(self.stds_ == 0, 1.0, self.stds_)
        return X

    def get_feature_names_out(self, input_features=None):
        """Return output feature names.

        The transform is one-to-one, so this is ``input_features`` when given,
        otherwise the names stored during ``fit``.
        """
        check_is_fitted(self, attributes=["n_features_in_"])
        if input_features is not None:
            return np.asarray(input_features)
        return self.feature_names_in_


# Sample data: two numeric columns.
X = pd.DataFrame(
    {
        "wiek": [20, 30, 40],
        "dochód": [1000, 2000, 3000],
    }
)

# Usage: build and fit the scaler in one step, then show the feature names.
scaler = MyStandardScaler().fit(X)

print(scaler.get_feature_names_out())
# Expected output: ['wiek' 'dochód']



['wiek' 'dochód']


In [5]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
from sklearn.utils.validation import check_is_fitted

# Mixins are listed before BaseEstimator, the order scikit-learn documents
# for custom estimators so that the method resolution order is correct.
class MyStandardScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
    """Standardize columns: subtract the mean and optionally divide by the std.

    Works directly on the object passed in (NumPy array or pandas DataFrame),
    so a DataFrame input yields a DataFrame output.

    Parameters
    ----------
    with_std : bool, default=True
        If True, centered columns are also divided by their standard
        deviation. If False, the data is only centered.

    Attributes
    ----------
    means_ : per-column mean computed in ``fit``.
    stds_ : per-column population standard deviation (``ddof=0``) computed in
        ``fit``, or None when ``with_std`` is False.
    n_features_in_ : int
        Number of columns seen in ``fit``.
    feature_names_in_ : ndarray
        Column labels; only set when ``fit`` received an object with
        ``columns`` (e.g. a DataFrame).
    """

    def __init__(self, with_std=True):
        self.with_std = with_std

    def fit(self, X, y=None):
        """Learn per-column mean (and std) from ``X``; ``y`` is ignored.

        Returns ``self``.
        """
        self.means_ = X.mean(axis=0)
        # pandas defaults to ddof=1 while NumPy defaults to ddof=0; pin ddof=0
        # so arrays and DataFrames holding the same values scale identically.
        self.stds_ = X.std(axis=0, ddof=0) if self.with_std else None
        self.n_features_in_ = X.shape[1]

        if hasattr(X, "columns"):
            self.feature_names_in_ = X.columns.to_numpy()
        elif hasattr(self, "feature_names_in_"):
            # Refit on unnamed data: drop names left over from an earlier fit.
            # No synthetic names are stored; the inherited
            # get_feature_names_out generates x0, x1, ... when none exist and
            # then still accepts explicit ``input_features`` from upstream steps.
            del self.feature_names_in_

        return self

    def transform(self, X):
        """Return ``X`` centered and, when ``with_std``, divided by the std."""
        check_is_fitted(self)
        X = X - self.means_
        if self.with_std:
            stds = self.stds_
            # A constant column has std 0; divide by 1 instead so it becomes
            # all zeros rather than NaN/inf.
            if hasattr(stds, "where"):  # pandas Series: keep its labels
                divisor = stds.where(stds != 0, 1.0)
            else:
                divisor = np.where(stds == 0, 1.0, stds)
            X = X / divisor
        return X

    def get_feature_names_out(self, input_features=None):
        """Return output feature names via the one-to-one mixin implementation."""
        return super().get_feature_names_out(input_features)




In [6]:
import numpy as np

class MyMinMaxScaler:
    """Rescale each column with ``(X - min) / (max - min)``.

    The minimum and maximum are learned per column in ``fit``. Values seen in
    ``fit`` map into [0, 1]; data outside that range at transform time maps
    outside [0, 1]. Inputs need ``min(axis=0)`` / ``max(axis=0)`` and
    element-wise arithmetic (NumPy arrays, pandas DataFrames).

    Attributes
    ----------
    min_ : per-column minimum seen in ``fit``.
    max_ : per-column maximum seen in ``fit``.
    range_ : ``max_ - min_`` (0 for constant columns).
    """

    def fit(self, X, y=None):
        """Store the per-column min, max and range of ``X``; ``y`` is ignored.

        Returns ``self``.
        """
        self.min_ = X.min(axis=0)
        self.max_ = X.max(axis=0)
        self.range_ = self.max_ - self.min_
        return self

    def transform(self, X):
        """Return ``(X - min_) / range_``, mapping constant columns to 0."""
        span = self.range_
        # A constant column has range 0; dividing by it would give NaN/inf.
        # Use 1 as the divisor there so the column becomes all zeros.
        if hasattr(span, "where"):  # pandas Series: keep its column labels
            divisor = span.where(span != 0, 1)
        else:
            divisor = np.where(span == 0, 1, span)
        return (X - self.min_) / divisor

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the transformed ``X`` in a single call."""
        return self.fit(X, y).transform(X)
