In [1]:
import pandas as pd
import numpy as np
import sklearn # scikit-learn kutubxonasi

# Address of the online copy of the housing dataset (CSV file hosted on GitHub)
URL = "https://github.com/ageron/handson-ml2/blob/master/datasets/housing/housing.csv?raw=true"
# Fetch the CSV from that address and parse it into a DataFrame
df = pd.read_csv(filepath_or_buffer=URL)

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
train_set, test_set = train_test_split(
    df,
    test_size=0.2,
    random_state=47,
)

# Separate the predictors from the target column of the training set
housing = train_set.drop(columns="median_house_value")
housing_labels = train_set["median_house_value"].copy()

# Predictors without the "ocean_proximity" column (handled separately as categorical below)
housing_num = housing.drop(columns="ocean_proximity")

In [2]:
from sklearn.base import BaseEstimator, TransformerMixin
# Positions of the columns that CombinedAttributesAdder reads from its input array
rooms_ix = 3
bedrooms_ix = 4
population_ix = 5
households_ix = 6

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends ratio columns to a 2-D array.

    The columns it reads are selected by the module-level positions
    ``rooms_ix``, ``bedrooms_ix``, ``population_ix`` and ``households_ix``.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default=True
        When true, a third derived column (bedrooms / rooms) is appended
        after the two per-household ratios.
    """

    def __init__(self, add_bedrooms_per_room=True):
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Learn nothing and return ``self``; this class only transforms."""
        return self

    def transform(self, X):
        """Return ``X`` with the derived ratio columns appended on the right.

        Parameters
        ----------
        X : array-like with two dimensions
            Must contain the columns referenced by the module-level indices.
            DataFrames and nested lists are accepted because the input is
            converted with ``np.asarray`` before positional indexing.

        Returns
        -------
        numpy.ndarray
            The input columns followed by rooms/households,
            population/households and, if ``add_bedrooms_per_room`` is set,
            bedrooms/rooms. Zero denominators are not guarded against.
        """
        # Positional slicing such as X[:, i] is not supported by a DataFrame,
        # so normalise to an ndarray first (no-op for ndarray input).
        X = np.asarray(X)
        households = X[:, households_ix]
        rooms = X[:, rooms_ix]

        extra_columns = [
            rooms / households,                 # rooms per household
            X[:, population_ix] / households,   # population per household
        ]
        if self.add_bedrooms_per_room:  # the bedrooms-per-room column is optional
            extra_columns.append(X[:, bedrooms_ix] / rooms)

        return np.c_[(X, *extra_columns)]

In [3]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [6]:
# Numeric preprocessing chain: fill missing values with the column median,
# append the derived ratio columns, then standardise every column.
num_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('attribs_adder', CombinedAttributesAdder(add_bedrooms_per_room=True)),
        ('standart_scaler', StandardScaler()),
    ]
)

In [7]:
# Fit each step on the numeric features and display the transformed array
num_pipeline.fit_transform(X=housing_num)

array([[ 1.13457498, -0.92454961, -1.56217152, ...,  0.32292104,
         0.06230093, -0.62923556],
       [ 1.23455722, -1.33745855, -1.00534875, ..., -0.71454095,
        -0.13223737,  1.30521836],
       [ 1.08958298, -0.81663023,  0.26738901, ...,  0.60083437,
        -0.02383541, -1.10560977],
       ...,
       [ 1.22955811, -1.33276641,  0.02875068, ...,  0.01616245,
        -0.03725734, -0.41270958],
       [-1.26999787,  0.83500553,  0.50602734, ..., -0.27911044,
        -0.07000862,  0.06578307],
       [ 0.54967888, -0.73686373,  0.90375789, ...,  0.36826167,
        -0.00932096, -1.05240397]])

In [10]:
from sklearn.compose import ColumnTransformer

# Column names routed to each branch of the combined preprocessor
num_attribs = list(housing_num.columns)
cat_attribs = ['ocean_proximity']

# Numeric columns go through num_pipeline; the categorical column is one-hot encoded
full_pipeline = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, num_attribs),
        ('cat', OneHotEncoder(), cat_attribs),
    ]
)
full_pipeline.fit_transform(X=housing)

array([[ 1.13457498, -0.92454961, -1.56217152, ...,  0.        ,
         0.        ,  0.        ],
       [ 1.23455722, -1.33745855, -1.00534875, ...,  0.        ,
         0.        ,  1.        ],
       [ 1.08958298, -0.81663023,  0.26738901, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 1.22955811, -1.33276641,  0.02875068, ...,  0.        ,
         0.        ,  1.        ],
       [-1.26999787,  0.83500553,  0.50602734, ...,  0.        ,
         1.        ,  0.        ],
       [ 0.54967888, -0.73686373,  0.90375789, ...,  0.        ,
         0.        ,  0.        ]])

In [11]:
# Keep the fully preprocessed training features for later use
housing_prepared = full_pipeline.fit_transform(X=housing)

In [13]:
# Preview the first five prepared rows, all columns
housing_prepared[:5, :]

array([[ 1.13457498, -0.92454961, -1.56217152, -0.76871007, -0.88873485,
        -0.74981406, -0.90950767, -0.36475082,  0.32292104,  0.06230093,
        -0.62923556,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 1.23455722, -1.33745855, -1.00534875, -0.02440378,  0.57305529,
        -0.26559678,  0.59407809, -0.25820021, -0.71454095, -0.13223737,
         1.30521836,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ],
       [ 1.08958298, -0.81663023,  0.26738901, -0.63694963, -0.86688084,
        -0.82975988, -0.84563854, -0.10933576,  0.60083437, -0.02383541,
        -1.10560977,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ],
       [-1.22500587,  0.92415633,  0.58557345, -0.56873324, -0.64105612,
        -0.38097313, -0.57685595, -0.07790412, -0.16337679,  0.04032646,
        -0.32428161,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ],
       [-1.16501652,  0.78339192, -1