In [19]:
import pandas as pd
import numpy as np
import sklearn 


# Location of the online dataset (housing CSV served raw from GitHub)
url = 'https://github.com/ageron/handson-ml2/blob/master/datasets/housing/housing.csv?raw=true'
df = pd.read_csv(url)

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=45)

# Split the training rows into the prediction target and the predictors.
housing_labels = train_set['median_house_value'].copy()
housing = train_set.drop(columns='median_house_value')

# Predictors without the 'ocean_proximity' column (handled separately as a
# categorical attribute further down).
housing_num = housing.drop(columns='ocean_proximity')

In [20]:
from sklearn.base import BaseEstimator, TransformerMixin

# Positional indices of the columns that CombinedAttributesAdder.transform reads
# from the array it receives.
# NOTE(review): presumably these match the rooms / bedrooms / population /
# households columns of housing_num — confirm against list(housing_num).
rooms_ix = 3
bedrooms_ix = 4
population_ix = 5
households_ix = 6

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append per-household and per-room ratio columns to a 2-D numeric array.

    Columns are picked by position through the module-level indices
    ``rooms_ix``, ``bedrooms_ix``, ``population_ix`` and ``households_ix``,
    so the input must keep those columns at those positions.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default=True
        When true, a third derived column (bedrooms / rooms) is appended
        after the two per-household ratios.
    """

    def __init__(self, add_bedrooms_per_room = True):
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from ``X`` or ``y``."""
        # This class is only a transformer, not an estimator.
        return self

    def transform(self, X):
        """Return the input with the derived ratio columns appended on the right.

        Parameters
        ----------
        X : 2-D array-like
            NumPy array, DataFrame or nested sequence. It is converted with
            ``np.asarray`` so that positional slicing works for all of them
            (an ndarray is passed through without copying).

        Returns
        -------
        numpy.ndarray
            The original columns followed by rooms-per-household,
            population-per-household and, when ``add_bedrooms_per_room`` is
            true, bedrooms-per-room.

        Notes
        -----
        Zero denominators are not guarded against.
        """
        data = np.asarray(X)
        households = data[:, households_ix]
        rooms = data[:, rooms_ix]

        extra_columns = [
            rooms / households,                   # rooms per household
            data[:, population_ix] / households,  # population per household
        ]
        if self.add_bedrooms_per_room:  # the bedrooms-per-room column is optional
            extra_columns.append(data[:, bedrooms_ix] / rooms)

        return np.c_[(data, *extra_columns)]

# Next, create a new Pipeline (a "conveyor" of transformation steps)


In [21]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Create the new pipeline

In [22]:
# Preprocessing chain for the numeric columns; steps run in the order listed:
#   imputer       -> fill missing values using the 'median' strategy
#   attribs_adder -> append the derived ratio columns
#   std_scaler    -> rescale the columns with StandardScaler
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('attribs_adder', CombinedAttributesAdder()),
    ('std_scaler', StandardScaler()),
])

# Fit the chain on the numeric training features and display the result.
num_pipeline.fit_transform(housing_num)

array([[ 0.94382381, -0.70482757, -1.08618132, ...,  0.48780807,
        -0.05917339, -0.89171157],
       [-0.96871548,  1.35954492,  0.66045582, ...,  0.04334566,
        -0.07066601, -0.40625808],
       [-0.84886968,  1.23783588, -0.68921833, ...,  0.2211216 ,
         0.01691036, -0.6163858 ],
       ...,
       [-0.8838247 ,  1.41571832, -0.37164794, ..., -0.21236402,
        -0.09138373, -0.0657545 ],
       [ 0.88889449, -0.72823316, -0.84800353, ..., -0.27762707,
         0.06623893, -0.00187635],
       [ 0.72410651, -0.66737864,  0.58106323, ...,  0.6187937 ,
        -0.0326693 , -1.03207012]])

# In the new pipeline, convert the text (categorical) data into numeric data

In [23]:
from sklearn.compose import ColumnTransformer


In [24]:
 # Column labels routed to each sub-transformer.
 num_attribs = housing_num.columns.tolist()
 cat_attribs = ['ocean_proximity']

 # Numeric columns go through num_pipeline; the categorical column is
 # one-hot encoded. Both results end up in a single output.
 full_pipeline = ColumnTransformer(transformers=[
     ('num', num_pipeline, num_attribs),
     ('cat', OneHotEncoder(), cat_attribs),
 ])



In [27]:
# Fit the combined column transformer on the training predictors and keep the
# transformed result.
# NOTE(review): the variable name looks like a misspelling of "housing_prepared";
# it is left as-is because other cells reference it.
housing_perepared =  full_pipeline.fit_transform(housing)

In [28]:
# Preview the first five rows of the transformed training data.
housing_perepared[:5]

array([[ 9.43823814e-01, -7.04827574e-01, -1.08618132e+00,
         2.40758465e-01, -1.00358863e-01, -2.50026204e-01,
        -6.83765323e-02,  7.80490000e-01,  4.87808074e-01,
        -5.91733911e-02, -8.91711568e-01,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [-9.68715480e-01,  1.35954492e+00,  6.60455825e-01,
         4.59582385e-01,  3.49671998e-01,  7.73617686e-02,
         4.13777023e-01, -1.79806073e-01,  4.33456570e-02,
        -7.06660143e-02, -4.06258085e-01,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [-8.48869676e-01,  1.23783588e+00, -6.89218332e-01,
        -6.01713625e-01, -7.33735629e-01, -6.40546844e-01,
        -7.33958071e-01,  2.87061385e-01,  2.21121603e-01,
         1.69103628e-02, -6.16385798e-01,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 1.13857325e+00, -1.14485255e