<a href="https://colab.research.google.com/github/abdulhamidll/Data-Science/blob/main/4_qadam_machine_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# housing.csv from the ageron/handson-ml2 GitHub repository; the `?raw=true`
# query presumably makes GitHub serve the file itself rather than its HTML page.
url = "https://github.com/ageron/handson-ml2/blob/master/datasets/housing/housing.csv?raw=true"
df = pd.read_csv(url)
# Preview the first rows to check that the file was parsed as expected.
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
train_set, test_set = train_test_split(df, random_state=42, test_size=0.2)

# Separate the target from the predictors of the training portion.
y = train_set["median_house_value"].copy()
X_train = train_set.drop(columns="median_house_value")

# Numeric-only view of the predictors (everything except `ocean_proximity`).
X_num = X_train.drop(columns="ocean_proximity")

In [4]:
# Display the numeric training features (target and `ocean_proximity` removed).
X_num

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
14196,-117.03,32.71,33.0,3126.0,627.0,2300.0,623.0,3.2596
8267,-118.16,33.77,49.0,3382.0,787.0,1314.0,756.0,3.8125
17445,-120.48,34.66,4.0,1897.0,331.0,915.0,336.0,4.1563
14265,-117.11,32.69,36.0,1421.0,367.0,1418.0,355.0,1.9425
2271,-119.80,36.78,43.0,2382.0,431.0,874.0,380.0,3.5542
...,...,...,...,...,...,...,...,...
11284,-117.96,33.78,35.0,1330.0,201.0,658.0,217.0,6.3700
11964,-117.43,34.02,33.0,3084.0,570.0,1753.0,449.0,3.0500
5390,-118.38,34.03,36.0,2101.0,569.0,1756.0,527.0,2.9344
860,-121.96,37.58,15.0,3575.0,597.0,1777.0,559.0,5.7192


In [5]:
from sklearn.base import BaseEstimator, TransformerMixin
# Positional indices of the columns we need in the numeric feature matrix
# (total_rooms, total_bedrooms, population and households in X_num).
rooms_ix = 3
bedrooms_ix = 4
population_ix = 5
households_ix = 6

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append ratio features built from the rooms, bedrooms, population and households columns.

    Columns are addressed by position through the module-level indices
    ``rooms_ix``, ``bedrooms_ix``, ``population_ix`` and ``households_ix``.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default=True
        When true, a third derived column (bedrooms / rooms) is appended in
        addition to rooms-per-household and population-per-household.
    """

    def __init__(self, add_bedrooms_per_room=True):
        # Stored under the same name as the constructor argument.
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Do nothing and return ``self``: this is a pure transformer, not an estimator."""
        return self

    def transform(self, X):
        """Return ``X`` with the derived ratio columns appended on the right.

        Parameters
        ----------
        X : array-like, 2-D
            Feature matrix. It is coerced with ``np.asarray`` so that a
            DataFrame or nested list works as well as an ndarray.

        Returns
        -------
        numpy.ndarray
            The input columns followed by rooms_per_household,
            population_per_household and, if ``add_bedrooms_per_room`` is
            true, bedrooms_per_room.
        """
        # Positional slicing (X[:, i]) is only defined for arrays; coercing here
        # keeps ndarray input unchanged while also accepting DataFrames/lists.
        X = np.asarray(X)
        rooms_per_household = X[:, rooms_ix] / X[:, households_ix]
        population_per_household = X[:, population_ix] / X[:, households_ix]
        if self.add_bedrooms_per_room:  # the bedrooms_per_room column is optional
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[X, rooms_per_household, population_per_household, bedrooms_per_room]
        return np.c_[X, rooms_per_household, population_per_household]

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Numeric preprocessing: fill missing values with the column median, append the
# derived ratio features, then standardise the result.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("attribs_adder", CombinedAttributesAdder(add_bedrooms_per_room=True)),
        ("std_scaler", StandardScaler()),
    ]
)

In [8]:
from sklearn.compose import ColumnTransformer

# Column groups routed to each branch of the preprocessing step.
cat_attribs = ["ocean_proximity"]
num_attribs = X_num.columns.tolist()

# Numeric columns go through `num_pipeline`; the categorical column is one-hot encoded.
full_pipeline = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ]
)

In [9]:
# Fit the preprocessing on the training features and transform them in one call.
X_prepared = full_pipeline.fit_transform(X_train)

In [10]:
# Display the prepared feature matrix produced by the preprocessing step.
X_prepared

array([[ 1.27258656, -1.3728112 ,  0.34849025, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.70916212, -0.87669601,  1.61811813, ...,  0.        ,
         0.        ,  1.        ],
       [-0.44760309, -0.46014647, -1.95271028, ...,  0.        ,
         0.        ,  1.        ],
       ...,
       [ 0.59946887, -0.75500738,  0.58654547, ...,  0.        ,
         0.        ,  0.        ],
       [-1.18553953,  0.90651045, -1.07984112, ...,  0.        ,
         0.        ,  0.        ],
       [-1.41489815,  0.99543676,  1.85617335, ...,  0.        ,
         1.        ,  0.        ]])

In [11]:
### Linear Regression
from sklearn.linear_model import LinearRegression
# Create the linear regression model.
LR_model = LinearRegression()
# Fit on the prepared training matrix; as the cell's last expression, the
# returned estimator is also displayed.
LR_model.fit(X_prepared, y)

LinearRegression()

In [12]:
# Draw 10 rows for a quick spot check of the model. They are taken from
# X_train, i.e. data the model was fitted on, so this is a sanity check rather
# than a held-out evaluation. The fixed seed keeps the sampled rows (and every
# output derived from them) identical across kernel restarts.
test_data = X_train.sample(10, random_state=42)
test_data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
4157,-118.19,34.12,46.0,3387.0,820.0,2833.0,813.0,2.987,<1H OCEAN
14801,-117.12,32.59,28.0,2793.0,706.0,1825.0,676.0,2.6724,NEAR OCEAN
6587,-118.23,34.22,37.0,1376.0,237.0,618.0,226.0,5.9771,<1H OCEAN
11916,-117.4,33.95,32.0,1979.0,491.0,954.0,444.0,2.4408,INLAND
11483,-118.0,33.73,26.0,2236.0,280.0,809.0,282.0,6.7395,<1H OCEAN
11446,-117.97,33.67,17.0,4466.0,640.0,2166.0,666.0,6.979,<1H OCEAN
2162,-119.82,36.78,36.0,1370.0,289.0,812.0,282.0,2.6127,INLAND
5984,-117.73,34.12,26.0,6459.0,894.0,2487.0,885.0,6.2089,INLAND
2433,-119.63,36.6,33.0,1589.0,294.0,1102.0,307.0,1.9676,INLAND
1963,-120.58,38.77,15.0,2155.0,394.0,857.0,356.0,4.03,INLAND


In [14]:
# Look up the true target values of the sampled rows by their index labels.
test_label = y.loc[test_data.index]
test_label

4157     176900.0
14801    144500.0
6587     431800.0
11916    117300.0
11483    342800.0
11446    330700.0
2162      69600.0
5984     261800.0
2433      62400.0
1963     141200.0
Name: median_house_value, dtype: float64

In [15]:
# Reuse the already-fitted preprocessing (transform only, no refit) ...
test_data_prepared = full_pipeline.transform(test_data)
# ... and predict with the fitted linear model.
predicted_labels = LR_model.predict(test_data_prepared)

In [16]:
# Display the raw predictions returned by the model.
predicted_labels

array([187971.22239035, 194774.45371304, 301574.4694785 , 129230.96722085,
       320695.84583319, 318467.62319173, 108761.98669944, 262152.68774101,
        63147.66250568, 117639.06427522])

In [17]:
# Side-by-side comparison: "Bashorat" holds the predictions and "Asl qiymat"
# the true values for the sampled rows.
pd.DataFrame(
    {
        "Bashorat": predicted_labels,
        "Asl qiymat": test_label,
    }
)

Unnamed: 0,Bashorat,Asl qiymat
4157,187971.22239,176900.0
14801,194774.453713,144500.0
6587,301574.469478,431800.0
11916,129230.967221,117300.0
11483,320695.845833,342800.0
11446,318467.623192,330700.0
2162,108761.986699,69600.0
5984,262152.687741,261800.0
2433,63147.662506,62400.0
1963,117639.064275,141200.0
