In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [None]:
# Read the housing table, then separate the target column (`price`)
# from the remaining columns, which serve as model features.
df = pd.read_csv('Housing.csv')
y = df['price']
X = df.drop(columns='price')

In [None]:
# Print the feature table for a quick visual check; pandas abbreviates
# the middle rows of a long frame with "..".
print(X)

     area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0    7420         4          2        3      yes        no       no   
1    8960         4          4        4      yes        no       no   
2    9960         3          2        2      yes        no      yes   
3    7500         4          2        2      yes        no      yes   
4    7420         4          1        2      yes       yes      yes   
..    ...       ...        ...      ...      ...       ...      ...   
540  3000         2          1        1      yes        no      yes   
541  2400         3          1        1       no        no       no   
542  3620         2          1        1      yes        no       no   
543  2910         3          1        1       no        no       no   
544  3850         3          1        2      yes        no       no   

    hotwaterheating airconditioning  parking prefarea furnishingstatus  
0                no             yes        2      yes        furnished  
1

In [None]:
# Print the target column (`price`) to check its length and dtype.
print(y)

0      13300000
1      12250000
2      12250000
3      12215000
4      11410000
         ...   
540     1820000
541     1767150
542     1750000
543     1750000
544     1750000
Name: price, Length: 545, dtype: int64


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler


# Column groups, one per kind of preprocessing.
binary_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
category_col = ['furnishingstatus']
numeric_cols = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']


# The transformed matrix lays its columns out in transformer order:
# ordinal-encoded binary flags, then the one-hot columns (first level
# dropped to avoid a redundant dummy), then the standardized numerics.
# Any column not listed above would be appended unchanged.
ct = ColumnTransformer(
    transformers=[
        ('binary', OrdinalEncoder(), binary_cols),
        ('cat', OneHotEncoder(drop='first'), category_col),
        ('num', StandardScaler(), numeric_cols)
    ],
    remainder='passthrough'
)


# Transform from the raw frame instead of from X itself. The previous
# `X = ct.fit_transform(X)` replaced the DataFrame with an array, so running
# this cell a second time failed when the transformer looked up column names.
# NOTE(review): the transformer is fitted on every row before the train/test
# split, so test-row statistics feed the scaler - consider fitting on the
# training rows only.
X = ct.fit_transform(df.drop('price', axis=1))


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares model, fitted on the training partition only.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)


In [None]:
# Print the held-out feature matrix to inspect the encoded and scaled values.
print(X_test)

[[ 1.          0.          0.         ... -0.57018671  0.22441013
   0.35597563]
 [ 1.          0.          1.         ... -0.57018671 -0.92939666
   1.51769249]
 [ 1.          0.          0.         ... -0.57018671  0.22441013
  -0.80574124]
 ...
 [ 1.          0.          1.         ... -0.57018671 -0.92939666
   0.35597563]
 [ 1.          0.          0.         ... -0.57018671  0.22441013
   0.35597563]
 [ 1.          0.          1.         ...  1.42181174 -0.92939666
   1.51769249]]


In [None]:
# Predict prices for the held-out rows.
y_pred = regressor.predict(X_test)


# Two-column array: predicted price on the left, true price on the right.
comparison = np.column_stack((y_pred, y_test.values))


print("   Predicted  |   Actual")
# Limit the 2-decimal formatting to this printout. A bare
# np.set_printoptions(...) would change how every later (or re-run) cell
# prints arrays for the rest of the kernel session.
with np.printoptions(precision=2):
    print(comparison[:10])

   Predicted  |   Actual
[[3950288.62 4585000.  ]
 [6173868.82 6083000.  ]
 [4483635.99 4007500.  ]
 [7258732.75 6930000.  ]
 [2836727.58 2940000.  ]
 [7032947.1  6195000.  ]
 [3203851.47 3535000.  ]
 [3270994.01 2940000.  ]
 [3472554.04 3500000.  ]
 [8289978.33 7980000.  ]]


In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the held-out rows.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)



In [None]:

# R^2 on the rows the model was fitted on, next to R^2 on the held-out
# rows; a large gap between the two would point to overfitting.
train_score = regressor.score(X_train, y_train)
test_score = r2_score(y_test, y_pred)

print(f"train: {train_score}")
print(f"test: {test_score}")

train: 0.6822529683801706
test: 0.6611214250980103
