In [16]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd, random


In [17]:
# Synthetic dataset: areas from 50 to 290 in steps of 10, with prices following
# the quadratic 3*a^2 + 50*a + 20 plus integer noise in [-50, 50].
areas = list(range(50, 300, 10))

random.seed(42)  # fixed seed so the noise draws (and the prices) are reproducible
prices = []
for area in areas:
    noise = random.randint(-50, 50)  # exactly one draw per area, in list order
    prices.append(3 * area * area + 50 * area + 20 + noise)

print(prices)


[10051, 13784, 18173, 23264, 28805, 35001, 41798, 49187, 57264, 65783, 75056, 84864, 95239, 106181, 117845, 130024, 142774, 156173, 170181, 184797, 199999, 215834, 232247, 249173, 266841]


In [18]:
# Collect the synthetic samples into a two-column frame: one row per area.
df = pd.DataFrame({"area": areas, "price": prices})

# Double brackets keep the features two-dimensional (a DataFrame), while the
# target is a plain Series.
features = df[["area"]]
target = df["price"]

# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Quadratic regression: expand `area` into [area, area^2] and fit ordinary
# least squares on the expanded features. `degree` is the hyperparameter to
# tune; include_bias=False skips the constant column because LinearRegression
# fits its own intercept.
poly_features = PolynomialFeatures(degree=2, include_bias=False)
pipe = make_pipeline(poly_features, LinearRegression())

# fit() returns the fitted pipeline, so the hold-out predictions can be chained.
y_pred = pipe.fit(X_train, y_train).predict(X_test)

In [20]:

# Hold-out metrics for the regression.
# R^2 is the coefficient of determination (share of variance explained), not a
# classification accuracy; 1.0 would mean a perfect fit on the test rows.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# RMSE is the square root of the MSE, which puts the error back in price units.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

In [21]:
# Report the root-mean-squared error on the held-out rows (same units as `price`).
print(f"RMSE: {rmse}")

RMSE: 51.08864990653534


In [22]:
# NOTE: `r2` comes from r2_score, i.e. the coefficient of determination of a
# regression; the "Accuracy" label is informal and is not a classification accuracy.
print(f"Model Accuracy: {r2}")  # R^2 on the held-out rows; values near 1.0 mean a near-perfect fit

Model Accuracy: 0.9999996143436369


In [23]:
def predict_price(area_m2):
    """Predict the price for one or more areas with the fitted ``pipe``.

    Parameters
    ----------
    area_m2 : scalar or 1-D array-like
        Area value(s) to score. A single number behaves exactly as before;
        a list, tuple, array or Series yields one prediction per entry.

    Returns
    -------
    The result of ``pipe.predict``: one prediction per input area. A scalar
    input therefore still produces a length-1 result; use ``[0]`` or
    ``.item()`` on it when a bare number is needed.
    """
    # Promote a scalar to a length-1 array; 1-D inputs pass through unchanged.
    area_values = np.atleast_1d(area_m2)
    # The column is named "area" to match the feature frame used for fitting.
    X_new = pd.DataFrame({"area": area_values})
    return pipe.predict(X_new)
# Spot-check the smallest area in `areas` (50); the result prints as a one-element array.
print(predict_price(50))


[10004.24009416]


Accuracy: 1.0
ROC-AUC : 1.0
              precision    recall  f1-score   support

           0      1.000     1.000     1.000         3
           1      1.000     1.000     1.000         2

    accuracy                          1.000         5
   macro avg      1.000     1.000     1.000         5
weighted avg      1.000     1.000     1.000         5

P(expensive=1 | 185 m²): 0.05623369118734913


