In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Simple Linear Regression

In [133]:
# Synthetic housing data: 25 areas from 50 to 290 in steps of 10.
areas = list(range(50, 300, 10))

# A dedicated, seeded generator plays the same role as `random_state` in
# train_test_split: the cell yields the same noise on every run, and it does
# so without reseeding the process-wide `random` module used by other code.
rng = random.Random(42)

# Ground truth is price = 3 * area + 50 plus integer noise drawn uniformly
# from [-50, 50], so a fitted line should come out close to y = 3x + 50.
prices = [3 * area + 50 + rng.randint(-50, 50) for area in areas]
print(prices)

[231, 194, 213, 334, 305, 331, 358, 377, 484, 433, 536, 574, 579, 551, 645, 654, 634, 663, 701, 747, 779, 844, 887, 843, 941]


In [134]:
# Collect the samples in one table with an "area" and a "price" column.
df = pd.DataFrame(dict(area=areas, price=prices))

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs. The features are selected with double
# brackets so they stay a 2-D DataFrame, while the target is a 1-D Series.
X_train, X_test, y_train, y_test = train_test_split(
    df[["area"]],
    df["price"],
    test_size=0.2,
    random_state=42,
)

In [135]:
# Fit an ordinary least-squares line on the training split. `fit` returns the
# estimator itself, so the fitted model can be bound in a single expression;
# the bare name on the last line keeps the estimator as the cell's output.
Linear_model = LinearRegression().fit(X_train, y_train)
Linear_model

In [136]:
# Predict prices for the held-out test rows; the bare name on the last line
# makes the notebook display the resulting array.
y_pred = Linear_model.predict(X_test)
y_pred

array([426.7087243 , 674.30145738, 179.11599122, 890.94509882,
       519.5559992 ])

In [137]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [138]:
# Score the predictions on the test split: y_test holds the true prices,
# y_pred the model's estimates for the same rows.

# R^2 (coefficient of determination): the share of the variance in y_test
# that the model explains. It is a goodness-of-fit score for regression,
# not a classification "accuracy".
r2 = r2_score(y_test, y_pred)

# MAE: the average absolute error.
mae = mean_absolute_error(y_test, y_pred)

# MSE: the average squared error. It is the usual regression loss because,
# unlike MAE, it is differentiable everywhere and penalises large errors more.
mse = mean_squared_error(y_test, y_pred)

# RMSE: square root of the MSE, which brings the error back to price units.
rmse = np.sqrt(mse)

In [139]:
# Report the test-set root mean squared error computed in the metrics cell.
print(f"RMSE: {rmse}")

RMSE: 50.71751143339624


In [140]:
# NOTE(review): the label says "Accuracy", but the value printed is R^2 (the
# coefficient of determination from r2_score), not a classification accuracy.
print(f"Model Accuracy: {r2}")  # R^2 is about 0.94 on this test split

Model Accuracy: 0.9355473010342266


In [141]:
def predict_price(area_m2):
    """Predict the price for one property from its area.

    Uses the module-level ``Linear_model`` fitted in an earlier cell.

    Args:
        area_m2: Area of the property. It is wrapped in a one-row DataFrame
            under the "area" column, the same column name used for fitting.

    Returns:
        The array produced by ``Linear_model.predict`` for that single row.
        Index it with ``[0]`` or call ``.item()`` to get a plain number.
    """
    query = pd.DataFrame({"area": [area_m2]})
    prediction = Linear_model.predict(query)
    return prediction


print(predict_price(50))


[179.11599122]


=> Giá trị lý thuyết là 3 × 50 + 50 = 200; dự đoán 179.1 lệch khoảng 21, nằm trong biên độ nhiễu [-50, 50] nên chấp nhận được.

# Multiple Linear Regression

In [142]:
# Synthetic housing data: 25 areas from 50 to 290 in steps of 10.
areas = list(range(50, 300, 10))

# One bedroom count per sample: 1, 2, ..., 25.
# NOTE(review): this column is an exact linear function of the area
# (bedroom = (area - 40) / 10) and the price formula below never uses it, so
# it carries no information beyond `areas`. A second feature that varies
# independently would be needed for a meaningful multiple regression.
bedrooms = list(range(1, len(areas) + 1))

# A dedicated, seeded generator plays the same role as `random_state` in
# train_test_split: the cell yields the same noise on every run, and it does
# so without reseeding the process-wide `random` module used by other code.
rng = random.Random(42)

# Ground truth is price = 3 * area + 50 plus integer noise drawn uniformly
# from [-50, 50], so a fitted line should come out close to y = 3x + 50.
prices = [3 * area + 50 + rng.randint(-50, 50) for area in areas]
print(bedrooms)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]


In [143]:
# Collect the samples in one table: two feature columns and the target.
df = pd.DataFrame(dict(area=areas, bedroom=bedrooms, price=prices))

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs. The column list selects both features as a
# 2-D DataFrame, while the target stays a 1-D Series.
X_train, X_test, y_train, y_test = train_test_split(
    df[["area", "bedroom"]],
    df["price"],
    test_size=0.2,
    random_state=42,
)

In [144]:
# Refit ordinary least squares, now on the two-feature training split. This
# rebinds `Linear_model`, replacing the single-feature model from earlier.
# `fit` returns the estimator itself; the bare name on the last line keeps
# the estimator as the cell's output.
Linear_model = LinearRegression().fit(X_train, y_train)
Linear_model

In [145]:
# Predict prices for the held-out test rows with the two-feature model; the
# bare name on the last line makes the notebook display the array.
# NOTE(review): the recorded output equals the single-feature model's
# predictions, which is consistent with `bedroom` being an exact linear
# function of `area` in this dataset.
y_pred = Linear_model.predict(X_test)
y_pred

array([426.7087243 , 674.30145738, 179.11599122, 890.94509882,
       519.5559992 ])

In [146]:
# Score the two-feature model on the test split: y_test holds the true
# prices, y_pred the model's estimates for the same rows.

# R^2 (coefficient of determination): the share of the variance in y_test
# that the model explains. It is a goodness-of-fit score for regression,
# not a classification "accuracy".
r2 = r2_score(y_test, y_pred)

# MAE: the average absolute error.
mae = mean_absolute_error(y_test, y_pred)

# MSE: the average squared error. It is the usual regression loss because,
# unlike MAE, it is differentiable everywhere and penalises large errors more.
mse = mean_squared_error(y_test, y_pred)

# RMSE: square root of the MSE, which brings the error back to price units.
rmse = np.sqrt(mse)

In [147]:
# Report the test-set root mean squared error of the two-feature model.
print(f"RMSE: {rmse}")

RMSE: 50.71751143339645


In [148]:
# NOTE(review): the label says "Accuracy", but the value printed is R^2 (the
# coefficient of determination from r2_score), not a classification accuracy.
print(f"Model Accuracy: {r2}")  

Model Accuracy: 0.9355473010342261


In [149]:
def predict_price(area_m2, bedroom_m2):
    """Predict the price for one property from its area and bedroom value.

    Uses the module-level ``Linear_model`` fitted on the "area" and "bedroom"
    columns. Defining this function replaces any earlier ``predict_price``
    that took only an area.

    Args:
        area_m2: Area of the property; becomes the "area" column.
        bedroom_m2: Value for the "bedroom" column. Despite the ``_m2``
            suffix, the training data filled that column with a bedroom
            count (1..25), not an area.

    Returns:
        The array produced by ``Linear_model.predict`` for that single row.
        Index it with ``[0]`` or call ``.item()`` to get a plain number.
    """
    query = pd.DataFrame({"area": [area_m2], "bedroom": [bedroom_m2]})
    prediction = Linear_model.predict(query)
    return prediction


# NOTE(review): in the training data bedroom == (area - 40) / 10, so the pair
# (300, 10) does not follow the pattern the model was fitted on; treat this
# prediction with caution.
print(predict_price(300, 10))


[947.94045569]
