In [1]:
import pandas as pd

# Location of the CSV input, resolved relative to the notebook's working directory.
file_path = 'real_estate_data.csv'

# Parse the CSV into a DataFrame; the modelling cell below reads its columns from `data`.
data = pd.read_csv(filepath_or_buffer=file_path)


In [2]:
import pymc as pm
import numpy as np

def standardize(values, name="values"):
    """Return ``values`` centred on its mean and divided by its standard deviation.

    The scale is whatever ``values.std()`` returns (for a NumPy array that is
    the population standard deviation, ``ddof=0``).

    Parameters
    ----------
    values : numpy.ndarray
        Numeric array exposing ``.mean()`` and ``.std()``.
    name : str
        Label used only to make the error message identifiable.

    Returns
    -------
    numpy.ndarray
        ``(values - values.mean()) / values.std()``.

    Raises
    ------
    ValueError
        If the standard deviation is zero or not finite (constant column,
        empty input, or NaN/inf in the data). Without this check the division
        would silently produce NaN/inf and the failure would only surface
        later, inside the sampler.
    """
    scale = values.std()
    if not np.isfinite(scale) or scale == 0:
        raise ValueError(
            f"Cannot standardize {name!r}: standard deviation is {scale}"
        )
    return (values - values.mean()) / scale


# Raw predictor and response columns as NumPy arrays.
x1 = data['Surface_area'].values
x2 = data['Rooms'].values
x3 = data['Distance_to_center'].values
y = data['Price'].values

# Put every variable on a zero-mean / unit-variance scale so the unit-scale
# priors used in the model below apply equally to all coefficients.
x1_std = standardize(x1, 'Surface_area')
x2_std = standardize(x2, 'Rooms')
x3_std = standardize(x3, 'Distance_to_center')
y_std = standardize(y, 'Price')

# Fixed seed so that Restart & Run All reproduces the same posterior draws
# (and therefore the same summary table).
RANDOM_SEED = 42

# Linear regression on the standardized variables:
#   y_std ~ Normal(beta_0 + beta_1*x1_std + beta_2*x2_std + beta_3*x3_std, sigma)
with pm.Model() as model:
    # Normal(0, 1) priors on the intercept and the three slopes.
    beta_0 = pm.Normal("beta_0", mu=0, sigma=1)
    beta_1 = pm.Normal("beta_1", mu=0, sigma=1)
    beta_2 = pm.Normal("beta_2", mu=0, sigma=1)
    beta_3 = pm.Normal("beta_3", mu=0, sigma=1)
    # HalfNormal keeps the residual scale strictly positive.
    sigma = pm.HalfNormal("sigma", sigma=1)

    # Linear predictor, one value per observation.
    mu = beta_0 + beta_1 * x1_std + beta_2 * x2_std + beta_3 * x3_std

    # Likelihood of the observed (standardized) prices.
    y_obs = pm.Normal("y_obs", mu=mu, sigma=sigma, observed=y_std)

    # 2000 posterior draws per chain after 1000 tuning steps; the result is an
    # ArviZ InferenceData object.
    trace = pm.sample(
        2000,
        tune=1000,
        return_inferencedata=True,
        random_seed=RANDOM_SEED,
    )


import arviz as az

# Parameters to report: the intercept, the three slopes and the noise scale.
reported_vars = ["beta_0", "beta_1", "beta_2", "beta_3", "sigma"]

# Tabulate posterior statistics and sampling diagnostics for those parameters.
summary = az.summary(trace, var_names=reported_vars)

# Bare expression on the last line so the notebook renders the table.
summary

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta_0, beta_1, beta_2, beta_3, sigma]


Output()

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 20 seconds.


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta_0,-0.0,0.008,-0.015,0.013,0.0,0.0,13361.0,6578.0,1.0
beta_1,0.994,0.008,0.98,1.008,0.0,0.0,8889.0,6123.0,1.0
beta_2,0.148,0.008,0.133,0.161,0.0,0.0,10073.0,6526.0,1.0
beta_3,-0.119,0.008,-0.134,-0.105,0.0,0.0,9402.0,6466.0,1.0
sigma,0.075,0.005,0.065,0.086,0.0,0.0,9021.0,5899.0,1.0
