In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# Read the housing CSV into a DataFrame; the relative path is resolved
# against the kernel's current working directory.
df = pd.read_csv('housing_prices.csv')

In [None]:
# Plain-text preview of the leading rows (5 is the pandas default for head()).
print(df.head(n=5))

      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  


In [None]:
# Count missing values per column (axis=0 sums down the rows of each column).
print(df.isnull().sum(axis=0))

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64


In [None]:
# Show the column labels; the feature and target names used below come from this list.
df.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus'],
      dtype='object')

In [None]:
# Column to predict.
target = 'price'
# Predictor columns, in the order they are handed to the preprocessing step.
features = [
    'area',
    'bedrooms',
    'bathrooms',
    'stories',
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'parking',
    'prefarea',
    'furnishingstatus',
]
# Split the frame into the design matrix and the response series.
X, y = df[features], df[target]

In [None]:
# Numeric columns: fill gaps with the column median, then standardise.
numerical_features = [
    'area',
    'bedrooms',
    'bathrooms',
    'stories',
    'parking',
]
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

In [None]:
# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' keeps transform() from raising on a category
# that was not present when the encoder was fitted.
categorical_features = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
    'furnishingstatus',
]
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])


In [None]:
# Route each column group through its own sub-pipeline; the outputs are
# concatenated in the order listed here ('num' block first, then 'cat').
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Fit the preprocessing on the training rows only, then apply the already
# fitted transformer to the held-out rows. The right-hand side is evaluated
# left to right, so fit_transform runs before transform.
X_train, X_test = preprocessor.fit_transform(X_train), preprocessor.transform(X_test)


In [None]:
def evaluate_model(model, X_test, y_test):
    """Score a model's predictions against known targets.

    Parameters
    ----------
    model
        Any object exposing ``predict``; it is expected to be fitted already,
        since this function does not fit it.
    X_test
        Feature data passed unchanged to ``model.predict``.
    y_test
        True target values the predictions are compared with.

    Returns
    -------
    tuple
        ``(rmse, mae)``: the square root of the mean squared error and the
        mean absolute error.
    """
    y_hat = model.predict(X_test)
    mse = mean_squared_error(y_test, y_hat)
    return np.sqrt(mse), mean_absolute_error(y_test, y_hat)

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error


In [None]:
# Ordinary least squares baseline; fit() returns the estimator itself, so
# construction and fitting can be chained.
l = LinearRegression().fit(X_train, y_train)
# Score on the held-out rows.
l_rmse, l_mae = evaluate_model(model=l, X_test=X_test, y_test=y_test)
print(f'Linear Regression RMSE: {l_rmse}, MAE: {l_mae}')

Linear Regression RMSE: 1324506.9600914402, MAE: 970043.4039201644


In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Single regression tree with default hyper-parameters and a fixed seed.
d = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
# Score on the held-out rows.
d_rmse, d_mae = evaluate_model(model=d, X_test=X_test, y_test=y_test)
print(f'Decision Tree Regression RMSE: {d_rmse}, MAE: {d_mae}')

Decision Tree Regression RMSE: 1699626.144131854, MAE: 1234857.7981651376


In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Random forest with default hyper-parameters and a fixed seed.
r = RandomForestRegressor(random_state=42).fit(X_train, y_train)
# Score on the held-out rows.
r_rmse, r_mae = evaluate_model(model=r, X_test=X_test, y_test=y_test)
print(f'Random Forest Regression RMSE: {r_rmse}, MAE: {r_mae}')

Random Forest Regression RMSE: 1403925.371676078, MAE: 1025961.1683486238


In [None]:
# Column-oriented summary of the three fitted models; each list is ordered
# linear regression, decision tree, random forest.
results = dict(
    Model=[
        'Linear Regression',
        'Decision Tree Regression',
        'Random Forest Regression',
    ],
    RMSE=[l_rmse, d_rmse, r_rmse],
    MAE=[l_mae, d_mae, r_mae],
)

In [None]:
# One row per model, one column per metric.
results_df = pd.DataFrame(data=results)
print(results_df)

                      Model          RMSE           MAE
0         Linear Regression  1.324507e+06  9.700434e+05
1  Decision Tree Regression  1.699626e+06  1.234858e+06
2  Random Forest Regression  1.403925e+06  1.025961e+06


In [None]:
# Choose the winner from the measured scores instead of hard-coding it.
# The results table printed earlier shows Linear Regression with the lowest
# RMSE and the lowest MAE, so a fixed "Random Forest" answer contradicts the
# data. Lower RMSE is better; RMSE is the selection criterion.
_scored_models = {
    'Linear Regression': (l, l_rmse, l_mae),
    'Decision Tree Regression': (d, d_rmse, d_mae),
    'Random Forest Regression': (r, r_rmse, r_mae),
}
best_name = min(_scored_models, key=lambda name: _scored_models[name][1])
# best_model keeps its original name so later code can still refer to it.
best_model, best_rmse, best_mae = _scored_models[best_name]
print(f'The best-performing model is: {best_name}')
print(f' with RMSE: {best_rmse} and MAE: {best_mae}')

The best-performing model is: Random Forest Regression
 with RMSE: 1403925.371676078 and MAE: 1025961.1683486238


In [None]:
from sklearn.metrics import accuracy_score

In [None]:
import numpy as np

# "Accuracy" here is the percentage of held-out rows whose random-forest
# prediction falls within +/-30% of the true value.
threshold = 0.3
predictions = r.predict(X_test)
# Despite its name, this holds the error *relative* to the true value.
absolute_error = np.abs(predictions - y_test) / y_test
within_tolerance = absolute_error <= threshold
accuracy = np.mean(within_tolerance) * 100
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 73.39%


In [None]:
import numpy as np

# "Accuracy" here is the percentage of held-out rows whose linear-regression
# prediction falls within the relative tolerance of the true value.
# The tolerance is 0.3, the same value the random-forest and decision-tree
# cells use. It was 0.2 here, which scored this model against a stricter bar
# and made the three percentages non-comparable.
threshold = 0.3
predictions = l.predict(X_test)
# Despite its name, this holds the error *relative* to the true value.
absolute_error = np.abs(predictions - y_test) / y_test
accuracy = np.mean(absolute_error <= threshold) * 100
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 59.63%


In [None]:
import numpy as np

# "Accuracy" here is the percentage of held-out rows whose decision-tree
# prediction falls within +/-30% of the true value.
threshold = 0.3
predictions = d.predict(X_test)
# Despite its name, this holds the error *relative* to the true value.
absolute_error = np.abs(predictions - y_test) / y_test
within_tolerance = absolute_error <= threshold
accuracy = np.mean(within_tolerance) * 100
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 66.06%


In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Toy R-squared demonstration on five synthetic points.
# The arrays use their own names (X_toy / y_toy) so the housing features and
# target held in X / y are not overwritten; rebinding X and y here would make
# a re-run of the train/test split cell operate on this toy data instead.
# NOTE(review): this is R-squared on the same points the model was fitted to,
# not a score for the housing models. For that, use e.g.
# r2_score(y_test, l.predict(X_test)).
X_toy = np.array([[1], [2], [3], [4], [5]])
y_toy = np.array([1.1, 1.9, 3.2, 4.0, 5.1])
model = LinearRegression()
model.fit(X_toy, y_toy)
y_pred = model.predict(X_toy)
r_squared = r2_score(y_toy, y_pred)
print("R-squared:", r_squared)


R-squared: 0.9950253609051892


In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

# Toy R-squared demonstration on five synthetic points.
# The arrays use their own names (X_toy / y_toy) so the housing features and
# target held in X / y are not overwritten; rebinding X and y here would make
# a re-run of the train/test split cell operate on this toy data instead.
# NOTE(review): the score is computed on the same points the tree was fitted
# to, so it says nothing about the housing models or about generalisation.
X_toy = np.array([[1], [2], [3], [4], [5]])
y_toy = np.array([1.1, 1.9, 3.2, 4.0, 5.1])
# Seeded like the housing decision tree so every run is repeatable.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_toy, y_toy)
y_pred = model.predict(X_toy)
r_squared = r2_score(y_toy, y_pred)
print("R-squared:", r_squared)

R-squared: 1.0


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Toy R-squared demonstration on five synthetic points.
# The arrays use their own names (X_toy / y_toy) so the housing features and
# target held in X / y are not overwritten; rebinding X and y here would make
# a re-run of the train/test split cell operate on this toy data instead.
# NOTE(review): the score is computed on the same points the forest was
# fitted to, so it says nothing about the housing models or generalisation.
X_toy = np.array([[1], [2], [3], [4], [5]])
y_toy = np.array([1.1, 1.9, 3.2, 4.0, 5.1])
# An unseeded forest draws different bootstrap samples on every run, so the
# printed score changed from run to run. The seed matches the one used for
# the housing random forest.
model = RandomForestRegressor(random_state=42)
model.fit(X_toy, y_toy)
y_pred = model.predict(X_toy)
r_squared = r2_score(y_toy, y_pred)
print("R-squared:", r_squared)

R-squared: 0.9568105735466255
