In [124]:
import pandas as pd
import numpy as np

In [125]:
# Number of synthetic houses to generate.
num_samples = 100
# Seed NumPy's global RNG so the generated data is identical on every run.
np.random.seed(0)

In [126]:
# Synthetic housing data. Every column is drawn independently at random, so
# 'Price ($)' has no built-in relationship to the other columns.
# The draws are kept in this order so the seeded RNG produces the same values.
data = {
    'Area (sqft)': np.random.uniform(low=500, high=3500, size=num_samples),
    # randint excludes `high`: bedrooms fall in 1..5, bathrooms in 1..3.
    'Number of Bedrooms': np.random.randint(low=1, high=6, size=num_samples),
    'Number of Bathrooms': np.random.randint(low=1, high=4, size=num_samples),
    'Price ($)': np.random.uniform(low=100000, high=500000, size=num_samples),
}


In [127]:
# Build the frame from the column-name -> values mapping (one column per key).
df = pd.DataFrame.from_dict(data)

In [128]:
# Display the generated frame (the notebook repr elides the middle rows).
df

Unnamed: 0,Area (sqft),Number of Bedrooms,Number of Bathrooms,Price ($)
0,2146.440512,3,1,242245.095140
1,2645.568099,4,1,476172.778101
2,2308.290128,3,1,406130.101523
3,2134.649549,4,1,399465.447940
4,1770.964398,1,2,461487.895898
...,...,...,...,...
95,1049.574086,2,2,130782.578795
96,2259.538804,2,3,307534.059533
97,560.322639,1,3,222724.039818
98,2986.820088,4,2,331017.179533


In [129]:
# Column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Area (sqft)          100 non-null    float64
 1   Number of Bedrooms   100 non-null    int64  
 2   Number of Bathrooms  100 non-null    int64  
 3   Price ($)            100 non-null    float64
dtypes: float64(2), int64(2)
memory usage: 3.2 KB


In [130]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,Area (sqft),Number of Bedrooms,Number of Bathrooms,Price ($)
count,100.0,100.0,100.0,100.0
mean,1918.381519,2.84,1.95,295086.156945
std,869.262046,1.440539,0.808728,123070.964394
min,514.086429,1.0,1.0,104570.98345
25%,1117.409472,1.0,1.0,192840.405298
50%,1902.442962,3.0,2.0,287645.238159
75%,2553.449763,4.0,3.0,409954.955994
max,3465.121514,5.0,3.0,492731.755927


In [131]:
# Count missing values per column (expected to be zero for generated data).
df.isna().sum()

Area (sqft)            0
Number of Bedrooms     0
Number of Bathrooms    0
Price ($)              0
dtype: int64

In [132]:
# Separate the frame into a feature matrix X and a target vector y (NumPy arrays).
X = df[['Area (sqft)', 'Number of Bedrooms', 'Number of Bathrooms']].to_numpy()
y = df['Price ($)'].to_numpy()

In [133]:
# Data standardization: per-feature mean and standard deviation (column-wise).
# NOTE(review): these statistics are computed over ALL rows of X, before the
# train/test split further down, so the held-out rows influence the scaling
# (data leakage). Consider computing them from the training rows only.
means = np.mean(X, axis=0)
std_devs = np.std(X, axis=0)
# A constant feature has zero standard deviation; dividing by it would produce
# NaN/inf when scaling. Use 1.0 instead so such a column simply becomes all zeros.
std_devs = np.where(std_devs == 0, 1.0, std_devs)

In [134]:
# Feature scaling: centre each column on its mean, then divide by its
# standard deviation (broadcast across rows).
X_scaled = np.divide(np.subtract(X, means), std_devs)

In [135]:
# Re-seed the global RNG so the shuffle that follows is reproducible,
# independent of how many random draws were made earlier in the notebook.
np.random.seed(0)

In [136]:
# Row positions 0..n-1, shuffled in place to randomise the train/test assignment.
indices = np.arange(len(X_scaled))
np.random.shuffle(indices)

In [137]:
# Fraction of rows held out for testing.
test_size = 0.2
# Number of rows that go to the training set (truncated to an integer).
split_index = int((1 - test_size) * len(X_scaled))

In [138]:
# Reorder rows by the shuffled indices, then cut at split_index:
# the first part is the training set, the remainder the test set.
X_train, X_test = np.split(X_scaled[indices], [split_index])
y_train, y_test = np.split(y[indices], [split_index])

In [139]:
# Sanity check: report (rows, features) for the training and testing feature matrices.
print(f"Training set: {X_train.shape}, Testing set: {X_test.shape}")

Training set: (80, 3), Testing set: (20, 3)


In [140]:
def fit_linear_regression(X, y):
    """Fit an ordinary least-squares linear model with an intercept.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features) or (n_samples,)
        Feature matrix. A 1-D input is treated as a single feature column.
    y : array-like of shape (n_samples,)
        Target values.

    Returns
    -------
    intercept : float
        The fitted bias term.
    coefficients : ndarray of shape (n_features,)
        One weight per feature column, in the column order of ``X``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    y = np.asarray(y, dtype=float)

    # Prepend a column of ones so the first fitted parameter is the intercept.
    X_b = np.c_[np.ones((X.shape[0], 1)), X]

    # Coefficient calculation. Solve the least-squares problem directly instead
    # of forming inv(X_b.T @ X_b): explicitly inverting the normal-equation
    # matrix is numerically unstable and breaks down when features are
    # collinear, whereas lstsq returns the minimum-norm solution in that case.
    theta_best, *_ = np.linalg.lstsq(X_b, y, rcond=None)

    # Extract coefficients
    intercept = theta_best[0]
    coefficients = theta_best[1:]

    return intercept, coefficients

In [141]:
def predict_linear_regression(X, intercept, coefficients):
    """Return the linear model's predictions for the rows of ``X``.

    Each prediction is the dot product of a feature row with ``coefficients``
    plus the scalar ``intercept``.
    """
    weighted_sum = np.dot(X, coefficients)
    return intercept + weighted_sum

In [142]:
def mean_squared_error(y_true, y_pred):
    """Average of the squared differences between targets and predictions."""
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))

In [143]:
def root_mean_squared_error(y_true, y_pred):
    """Square root of the mean squared error, in the same units as the target."""
    return np.sqrt(mean_squared_error(y_true, y_pred))

In [144]:
def r_squared(y_true, y_pred):
    """Coefficient of determination, ``1 - SS_residual / SS_total``.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, same shape as ``y_true``.

    Returns
    -------
    float
        1.0 for a perfect fit; 0.0 when the predictions are no better than
        always predicting ``mean(y_true)``; negative when they are worse.
        NaN for empty input.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        # Nothing to score; the ratio is undefined.
        return float("nan")

    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)

    if ss_total == 0:
        # Constant target: the ratio would be 0/0 or x/0 (NaN or -inf plus a
        # RuntimeWarning). Report a perfect score only if predictions are exact.
        return 1.0 if ss_residual == 0 else 0.0

    return 1 - (ss_residual / ss_total)

In [145]:
# Train the model: fit intercept and per-feature coefficients on the training rows only.
intercept, coefficients = fit_linear_regression(X_train, y_train)

In [146]:
# Predict on the test set, using the parameters fitted on the training rows.
y_pred = predict_linear_regression(X_test, intercept, coefficients)

In [147]:
# Score the test-set predictions with each metric, in order: MSE, RMSE, R^2.
mse, rmse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, root_mean_squared_error, r_squared)
)

In [150]:
# Report the test-set metrics.
# A negative R^2 means the residual sum of squares exceeds the total sum of
# squares, i.e. the fit does worse than always predicting mean(y_test). That is
# expected here: the price column was drawn at random, independently of the features.
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R^2): {r2}")

Mean Squared Error (MSE): 14229716841.102749
Root Mean Squared Error (RMSE): 119288.37680638777
R-squared (R^2): -0.22839716560041046


In [151]:
# Results: fitted parameters and a sample of the test-set predictions.
# The model was trained on standardized features, so each coefficient is the
# price change per one standard deviation of that feature, not per raw unit.
print(f"Intercept: {intercept}")
print(f"Coefficients: {coefficients}")
print(f"Predictions: {y_pred[:10]}")  # Only the first 10 predictions are shown

Intercept: 288789.37820566224
Coefficients: [16343.34308865  4204.95688044 23856.92322995]
Predictions: [326929.55456853 293044.7938274  290356.87628999 299646.00422623
 270008.87238821 258127.47045729 266593.16007967 302008.2225655
 320369.22150338 351874.80974427]
