In [1]:
# Importing required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Creating a small dataset manually
data = pd.DataFrame({
    'Area (sq ft)': [1500, 2000, 2500, 3000, 3500, 4000, 1000, 1200, 1800, 2200],
    'Bedrooms': [3, 4, 3, 5, 4, 5, 2, 2, 3, 3],
    'Age (years)': [5, 10, 15, 20, 25, 30, 3, 7, 10, 15],
    'Price': [7500000, 10000000, 11000000, 15000000, 14000000, 16000000, 5000000, 6000000, 8500000, 10500000]
})

# Displaying the raw dataset
print("Raw Dataset:")
print(data)

# Splitting the dataset into features (X) and target (y)
X = data[['Area (sq ft)', 'Bedrooms', 'Age (years)']]
y = data['Price']

# Data preprocessing: Standardizing the features (scaling the data)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Building the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predicting on the test data
y_pred = model.predict(X_test)

# Calculating performance metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nPerformance Metrics:")
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Displaying model coefficients
print("\nModel Coefficients:")
print(f"Intercept: {model.intercept_}")
print(f"Coefficients: {model.coef_}")

# Making a prediction for a new input
new_input = pd.DataFrame({'Area (sq ft)': [2800], 'Bedrooms': [4], 'Age (years)': [12]})
new_input_scaled = scaler.transform(new_input)  # Applying the same scaling as the training data
predicted_price = model.predict(new_input_scaled)
print(f"\nPredicted Price for new input {new_input.values.tolist()}: {predicted_price[0]}")


Raw Dataset:
   Area (sq ft)  Bedrooms  Age (years)     Price
0          1500         3            5   7500000
1          2000         4           10  10000000
2          2500         3           15  11000000
3          3000         5           20  15000000
4          3500         4           25  14000000
5          4000         5           30  16000000
6          1000         2            3   5000000
7          1200         2            7   6000000
8          1800         3           10   8500000
9          2200         3           15  10500000

Performance Metrics:
Mean Squared Error: 194862425827.20703
R^2 Score: 0.6535779096405208

Model Coefficients:
Intercept: 10433757.522884741
Coefficients: [1502959.53698032 1486491.96670666  659882.57367654]

Predicted Price for new input [[2800, 4, 12]]: 12002370.656956457
