In [68]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error


In [69]:

df = pd.read_csv("melb_data_csv.csv")
print("Shape of the dataset:", df.shape)
df.head()

# Drop unnamed or irrelevant columns
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]


Shape of the dataset: (13580, 21)


In [70]:
# Define features to use and the target (Price)
features = ['Suburb','Type','SellerG','Postcode','YearBuilt','Distance','Method','Rooms', 'Bathroom', 'Car', 'Landsize',
            'BuildingArea','CouncilArea','Regionname','Propertycount']

# Create a cleaned dataframe
df_model = df[features + ['Price']].dropna()

# Set up X (features) and y (target)
X = df_model[features]
y = df_model['Price']


In [71]:
# Split into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [72]:
# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)


In [77]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error

# Predict on train and test sets
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Mean Absolute Percentage Error (MAPE)
train_mape = mean_absolute_percentage_error(y_train, y_train_pred) * 100
test_mape = mean_absolute_percentage_error(y_test, y_test_pred) * 100

# Mean Squared Error (MSE)
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

# R-squared Score
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

# Print results
print(f"📊 Training Set:")
print(f"  - MAPE: {train_mape:.0f}%")
print(f"  - Accuracy to predict price: {100-train_mape:.0F}%")
print(f"  - MSE: {train_mse:.2f}")
print(f"  - R² Score: {train_r2:.4f}")

print(f"\n📊 Test Set:")
print(f"  - MAPE: {test_mape:.0f}%")
print(f"  - Accuracy to predict price: {100-test_mape:.0F}%")
print(f"  - MSE: {test_mse:.2f}")
print(f"  - R² Score: {test_r2:.4f}")


📊 Training Set:
  - MAPE: 30%
  - Accuracy to predict price: 70%
  - MSE: 191858958077.81
  - R² Score: 0.5338

📊 Test Set:
  - MAPE: 30%
  - Accuracy to predict price: 70%
  - MSE: 182497779545.04
  - R² Score: 0.5406
