In [3]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Compare three regressors on the real-estate data set: a one-feature linear
# model, a two-feature linear model and a one-feature decision tree. Each is
# trained on the same 80 % split and scored (R-squared, MSE) on the same 20 %.

# --- Configuration ---------------------------------------------------------
DATA_PATH = 'Real estate - Real estate.csv'  # relative to the working directory
UNUSED_COLUMNS = ["No", "X1 transaction date", "X5 latitude", "X6 longitude"]
MRT_DISTANCE = 'X3 distance to the nearest MRT station'
STORE_COUNT = 'X4 number of convenience stores'
TARGET = 'Y house price of unit area'
TEST_SIZE = 0.2
RANDOM_STATE = 0


def _fit_and_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and evaluate it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place.
    X_train, X_test : feature tables for the two splits.
    y_train, y_test : target values for the two splits.

    Returns
    -------
    tuple
        ``(predictions, r2, mse)`` where ``predictions`` is the model output
        for ``X_test`` and the two metrics compare it against ``y_test``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return (
        predictions,
        r2_score(y_test, predictions),
        mean_squared_error(y_test, predictions),
    )


# --- Load and clean --------------------------------------------------------
data = pd.read_csv(DATA_PATH)

# Drop the columns no model uses, then rows with missing values, then exact
# duplicate rows.
# NOTE(review): duplicates are detected after "No" has been dropped, so two
# rows that differ only in the dropped columns are collapsed into one --
# confirm this is intended.
df = data.drop(columns=UNUSED_COLUMNS).dropna().drop_duplicates()

# --- Features and target ---------------------------------------------------
X_linear = df[[MRT_DISTANCE]]
X_multilinear = df[[MRT_DISTANCE, STORE_COUNT]]
X_tree = df[[MRT_DISTANCE]]
Y = df[TARGET]

# --- Train / test split ----------------------------------------------------
# A single call splits every feature table and the target with the same
# shuffled indices, so the rows of each X_* split line up with Y_train/Y_test
# by construction rather than by keeping several calls' arguments in sync.
(
    X_linear_train, X_linear_test,
    X_multilinear_train, X_multilinear_test,
    X_tree_train, X_tree_test,
    Y_train, Y_test,
) = train_test_split(
    X_linear, X_multilinear, X_tree, Y,
    test_size=TEST_SIZE, random_state=RANDOM_STATE,
)

# --- Fit and evaluate ------------------------------------------------------
# Simple linear regression: price ~ MRT distance.
regr_linear = linear_model.LinearRegression()
y_pred_linear, r2_linear, mse_linear = _fit_and_score(
    regr_linear, X_linear_train, X_linear_test, Y_train, Y_test
)

# Multilinear regression: price ~ MRT distance + convenience-store count.
regr_multilinear = linear_model.LinearRegression()
y_pred_multilinear, r2_multilinear, mse_multilinear = _fit_and_score(
    regr_multilinear, X_multilinear_train, X_multilinear_test, Y_train, Y_test
)

# Decision tree on MRT distance only; no depth limit is set.
regressor_tree = DecisionTreeRegressor(random_state=RANDOM_STATE)
y_pred_tree, r2_tree, mse_tree = _fit_and_score(
    regressor_tree, X_tree_train, X_tree_test, Y_train, Y_test
)

# --- Results ---------------------------------------------------------------
# The 'R-squared' column holds the score multiplied by 100 (a percentage),
# not the raw coefficient; 'MSE' is in squared units of the target.
accuracy_table = pd.DataFrame({
    'Model': ['Simple Linear Regression', 'Multilinear Regression', 'Decision Tree Regressor'],
    'R-squared': [r2_linear * 100, r2_multilinear * 100, r2_tree * 100],
    'MSE': [mse_linear, mse_multilinear, mse_tree],
})

print(accuracy_table)

                      Model  R-squared         MSE
0  Simple Linear Regression  51.595111   84.078815
1    Multilinear Regression  57.117911   74.485765
2   Decision Tree Regressor  33.355191  115.761375
