# Singapore Resale Flat Price Prediction

# 1. Import Libraries

In [1]:
#[Data Transformation]
import pandas as pd
import numpy as np

#[Data Visualization]
import matplotlib.pyplot as plt
import seaborn as sns

#[Model]
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

#[Metrics]
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

#[Algorithm]
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

#[Functions]
import pickle
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline 

# 2. Load Datasets

In [2]:
# Load the five resale-flat-price CSV extracts, which together cover 1990 onwards.
# The paths are relative, so the notebook must be run from the directory that contains "Data/".
# The first two files are keyed by approval date and the last three by registration date (per their file names).
df_prices1990 = pd.read_csv("Data/ResaleFlatPricesBasedonApprovalDate19901999.csv")
df_prices2000 = pd.read_csv("Data/ResaleFlatPricesBasedonApprovalDate2000Feb2012.csv")
df_prices2012 = pd.read_csv("Data/ResaleFlatPricesBasedonRegistrationDateFromMar2012toDec2014.csv")
df_prices2015 = pd.read_csv("Data/ResaleFlatPricesBasedonRegistrationDateFromJan2015toDec2016.csv")
df_prices2017 = pd.read_csv("Data/ResaleflatpricesbasedonregistrationdatefromJan2017onwards.csv")

In [3]:
df_prices1990.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,9000
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,IMPROVED,1977,6000
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,8000
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,IMPROVED,1977,6000
4,1990-01,ANG MO KIO,3 ROOM,216,ANG MO KIO AVE 1,04 TO 06,73.0,NEW GENERATION,1976,47200


In [4]:
df_prices2000.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,2000-01,ANG MO KIO,3 ROOM,170,ANG MO KIO AVE 4,07 TO 09,69.0,Improved,1986,147000.0
1,2000-01,ANG MO KIO,3 ROOM,174,ANG MO KIO AVE 4,04 TO 06,61.0,Improved,1986,144000.0
2,2000-01,ANG MO KIO,3 ROOM,216,ANG MO KIO AVE 1,07 TO 09,73.0,New Generation,1976,159000.0
3,2000-01,ANG MO KIO,3 ROOM,215,ANG MO KIO AVE 1,07 TO 09,73.0,New Generation,1976,167000.0
4,2000-01,ANG MO KIO,3 ROOM,218,ANG MO KIO AVE 1,07 TO 09,67.0,New Generation,1976,163000.0


In [5]:
df_prices2012.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,2012-03,ANG MO KIO,2 ROOM,172,ANG MO KIO AVE 4,06 TO 10,45.0,Improved,1986,250000.0
1,2012-03,ANG MO KIO,2 ROOM,510,ANG MO KIO AVE 8,01 TO 05,44.0,Improved,1980,265000.0
2,2012-03,ANG MO KIO,3 ROOM,610,ANG MO KIO AVE 4,06 TO 10,68.0,New Generation,1980,315000.0
3,2012-03,ANG MO KIO,3 ROOM,474,ANG MO KIO AVE 10,01 TO 05,67.0,New Generation,1984,320000.0
4,2012-03,ANG MO KIO,3 ROOM,604,ANG MO KIO AVE 5,06 TO 10,67.0,New Generation,1980,321000.0


In [6]:
df_prices2015.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price
0,2015-01,ANG MO KIO,3 ROOM,174,ANG MO KIO AVE 4,07 TO 09,60.0,Improved,1986,70,255000.0
1,2015-01,ANG MO KIO,3 ROOM,541,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1981,65,275000.0
2,2015-01,ANG MO KIO,3 ROOM,163,ANG MO KIO AVE 4,01 TO 03,69.0,New Generation,1980,64,285000.0
3,2015-01,ANG MO KIO,3 ROOM,446,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1979,63,290000.0
4,2015-01,ANG MO KIO,3 ROOM,557,ANG MO KIO AVE 10,07 TO 09,68.0,New Generation,1980,64,290000.0


In [7]:
df_prices2017.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price
0,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61 years 04 months,232000.0
1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,60 years 07 months,250000.0
2,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,262000.0
3,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68.0,New Generation,1980,62 years 01 month,265000.0
4,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,265000.0


**Dataset:**
| Feature | Description |
| :--- | :--- |
| __month__ | Month of the resale approval/registration (YYYY-MM) |
| __town__ | Place of the building |
| __flat_type__ | Number of rooms |
| __block__ | Building number |
| __street_name__ | Address of the building |
| __storey_range__ | Range of floors on which the flat is located (e.g. `07 TO 09`) |
| __floor_area_sqm__ | Floor area including recess area, roof_terrace, etc. |
| __flat_model__ | Type of flat |
| __lease_commence_date__ | Starting period of lease (year) |
| __remaining_lease__ | The number of years, months and days left before the lease expires |
| __resale_price__ | Resale price agreed between buyer and seller; it depends on many factors |

# 3. Data Preparation

In [8]:
# `remaining_lease` exists only in the 2015+ extracts, so drop it to give all five
# frames the same columns before they are concatenated.
# errors="ignore" keeps this cell idempotent: re-running it after the column has
# already been removed is a no-op instead of raising KeyError.
df_prices2015 = df_prices2015.drop(columns="remaining_lease", errors="ignore")
df_prices2017 = df_prices2017.drop(columns="remaining_lease", errors="ignore")
df_prices2017.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,232000.0
1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,250000.0
2,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,262000.0
3,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68.0,New Generation,1980,265000.0
4,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,265000.0


In [9]:
# Stack the five period extracts row-wise into a single frame.
# ignore_index=True renumbers the rows 0..n-1 so index labels from the individual files do not repeat;
# sort=False leaves the column order as-is instead of sorting column names.
prices = pd.concat([df_prices1990, df_prices2000, df_prices2012, df_prices2015, df_prices2017], axis=0, ignore_index=True, sort=False)
prices

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,9000.0
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,IMPROVED,1977,6000.0
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,8000.0
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,IMPROVED,1977,6000.0
4,1990-01,ANG MO KIO,3 ROOM,216,ANG MO KIO AVE 1,04 TO 06,73.0,NEW GENERATION,1976,47200.0
...,...,...,...,...,...,...,...,...,...,...
925052,2024-03,YISHUN,EXECUTIVE,824,YISHUN ST 81,07 TO 09,142.0,Apartment,1987,855000.0
925053,2024-04,YISHUN,EXECUTIVE,826,YISHUN ST 81,04 TO 06,146.0,Maisonette,1988,900000.0
925054,2024-04,YISHUN,EXECUTIVE,836,YISHUN ST 81,04 TO 06,142.0,Apartment,1988,805000.0
925055,2024-02,YISHUN,MULTI-GENERATION,666,YISHUN AVE 4,04 TO 06,164.0,Multi Generation,1987,998000.0


In [10]:
prices.shape

(925057, 10)

In [11]:
prices.describe()

Unnamed: 0,floor_area_sqm,lease_commence_date,resale_price
count,925057.0,925057.0,925057.0
mean,95.691869,1988.25379,320108.6
std,25.828512,10.639862,169859.4
min,28.0,1966.0,5000.0
25%,73.0,1981.0,193000.0
50%,93.0,1986.0,296000.0
75%,113.0,1996.0,416000.0
max,307.0,2022.0,1568888.0


In [12]:
prices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 925057 entries, 0 to 925056
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   month                925057 non-null  object 
 1   town                 925057 non-null  object 
 2   flat_type            925057 non-null  object 
 3   block                925057 non-null  object 
 4   street_name          925057 non-null  object 
 5   storey_range         925057 non-null  object 
 6   floor_area_sqm       925057 non-null  float64
 7   flat_model           925057 non-null  object 
 8   lease_commence_date  925057 non-null  int64  
 9   resale_price         925057 non-null  float64
dtypes: float64(2), int64(1), object(7)
memory usage: 70.6+ MB


In [13]:
prices.isnull().sum()

month                  0
town                   0
flat_type              0
block                  0
street_name            0
storey_range           0
floor_area_sqm         0
flat_model             0
lease_commence_date    0
resale_price           0
dtype: int64

- The dataset consists of **925057** records and **10** features.
- The datatypes seen are **object, float64, int64**.
- `remaining_lease` is only available for sales from 2015 onwards, so it would be missing for most records. Hence, it was dropped.
- The other features don't have any missing values.
- Categorical variables are *month, town, flat_type, storey_range, flat_model*.
- Continuous variables are *floor_area_sqm, lease_commence_date, resale_price*.

In [14]:
prices.columns

Index(['month', 'town', 'flat_type', 'block', 'street_name', 'storey_range',
       'floor_area_sqm', 'flat_model', 'lease_commence_date', 'resale_price'],
      dtype='object')

## 3.1 Feature Selection
Irrelevant features removal

In [15]:
df1 = prices.drop(["month", "block", "street_name"], axis= 1)
df1.head()

Unnamed: 0,town,flat_type,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,ANG MO KIO,1 ROOM,10 TO 12,31.0,IMPROVED,1977,9000.0
1,ANG MO KIO,1 ROOM,04 TO 06,31.0,IMPROVED,1977,6000.0
2,ANG MO KIO,1 ROOM,10 TO 12,31.0,IMPROVED,1977,8000.0
3,ANG MO KIO,1 ROOM,07 TO 09,31.0,IMPROVED,1977,6000.0
4,ANG MO KIO,3 ROOM,04 TO 06,73.0,NEW GENERATION,1976,47200.0


## 3.2 Encoding

In [16]:
df1["town"].unique()

array(['ANG MO KIO', 'BEDOK', 'BISHAN', 'BUKIT BATOK', 'BUKIT MERAH',
       'BUKIT TIMAH', 'CENTRAL AREA', 'CHOA CHU KANG', 'CLEMENTI',
       'GEYLANG', 'HOUGANG', 'JURONG EAST', 'JURONG WEST',
       'KALLANG/WHAMPOA', 'MARINE PARADE', 'QUEENSTOWN', 'SENGKANG',
       'SERANGOON', 'TAMPINES', 'TOA PAYOH', 'WOODLANDS', 'YISHUN',
       'LIM CHU KANG', 'SEMBAWANG', 'BUKIT PANJANG', 'PASIR RIS',
       'PUNGGOL'], dtype=object)

In [17]:
df1["flat_type"].unique()

array(['1 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', '2 ROOM', 'EXECUTIVE',
       'MULTI GENERATION', 'MULTI-GENERATION'], dtype=object)

In [18]:
# Merge the two spellings of the multi-generation flat type.
# Scoped to the flat_type column (the only place the hyphenated spelling occurs) and
# written as an assignment rather than a frame-wide inplace replace, so no other
# column is touched and the cell can be re-run safely.
df1["flat_type"] = df1["flat_type"].replace("MULTI-GENERATION", "MULTI GENERATION")

In [19]:
df1["flat_type"].unique()

array(['1 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', '2 ROOM', 'EXECUTIVE',
       'MULTI GENERATION'], dtype=object)

In [20]:
df1["storey_range"].unique()

array(['10 TO 12', '04 TO 06', '07 TO 09', '01 TO 03', '13 TO 15',
       '19 TO 21', '16 TO 18', '25 TO 27', '22 TO 24', '28 TO 30',
       '31 TO 33', '40 TO 42', '37 TO 39', '34 TO 36', '06 TO 10',
       '01 TO 05', '11 TO 15', '16 TO 20', '21 TO 25', '26 TO 30',
       '36 TO 40', '31 TO 35', '46 TO 48', '43 TO 45', '49 TO 51'],
      dtype=object)

In [21]:
df1["flat_model"].unique()

array(['IMPROVED', 'NEW GENERATION', 'MODEL A', 'STANDARD', 'SIMPLIFIED',
       'MODEL A-MAISONETTE', 'APARTMENT', 'MAISONETTE', 'TERRACE',
       '2-ROOM', 'IMPROVED-MAISONETTE', 'MULTI GENERATION',
       'PREMIUM APARTMENT', 'Improved', 'New Generation', 'Model A',
       'Standard', 'Apartment', 'Simplified', 'Model A-Maisonette',
       'Maisonette', 'Multi Generation', 'Adjoined flat',
       'Premium Apartment', 'Terrace', 'Improved-Maisonette',
       'Premium Maisonette', '2-room', 'Model A2', 'DBSS', 'Type S1',
       'Type S2', 'Premium Apartment Loft', '3Gen'], dtype=object)

In [22]:
# The 1990-1999 extract spells flat_model in upper case while the later extracts use
# mixed case; map every upper-case variant onto its mixed-case spelling so each model
# appears under a single label.
flat_model_replaces = {
    "NEW GENERATION": "New Generation",
    "MULTI GENERATION": "Multi Generation",
    "IMPROVED": "Improved",
    "STANDARD": "Standard",
    "SIMPLIFIED": "Simplified",
    "MODEL A-MAISONETTE": "Model A-Maisonette",
    "APARTMENT": "Apartment",
    "MAISONETTE": "Maisonette",
    "TERRACE": "Terrace",
    "2-ROOM": "2-room",
    "IMPROVED-MAISONETTE": "Improved-Maisonette",
    "PREMIUM APARTMENT": "Premium Apartment",
    "MODEL A": "Model A",
}

# The nested dict restricts the replacement to the flat_model column.
df1 = df1.replace({"flat_model": flat_model_replaces})

In [23]:
df1["flat_model"].unique()

array(['Improved', 'New Generation', 'Model A', 'Standard', 'Simplified',
       'Model A-Maisonette', 'Apartment', 'Maisonette', 'Terrace',
       '2-room', 'Improved-Maisonette', 'Multi Generation',
       'Premium Apartment', 'Adjoined flat', 'Premium Maisonette',
       'Model A2', 'DBSS', 'Type S1', 'Type S2', 'Premium Apartment Loft',
       '3Gen'], dtype=object)

In [24]:
# Label Encoding
# Keep each fitted encoder (keyed by column name) instead of discarding it, so the
# integer codes can later be mapped back with `inverse_transform` or applied to new
# data with `transform`.
label_encoders = {}
for column in ["town", "storey_range", "flat_model"]:
    label_encoders[column] = LabelEncoder()
    df1[column] = label_encoders[column].fit_transform(df1[column])

# Map Method
# flat_type has a natural order (room count, then the larger flat types), so it is
# mapped by hand rather than label-encoded alphabetically.
flat_type_map = {'1 ROOM': 0, '2 ROOM': 1, '3 ROOM': 2, '4 ROOM': 3, '5 ROOM':4, 'EXECUTIVE': 5, 'MULTI GENERATION': 6}

# `.map` turns any value missing from the dict into NaN without warning (this also
# happens if the cell is re-run on already-encoded data), so fail loudly instead.
unmapped_flat_types = set(df1["flat_type"].unique()) - set(flat_type_map)
assert not unmapped_flat_types, f"flat_type values missing from flat_type_map: {unmapped_flat_types}"
df1["flat_type"] = df1["flat_type"].map(flat_type_map)

In [25]:
df1

Unnamed: 0,town,flat_type,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,0,0,5,31.0,5,1977,9000.0
1,0,0,2,31.0,5,1977,6000.0
2,0,0,5,31.0,5,1977,8000.0
3,0,0,4,31.0,5,1977,6000.0
4,0,2,2,73.0,12,1976,47200.0
...,...,...,...,...,...,...,...
925052,26,5,4,142.0,3,1987,855000.0
925053,26,5,2,146.0,7,1988,900000.0
925054,26,5,2,142.0,3,1988,805000.0
925055,26,6,2,164.0,11,1987,998000.0


In [26]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 925057 entries, 0 to 925056
Data columns (total 7 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   town                 925057 non-null  int32  
 1   flat_type            925057 non-null  int64  
 2   storey_range         925057 non-null  int32  
 3   floor_area_sqm       925057 non-null  float64
 4   flat_model           925057 non-null  int32  
 5   lease_commence_date  925057 non-null  int64  
 6   resale_price         925057 non-null  float64
dtypes: float64(2), int32(3), int64(2)
memory usage: 38.8 MB


All the categories are encoded for model.

## 3.3 Skewness

In [27]:
# Collect every column of df1 for the skewness and outlier checks below.
# NOTE(review): despite the name, this list is all columns of df1, so it also contains
# the encoded categorical columns (town, flat_type, storey_range, flat_model), not only
# the continuous ones — confirm whether the checks should be limited to continuous columns.
continuous_features = list(df1.columns)
continuous_features

['town',
 'flat_type',
 'storey_range',
 'floor_area_sqm',
 'flat_model',
 'lease_commence_date',
 'resale_price']

In [28]:
# Measure skewness in the numerical features
numerical_features = df1[continuous_features]  
skewness = numerical_features.skew()
print(skewness)

town                  -0.085713
flat_type              0.388498
storey_range           1.101257
floor_area_sqm         0.366345
flat_model             0.499884
lease_commence_date    0.644231
resale_price           0.937800
dtype: float64


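Most features show mild-to-moderate positive skewness; `storey_range` (1.10) and `resale_price` (0.94) are the most right-skewed, while `town` (-0.09) is approximately symmetric.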

## 3.4 Feature Scaling
Scaling is required for linear regression.

In [29]:
# Columns to standardise (zero mean, unit variance).
scaled_columns = ["floor_area_sqm", "resale_price"]

# NOTE(review): the scaler is fitted on the full dataset (before any train/test split)
# and the target column is scaled as well — confirm this is intended.
scaler = StandardScaler().fit(df1[scaled_columns])

# Everything that is left unscaled.
df2 = df1.drop(columns=scaled_columns)

# Re-attach the standardised columns on the right-hand side, aligned on df1's index.
scaled_df = pd.concat(
    [
        df2,
        pd.DataFrame(scaler.transform(df1[scaled_columns]), index=df1.index, columns=scaled_columns),
    ],
    axis=1,
)

scaled_df


Unnamed: 0,town,flat_type,storey_range,flat_model,lease_commence_date,floor_area_sqm,resale_price
0,0,0,5,5,1977,-2.504670,-1.831567
1,0,0,2,5,1977,-2.504670,-1.849228
2,0,0,5,5,1977,-2.504670,-1.837454
3,0,0,4,5,1977,-2.504670,-1.849228
4,0,2,2,12,1976,-0.878559,-1.606675
...,...,...,...,...,...,...,...
925052,26,5,4,3,1987,1.792908,3.149027
925053,26,5,2,7,1988,1.947776,3.413952
925054,26,5,2,3,1988,1.792908,2.854666
925055,26,6,2,11,1987,2.644681,3.990901


## 3.5 Outliers Treatment

In [30]:
def count_outliers(df, columns=None):
    """Count IQR-rule outliers in each selected column.

    A value is counted as an outlier when it lies below Q1 - 1.5*IQR or above
    Q3 + 1.5*IQR of its own column.

    Args:
        df: DataFrame holding the columns to inspect.
        columns: Column names to check. When omitted, falls back to the
            notebook-level ``continuous_features`` list, which preserves the
            original single-argument behaviour.

    Returns:
        pd.Series: Number of outliers per column, indexed by column name.
    """
    if columns is None:
        # Backward-compatible default: rely on the list defined earlier in the notebook.
        columns = continuous_features
    subset = df[list(columns)]

    q1 = subset.quantile(0.25)
    q3 = subset.quantile(0.75)
    iqr = q3 - q1

    # Comparing a DataFrame with a Series aligns the Series on the column labels,
    # so each column is tested against its own bounds.
    below = subset < (q1 - 1.5 * iqr)
    above = subset > (q3 + 1.5 * iqr)
    return (below | above).sum()

In [36]:
count_outliers(scaled_df)

town                       0
flat_type                  0
storey_range           26090
floor_area_sqm          2404
flat_model                 0
lease_commence_date     2380
resale_price           18950
dtype: int64

In [38]:
def remove_outliers(df):
    """Cap outliers in every numeric column using the interquartile-range (IQR) rule.

    Despite the name, no rows are removed: values below Q1 - 1.5*IQR or above
    Q3 + 1.5*IQR are clipped to those bounds, so the row count is unchanged.

    The work is done on a copy, so the caller's DataFrame is left untouched.
    (Previously the input was modified in place, which meant
    ``df2 = remove_outliers(df1)`` also clipped ``df1``.)

    Args:
        df: Input DataFrame. Non-numeric columns are passed through unchanged.

    Returns:
        pd.DataFrame: A new DataFrame with every numeric column clipped to its
        IQR bounds.
    """
    clipped = df.copy()
    for column in clipped.select_dtypes(include="number").columns:
        q1 = clipped[column].quantile(0.25)
        q3 = clipped[column].quantile(0.75)
        iqr = q3 - q1
        lower_lt = q1 - 1.5 * iqr
        upper_lt = q3 + 1.5 * iqr
        clipped[column] = clipped[column].clip(lower_lt, upper_lt)
    return clipped

In [41]:
df2 = remove_outliers(df1)
df2

Unnamed: 0,town,flat_type,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,0,0,5.0,31.0,5,1977.0,9000.0
1,0,0,2.0,31.0,5,1977.0,6000.0
2,0,0,5.0,31.0,5,1977.0,8000.0
3,0,0,4.0,31.0,5,1977.0,6000.0
4,0,2,2.0,73.0,12,1976.0,47200.0
...,...,...,...,...,...,...,...
925052,26,5,4.0,142.0,3,1987.0,750500.0
925053,26,5,2.0,146.0,7,1988.0,750500.0
925054,26,5,2.0,142.0,3,1988.0,750500.0
925055,26,6,2.0,164.0,11,1987.0,750500.0


# 4. EDA (Exploratory Data Analysis)

In [32]:
df1

Unnamed: 0,town,flat_type,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
0,0,0,5,31.0,5,1977,9000.0
1,0,0,2,31.0,5,1977,6000.0
2,0,0,5,31.0,5,1977,8000.0
3,0,0,4,31.0,5,1977,6000.0
4,0,2,2,73.0,12,1976,47200.0
...,...,...,...,...,...,...,...
925052,26,5,4,142.0,3,1987,855000.0
925053,26,5,2,146.0,7,1988,900000.0
925054,26,5,2,142.0,3,1988,805000.0
925055,26,6,2,164.0,11,1987,998000.0


# 5. Model

In [33]:
# Find the best prediction model with MSE, RMSE, R^2, MAE.
 
def evaluate_Reg_model(df: pd.DataFrame, algorithms: list, test_size: float = 0.2, random_state: int = 42):
    """
    Evaluate various regression models based on MSE, RMSE, R^2, MAE.

    All models are trained and scored on the same train/test split, which is
    computed once up front.

    Args:
        df: The input dataframe containing the feature columns and the
            "resale_price" target column.
        algorithms: A list of regression model classes (not instances). Each
            class is instantiated with its default hyper-parameters.
        test_size: Fraction of rows held out for testing (default 0.2).
        random_state: Seed used for the train/test split. It is also passed to
            every model whose constructor accepts a ``random_state`` argument,
            so repeated runs give the same metrics.

    Returns:
        pd.DataFrame: One row per model with the columns Model, MSE, RMSE, R^2
        and MAE, rounded to 4 decimals.
    """
    # Standard library; imported locally so this cell stays self-contained.
    import inspect

    x = df.drop("resale_price", axis=1)
    y = df["resale_price"]
    # The split does not depend on the model, so compute it once instead of per iteration.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )

    results = []
    for algorithm in algorithms:
        # Seed stochastic estimators for reproducible metrics; estimators without
        # a random_state argument are built unchanged.
        try:
            accepts_seed = "random_state" in inspect.signature(algorithm).parameters
        except (TypeError, ValueError):
            accepts_seed = False
        estimator = algorithm(random_state=random_state) if accepts_seed else algorithm()

        model = estimator.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        mse = mean_squared_error(y_test, y_pred)
        results.append({"Model": algorithm.__name__,
                        "MSE": mse,
                        "RMSE": np.sqrt(mse),
                        "R^2": r2_score(y_test, y_pred),
                        "MAE": mean_absolute_error(y_test, y_pred)})

    # Build the summary under its own name rather than overwriting the `df` argument.
    performance = pd.DataFrame(results).round(4)

    return performance

In [42]:
# Evaluate regression models
reg_algorithms = [DecisionTreeRegressor, ExtraTreesRegressor, RandomForestRegressor, LinearRegression]

df_reg_performance = evaluate_Reg_model(df2, reg_algorithms)
df_reg_performance

Unnamed: 0,Model,MSE,RMSE,R^2,MAE
0,DecisionTreeRegressor,10377190000.0,101868.4775,0.6081,79347.7058
1,ExtraTreesRegressor,10342070000.0,101695.9469,0.6094,79266.2683
2,RandomForestRegressor,10264080000.0,101311.7861,0.6124,79124.5676
3,LinearRegression,13471000000.0,116064.6312,0.4913,92089.9158


## 5.1 Linear Regression

In [35]:
# # Train the model and predict "resale_price"
# lr_model = scaled_df
# x = lr_model.drop("resale_price", axis= 1)
# y = lr_model["resale_price"]

# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size= 0.2, random_state=42)

# model = LinearRegression().fit(x_train, y_train)
# y_pred_train = model.predict(x_train)
# y_pred_test = model.predict(x_test)

# mse = mean_squared_error(y_test, y_pred_test)
# rmse = np.sqrt(mse)
# r2 = r2_score(y_test, y_pred_test)
# mae = mean_absolute_error(y_test, y_pred_test)

# metrics = {"Model": "Linear Regression",
#             "MSE": [mse],
#             "RMSE": [rmse],
#             "R^2": [r2],
#             "MAE": [mae]}

# index = ["Model", "MSE", "RMSE", "R^2", "MAE"]

# # Convert dictionary to dataframe
# metrics_df = pd.DataFrame.from_dict(metrics, orient= "columns")
# print(metrics_df)

## 5.