## Importing Libraries

In [1]:
import pandas as pd
import numpy as np

## Importing Dataset

In [2]:
# Load the housing extract; the printed missing-value summary shows it holds
# the columns Suburb, Type, Price, BuildingArea and YearBuilt.
dataset = pd.read_csv("melb_data.csv")

# Select columns by name rather than by position so the selection does not
# depend on the column order in the CSV; a missing column raises KeyError
# instead of silently feeding the wrong data downstream.
FEATURE_COLUMNS = ["Type", "Price", "BuildingArea", "YearBuilt"]
TARGET_COLUMN = "Suburb"

# X: feature matrix (one categorical column followed by three numeric ones).
# y: the suburb label of each row.
# to_numpy() is the pandas-recommended replacement for the .values attribute.
X = dataset[FEATURE_COLUMNS].to_numpy()
y = dataset[TARGET_COLUMN].to_numpy()

In [3]:
# Tally the NaN entries in every column to see which features need imputing.
missing_per_column = dataset.isna().sum()
print(missing_per_column)

Suburb             0
Type               0
Price              0
BuildingArea    6450
YearBuilt       5375
dtype: int64


In [4]:
# Preview the raw feature matrix; NumPy abbreviates the middle rows with "...".
print(X)

[['h' 1480000 nan nan]
 ['h' 1035000 79.0 1900.0]
 ['h' 1465000 150.0 1900.0]
 ...
 ['h' 1170000 nan 1997.0]
 ['h' 2500000 157.0 1920.0]
 ['h' 1285000 112.0 1920.0]]


In [5]:
# Preview the raw target vector (suburb names) before it is encoded.
print(y)

['Abbotsford' 'Abbotsford' 'Abbotsford' ... 'Williamstown' 'Williamstown'
 'Yarraville']


## Taking Care of Missing Data

In [6]:
from sklearn.impute import SimpleImputer

# Replace every NaN in the numeric columns (index 1 onward) with that column's
# mean. Column 0 holds the categorical house type and is left untouched.
# fit_transform learns the column means and applies them in a single call.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
X[:, 1:] = imputer.fit_transform(X[:, 1:])

In [7]:
# Re-inspect X to confirm the NaN entries were replaced by the column means.
print(X)

[['h' 1480000.0 151.96764988779805 1964.6842169408897]
 ['h' 1035000.0 79.0 1900.0]
 ['h' 1465000.0 150.0 1900.0]
 ...
 ['h' 1170000.0 151.96764988779805 1997.0]
 ['h' 2500000.0 157.0 1920.0]
 ['h' 1285000.0 112.0 1920.0]]


## Encoding Independent Variable

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical column at index 0; every other column is
# passed through unchanged and placed to the right of the dummy columns.
# NOTE: X is rebound here, so re-running this cell on its own would encode the
# first dummy column again -- re-run from the data-loading cell instead.
type_encoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    transformers=[('encoder', type_encoder, [0])],
    remainder='passthrough',
)
X = np.array(column_transformer.fit_transform(X))

In [9]:
# Re-inspect X: the house-type column is now expanded into one-hot columns.
print(X)

[[1.0 0.0 0.0 1480000.0 151.96764988779805 1964.6842169408897]
 [1.0 0.0 0.0 1035000.0 79.0 1900.0]
 [1.0 0.0 0.0 1465000.0 150.0 1900.0]
 ...
 [1.0 0.0 0.0 1170000.0 151.96764988779805 1997.0]
 [1.0 0.0 0.0 2500000.0 157.0 1920.0]
 [1.0 0.0 0.0 1285000.0 112.0 1920.0]]


## Encoding Dependent Variable

In [10]:
from sklearn.preprocessing import LabelEncoder
# Map each distinct suburb name in y to an integer class id.
# The fitted encoder is kept in `labelencoder` so it stays available after y
# has been overwritten with the encoded ids.
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(y)

In [11]:
# Show the integer class ids that replaced the suburb names.
print(y)

[  0   0   0 ... 305 305 313]


## Feature Scaling

In [12]:
from sklearn.preprocessing import StandardScaler

# Standardise only the numeric features (Price, BuildingArea, YearBuilt) to
# zero mean and unit variance; the one-hot dummy columns stay as 0/1.
#
# The numeric columns are the passthrough remainder of the ColumnTransformer,
# which places them to the right of the dummy columns, so they are addressed
# from the end of the matrix. A fixed start offset of 3 is only correct while
# the encoder emits exactly three dummy columns; with a different number of
# categories it would scale a dummy column or skip a numeric one.
NUM_NUMERIC_FEATURES = 3
standardscaler = StandardScaler()
X[:, -NUM_NUMERIC_FEATURES:] = standardscaler.fit_transform(X[:, -NUM_NUMERIC_FEATURES:])

In [13]:
# Final check: the last three columns are standardised; the dummy columns are unchanged.
print(X)

[[1.0 0.0 0.0 0.6324480294931559 1.6894010452623133e-14
  -8.86069836744998e-12]
 [1.0 0.0 0.0 -0.06363975439906099 -0.1861474114371596 -2.232709157684046]
 [1.0 0.0 0.0 0.6089843963282496 -0.005019661915797484 -2.232709157684046]
 ...
 [1.0 0.0 0.0 0.14753294408509468 1.6894010452623133e-14
  1.115445902960865]
 [1.0 0.0 0.0 2.2279750847067765 0.012838003529970611 -1.5423679080665387]
 [1.0 0.0 0.0 0.32742079834937543 -0.10196127433568143
  -1.5423679080665387]]
