In [15]:
import pandas as pd
import numpy as np

In [16]:
housing_data = pd.read_csv('./melb_data.csv')

In [26]:
x = housing_data.copy()
x['Price'].dropna(inplace=True)

y = housing_data['Price']
x.drop(columns=['Price'], inplace=True)

numerical_columns = [col for col in x.columns if housing_data[col].dtype in ['int64', 'float64']]
categorical_columns = [col for col in x.columns if housing_data[col].dtype == 'object']

good_categorical_columns = [col for col in categorical_columns if x[col].nunique() < 10]
bad_categorical_columns = [col for col in categorical_columns if x[col].nunique() >= 10]

In [45]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

x_train, x_validation, y_train, y_validation = train_test_split(x, y, test_size=0.2, train_size=0.8, random_state=0)

numerical_transformer = IterativeImputer()

categorical_transformer = Pipeline(steps=[('impute', SimpleImputer(strategy='most_frequent')), ('encode', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[('numerical', numerical_transformer, numerical_columns), ('categorical', categorical_transformer, categorical_columns)])

model = RandomForestRegressor(random_state=0)

pipeline = Pipeline(steps=[('preprocess', preprocessor), ('model', model)])

pipeline.fit(x_train, y_train)

predictions = pipeline.predict(x_validation)

mean_absolute_error(predictions, y_validation)

164757.58282032402

In [48]:
from sklearn.model_selection import cross_val_score

scores = -1 * cross_val_score(pipeline, x, y, cv=5, scoring='neg_mean_absolute_error')

scores

array([202545.42027614, 187095.59981591, 185113.33544183, 153240.20760309,
       162059.47404639])