In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score

np.random.seed(0)

In [2]:
data = pd.read_csv('melb.csv')

data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000,S,Biggin,3/12/2016,2.5,3067,...,1,1.0,202,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019
1,Abbotsford,25 Bloomburg St,2,h,1035000,S,Biggin,4/2/2016,2.5,3067,...,1,0.0,156,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019
2,Abbotsford,5 Charles St,3,h,1465000,SP,Biggin,4/3/2017,2.5,3067,...,2,0.0,134,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019
3,Abbotsford,40 Federation La,3,h,850000,PI,Biggin,4/3/2017,2.5,3067,...,2,1.0,94,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019
4,Abbotsford,55a Park St,4,h,1600000,VB,Nelson,4/6/2016,2.5,3067,...,1,2.0,120,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019


In [3]:
data.shape

(13580, 21)

In [4]:
y = data.Price

X_full = data.drop(['Price'], axis=1)

In [5]:
numerical_cols = [col for col in X_full.columns if X_full[col].dtype in ['int64', 'float64']]

categorical_cols = [col for col in X_full.columns if X_full[col].dtype == 'object']

In [6]:
my_cols = numerical_cols + categorical_cols

X = X_full[my_cols].copy()

In [7]:
numerical_transformer = SimpleImputer(strategy='median')

categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')), 
                                         ('onehot', OneHotEncoder(handle_unknown='ignore'))])

In [8]:
preprocessor = ColumnTransformer(transformers=[('num', numerical_cols, numerical_transformer), 
                                              ('cat', categorical_cols, categorical_cols)])

In [9]:
model = RandomForestRegressor(random_state=0)

my_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])

In [10]:
scores = -1 * cross_val_score(my_pipeline, X, y, cv=5, scoring='neg_mean_absolute_error')

scores

TypeError: All estimators should implement fit and transform, or can be 'drop' or 'passthrough' specifiers. '['Rooms', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car', 'Landsize', 'BuildingArea', 'YearBuilt', 'Lattitude', 'Longtitude', 'Propertycount']' (type <class 'list'>) doesn't.



array([nan, nan, nan, nan, nan])

In [None]:

import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score

melb = pd.read_csv('melb.csv')

X_full = melb.drop(['Price'], axis=1)

y = melb.Price

numericals = [col for col in X_full.columns if X_full[col].dtype in ['int64', 'float64']]

categoricals = [col for col in X_full.columns if X_full[col].dtype == 'object']
my_cols = numericals + categoricals

X = X_full[my_cols].copy()
numerical_transformer = SimpleImputer(strategy='median')

categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')), 
                                         ('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(transformers=[('num', numerical_transformer, numericals), 
                                              ('cat', categorical_transformer, categoricals)])
model = RandomForestRegressor(random_state=0)
model = RandomForestRegressor(random_state=0)
my_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
scores = -1 * cross_val_score(my_pipeline, X, y, cv=5, scoring='neg_mean_absolute_error')
print('The scores are: ', scores)
print('Average score: ', scores.mean())