In [3]:
import orca
import numpy as np
import pandas as pd


from urbansim_templates import modelmanager
from urbansim_templates.models.regression import RandomForestRegressionStep


In [4]:
# Load the rental listings and the two node-level variable tables.
# Paths use forward slashes so they resolve on Windows, macOS and Linux alike;
# a backslash separator is only understood on Windows.
rental = pd.read_csv('data/rentals_with_nodes.csv')
node_small = pd.read_csv('data/nodessmall_vars.csv')
node_walk = pd.read_csv('data/nodeswalk_vars.csv')

In [5]:
# Attach node-level variables to each rental: first via the rental's
# 'node_id_small' key, then via its 'node_id_walk' key. Both node tables
# are keyed by their 'osmid' column.
data = rental.merge(
    node_small, left_on='node_id_small', right_on='osmid'
).merge(
    node_walk, left_on='node_id_walk', right_on='osmid'
)


In [6]:
# Derive log-transformed columns used by the model expression.
# The two 500-walk variables are shifted by 1 before taking the log.
data['log_rent_sqft'] = np.log(data['rent_sqft'])
data['log_units_500_walk'] = np.log(data['units_500_walk'] + 1)
data['log_rich_500_walk'] = np.log(data['rich_500_walk'] + 1)

In [7]:
# Register the merged DataFrame with orca under the name 'rental_prices'
# so model steps can refer to it by that name.
orca.add_table(table_name='rental_prices', table=data)

<orca.orca.DataFrameWrapper at 0x377eb38>

In [8]:
# Initialize modelmanager before creating any steps. Per the output below,
# it looks for yaml files in the 'configs' path (none are found on this run).
modelmanager.initialize()

No yaml files found in path 'configs'


In [9]:
# Instantiate the random forest regression step and point it at the
# 'rental_prices' table registered with orca.
m = RandomForestRegressionStep()
m.tables = 'rental_prices'

In [10]:
# Model expression: log rent per sqft on the left-hand side, explained by
# bedroom count and the two log-transformed 500-walk variables.
m.model_expression = (
    'log_rent_sqft ~ '
    'bedrooms + log_units_500_walk + log_rich_500_walk'
)
    

In [11]:
# Fit the step using the table and model expression configured on `m`.
m.fit()

In [12]:
# Run cross-validation on the step, then display the resulting metrics.
# The displayed dict carries 'mean_cross_validation' and
# 'mae_cross_validation' entries.
m.cross_validate_score()
m.cv_metric

{'mean_cross_validation': 0.03182563086816382,
 'mae_cross_validation': 0.10135065719788883}

In [13]:
# Display feature importances from the fitted step to identify the main
# explanatory variables; the dict is keyed by the right-hand-side terms of
# the model expression.
m.importance

{'bedrooms': 0.2765170675043592,
 'log_units_500_walk': 0.3175431100409288,
 'log_rich_500_walk': 0.40593982245471205}

In [14]:
# Add two more log-transformed 500-walk variables (shifted by 1 first).
# NOTE(review): `data` was already registered with orca; these in-place
# additions appear to be visible to the model step (the later importance
# output lists them) -- confirm if the table is ever registered as a copy.
data['log_singles_500_walk'] = np.log(data['singles_500_walk'] + 1)
data['log_children_500_walk'] = np.log(data['children_500_walk'] + 1)

In [15]:
# Expanded model expression: the original three terms plus the two new
# log-transformed variables (singles and children within the 500 walk).
m.model_expression = (
    'log_rent_sqft ~ bedrooms + log_units_500_walk + log_rich_500_walk'
    ' + log_singles_500_walk + log_children_500_walk'
)

In [16]:
# Re-run cross-validation with the expanded model expression and display
# the metrics.
# NOTE(review): this runs before the step is refit with the new expression;
# presumably cross_validate_score fits internally -- confirm.
m.cross_validate_score()
m.cv_metric

{'mean_cross_validation': 0.029222386766438384,
 'mae_cross_validation': 0.09636885589556099}

In [17]:
# Refit the step on the expanded model expression, then display the
# updated feature importances (now including the two added variables).
m.fit()
m.importance

{'bedrooms': 0.2512610450803349,
 'log_units_500_walk': 0.11722934102167251,
 'log_rich_500_walk': 0.22012426195932022,
 'log_singles_500_walk': 0.2416436799256035,
 'log_children_500_walk': 0.1697416720130688}

In [18]:
# Name the step; per the registration output, the saved config file is
# named after it ('random_forest_rental_prices.yaml').
m.name = 'random_forest_rental_prices'

In [19]:
# Register the step with modelmanager. Per the output, this saves the
# step's yaml file under the configs directory and registers the model step.
modelmanager.register(m)

Saving 'random_forest_rental_prices.yaml': C:\Users\Xavier\Urbansim\udst\urbansim_templates_testing\urbansim_templates\urbansim_templates\tests\configs
Registering model step 'random_forest_rental_prices'
