![Ames Housing dataset image](https://i.imgur.com/lTJVG4e.png)

# Imports

In [1]:
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Read Data and Set Dependent & Independent Variables

In [2]:
# Location of the training CSV, relative to the notebook's working directory.
iowa_file_path = "01_input/train.csv"
home_data = pd.read_csv(iowa_file_path)

# Target (dependent variable): the SalePrice column.
y = home_data["SalePrice"]

# Predictors (independent variables) used by every model below.
features = [
    "LotArea",
    "YearBuilt",
    "1stFlrSF",
    "2ndFlrSF",
    "FullBath",
    "BedroomAbvGr",
    "TotRmsAbvGrd",
]
X = home_data[features]

# Split Data into Train and Test Sets

In [3]:
# 80/20 split of the features and target; the fixed random_state keeps the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=1,
)

# Random Forest Models

In [4]:
# Five candidate random forests; all share random_state=0 and differ only in
# the hyper-parameters spelled out on each constructor call.
model_1 = RandomForestRegressor(
    n_estimators=50,
    random_state=0,
)
model_2 = RandomForestRegressor(
    n_estimators=100,
    random_state=0,
)
model_3 = RandomForestRegressor(
    n_estimators=100,
    criterion="absolute_error",
    random_state=0,
)
model_4 = RandomForestRegressor(
    n_estimators=200,
    min_samples_split=20,
    random_state=0,
)
model_5 = RandomForestRegressor(
    n_estimators=100,
    max_depth=7,
    random_state=0,
)

# Order matters: later cells refer to the candidates by 1-based position.
models = [model_1, model_2, model_3, model_4, model_5]

# Choosing the Best Model

In [5]:
def score_model(model, X_train=X_train, X_test=X_test, y_train=y_train,
                y_test=y_test):
    """Fit ``model`` and return its mean absolute error.

    The model is fitted in place on ``(X_train, y_train)`` and then scored by
    comparing its predictions for ``X_test`` against ``y_test``.

    The four data arguments default to the notebook-level split. Those
    defaults are bound when this ``def`` is executed, so the cell has to be
    re-run after the split is regenerated for the new data to be picked up.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : data passed to ``model.fit``.
    X_test, y_test : data used to compute the error.

    Returns
    -------
    The value of ``mean_absolute_error(y_test, model.predict(X_test))``.
    """
    model.fit(X_train, y_train)
    return mean_absolute_error(y_test, model.predict(X_test))

# Score every candidate; keys are the 1-based positions within `models`.
performances = {
    position: score_model(model=candidate)
    for position, candidate in enumerate(models, start=1)
}

for key, mae in performances.items():
    print(f"Model: {key}, MAE: {mae:,.0f}")

# Key (1-based model number) with the smallest MAE; ties resolve to the
# earliest entry, as with min() over the dict keys.
best_model, _ = min(performances.items(), key=lambda entry: entry[1])
print(f"\n\nBest Model is model_{best_model}")

Model: 1, MAE: 22,075
Model: 2, MAE: 21,980
Model: 3, MAE: 22,457
Model: 4, MAE: 22,509
Model: 5, MAE: 22,439


Best Model is model_2


# Generating Test Predictions

In [6]:
# Look the winner up from `performances` under its own name instead of reading
# the integer previously stored in `best_model`. The original
# `models[best_model-1]` raised a TypeError when this cell was run a second
# time, because by then `best_model` held an estimator, not a 1-based index.
best_model_number = min(performances, key=performances.get)
best_model = models[best_model_number - 1]

# Refit the selected model on every labelled row before predicting.
best_model.fit(X, y)

# NOTE(review): X_test is a slice of X, so after the refit above these
# predictions are in-sample and must not be read as a hold-out score.
# Presumably a separate competition test file was meant here -- confirm.
predictions = best_model.predict(X_test)