# Housing Regression Example
## Evaluate Models Notebook

In [None]:
import os
import pandas as pd
import seaborn as sns
from joblib import load
import matplotlib.pyplot as plt

In [None]:
# Show the current working directory: the relative '..' paths built in the
# following cells are resolved against it.
os.getcwd()

In [None]:
# Build the locations of the test-set CSVs inside the sibling data directory
data_path = os.path.join('..', 'data')
X_test_path, y_test_path = (
    os.path.join(data_path, csv_name) for csv_name in ('X_test.csv', 'y_test.csv')
)

In [None]:
# Load the test features and the test targets from their CSV files
X_test, y_test = (
    pd.read_csv(csv_path) for csv_path in (X_test_path, y_test_path)
)

In [None]:
# Build the locations of the two pickled models inside the sibling models directory
models_path = os.path.join('..', 'models')
lr_model_path, rf_model_path = (
    os.path.join(models_path, pickle_name) for pickle_name in ('lr.pkl', 'rf.pkl')
)

In [None]:
# Deserialize the two saved models with joblib.
# NOTE: joblib.load unpickles its input, so only point it at trusted files.
lr, rf = (load(model_file) for model_file in (lr_model_path, rf_model_path))

In [None]:
# Predict on the test features with the model loaded from lr.pkl
# (linear regression); the bare name on the last line shows the
# predictions as the cell output.
lr_preds = lr.predict(X_test)
lr_preds

In [None]:
# Predict on the test features with the model loaded from rf.pkl
# (random forest regression); the bare name on the last line shows the
# predictions as the cell output.
rf_preds = rf.predict(X_test)
rf_preds

In [None]:
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_error

In [None]:
# Explained variance of each model's predictions against the true target column
lr_evs, rf_evs = (
    explained_variance_score(y_test['target'], model_preds)
    for model_preds in (lr_preds, rf_preds)
)

In [None]:
# Display explained variance scores, rounded to two decimals.
# The spec is `.2f` without the space sign flag, so the value follows the
# colon with exactly one space; each line reports a single model.
print(f'The explained variance score for the linear regression model is: {lr_evs:.2f}')
print(f'The explained variance score for the random forest regression model is: {rf_evs:.2f}')

In [None]:
# Mean squared error (MSE) of each model's predictions against the true target column
lr_mse, rf_mse = (
    mean_squared_error(y_test['target'], model_preds)
    for model_preds in (lr_preds, rf_preds)
)

In [None]:
# Display MSE, rounded to two decimals.
# The spec is `.2f` without the space sign flag and there is no space before
# the colon, so the output reads "is: <value>"; each line reports a single model.
print(f"The MSE for the linear regression model is: {lr_mse:.2f}")
print(f"The MSE for the random forest regression model is: {rf_mse:.2f}")

In [None]:
# Attach both prediction arrays to y_test (in place) as new columns, so the
# true and predicted values sit side by side
y_test['lr_preds'], y_test['rf_preds'] = lr_preds, rf_preds

In [None]:
# Preview the first rows of y_test to confirm the lr_preds and rf_preds
# columns were added next to the target column.
y_test.head()

In [None]:
# Pairwise correlation matrix over the columns of y_test: the true target
# plus the lr_preds and rf_preds columns added above.
# NOTE(review): any extra column read from y_test.csv would also appear here - confirm.
y_test.corr()

In [None]:
# Seaborn pair plot over the columns of y_test (true target and both sets
# of predictions), for a visual check of how they relate to each other.
sns.pairplot(y_test)

In [None]:
# Decide where the scatterplot PNG will be written
image_path = os.path.join('..', 'imgs')
error_plot_path = os.path.join(image_path, 'Test_Scatterplot.png')

# Ensure the output directory exists; an existing directory is not an error
os.makedirs(image_path, exist_ok=True)

In [None]:
# Predicted-vs-true scatter for both models, drawn on a single square figure
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(y_test.target, lr_preds, c='crimson', label='Linear Regression')
ax.scatter(y_test.target, rf_preds, c='gold', label='RF Regression')

# Axis labels and title
ax.set_xlabel('True Values', fontsize=15)
ax.set_ylabel('Predictions', fontsize=15)
ax.set_title('Test Error', fontsize=15)

ax.legend()
fig.tight_layout()
# Write the PNG before show(), then display the figure
fig.savefig(error_plot_path)
plt.show()