# SKLEARN HOUSING DATA
## Evaluate Models Notebook

In [None]:
import os
import pandas as pd
import seaborn as sns
from joblib import load

In [None]:
# Relative location of the data directory: a "data" folder one level above
# the current working directory.
data_dir_parts = ('..', 'data')
data_path = os.path.join(*data_dir_parts)

In [None]:
# Path of the CSV that is later read into X_test.
x_test_filename = 'X_test.csv'
X_test_path = os.path.join(data_path, x_test_filename)

In [None]:
# Path of the CSV that is later read into y_test.
y_test_filename = 'y_test.csv'
y_test_path = os.path.join(data_path, y_test_filename)

In [None]:
# Read both test CSVs with pandas' default parsing options; the first path
# yields X_test and the second yields y_test.
X_test, y_test = map(pd.read_csv, (X_test_path, y_test_path))

In [None]:
# Relative location of the directory holding the serialized models: a
# "models" folder one level above the current working directory.
models_dir_parts = ('..', 'models')
models_path = os.path.join(*models_dir_parts)

In [None]:
# Restore the model persisted as lr.pkl (reported further down as the linear
# regression model) and predict on the test features.
lr_model_file = 'lr.pkl'
lr_model_path = os.path.join(models_path, lr_model_file)
# NOTE: joblib.load unpickles the file, so only load model files you trust.
lr = load(filename=lr_model_path)
lr_preds = lr.predict(X_test)

In [None]:
# Echo the predictions loaded from lr.pkl as the cell output for a quick
# visual sanity check.
lr_preds

In [None]:
# Restore the model persisted as rf.pkl (reported further down as the random
# forest regression model) and predict on the test features.
rf_model_file = 'rf.pkl'
rf_model_path = os.path.join(models_path, rf_model_file)
# NOTE: joblib.load unpickles the file, so only load model files you trust.
rf = load(filename=rf_model_path)
rf_preds = rf.predict(X_test)

In [None]:
# Echo the predictions loaded from rf.pkl as the cell output for a quick
# visual sanity check.
rf_preds

In [None]:
from sklearn.metrics import explained_variance_score

# Explained variance of each model's predictions against the true test
# targets; the order (lr, rf) matches the order of the prediction arrays.
lr_evs, rf_evs = (
    explained_variance_score(y_test['target'], model_preds)
    for model_preds in (lr_preds, rf_preds)
)

In [None]:
# Report both scores with a single call; sep='\n' keeps one message per line.
# The ' .2f' spec prints two decimals and reserves a leading sign column.
print(
    f'The explained variance score for the linear regression models is: {lr_evs: .2f}',
    f'The explained variance score for the random forest regression models is: {rf_evs: .2f}',
    sep='\n',
)

In [None]:
# Put both prediction arrays next to the true targets so they can be
# inspected, correlated and plotted together. assign() returns a new frame
# with the columns added (or overwritten on re-run), which is rebound to y_test.
y_test = y_test.assign(lr_preds=lr_preds, rf_preds=rf_preds)

In [None]:
# Display the test-target frame, which by now also carries the lr_preds and
# rf_preds columns added in the previous cell.
y_test

In [None]:
# Pairwise correlation matrix of y_test's columns (pandas defaults to
# Pearson), i.e. how closely each model's predictions track the true target
# and each other.
y_test.corr()

In [None]:
# Grid of pairwise plots over y_test's columns, giving a visual counterpart
# to the correlation matrix of true targets and predictions.
sns.pairplot(y_test)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Build the output locations first: an "images" folder one level above the
# working directory, and the scatter-plot file inside it.
image_path = os.path.join('..', 'images')
error_plot_path = os.path.join(image_path, 'Test_Scatterplot.png')

# exist_ok=True lets this cell be re-run once the folder already exists.
os.makedirs(name=image_path, exist_ok=True)

In [None]:
# Predicted vs. true target values for both models on one square canvas.
fig, ax = plt.subplots(figsize=(10, 10))

# The random forest points are drawn second, so they sit on top of the
# linear regression points wherever the two overlap.
ax.scatter(y_test.target, lr_preds, c='crimson', label='Linear Regression')
ax.scatter(y_test.target, rf_preds, c='gold', label='RF Regression')

ax.set_xlabel('True Values', fontsize=15)
ax.set_ylabel('Predictions', fontsize=15)
ax.set_title('Test Error', fontsize=15)
ax.legend()

# Save before show(): the figure is written to disk while it is still open.
fig.tight_layout()
fig.savefig(error_plot_path)
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Mean squared error of each model's predictions against the true test
# targets; lower is better.
lr_mse = mean_squared_error(y_true=y_test['target'], y_pred=lr_preds)
rf_mse = mean_squared_error(y_true=y_test['target'], y_pred=rf_preds)

In [None]:
# Report both errors with a single call; sep="\n" keeps one message per line.
# The ' .2f' spec prints two decimals and reserves a leading sign column.
print(
    f"The MSE for the linear regression models is : {lr_mse: .2f}",
    f"The MSE for the random forest regression models is : {rf_mse: .2f}",
    sep="\n",
)