# SKLEARN HOUSING DATA
## Train Models Notebook

In [None]:
import os
import pandas as pd

In [None]:
# Data directory, located in the parent of the current working directory.
data_path = os.path.join(os.pardir, 'data')

In [None]:
# Path of X_train.csv inside the data directory (loaded later as X_train).
X_train_data_path = os.path.join(data_path, 'X_train.csv')

In [None]:
# Path of y_train.csv inside the data directory (loaded later as y_train).
y_train_data_path = os.path.join(data_path, 'y_train.csv')

In [None]:
# Read both training CSV files into DataFrames (features first, then targets).
X_train, y_train = map(pd.read_csv, (X_train_data_path, y_train_data_path))

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear regression baseline. Fit against the 1-D `target` column (the same
# target the random forest is trained on) rather than the whole y_train
# frame, so that lr.predict() returns a flat array like rf.predict() instead
# of an (n_samples, 1) column.
lr = LinearRegression()
lr.fit(X_train, y_train.target)

In [None]:
from joblib import dump, load

In [None]:
# Show the current working directory; the relative '..' paths used in this
# notebook are resolved against it.
os.getcwd()

In [None]:
# Directory for serialized models, located in the parent of the working directory.
model_path = os.path.join(os.pardir, 'models')

In [None]:
# Create the models directory if it is missing; exist_ok=True keeps a re-run
# of this cell from raising FileExistsError when the directory already exists.
os.makedirs(model_path, exist_ok=True)

In [None]:
# Output file for the fitted linear regression model.
lr_model_name = os.path.join(model_path, 'lr.pkl')

In [None]:
# Persist the fitted linear regression model with joblib.
dump(value=lr, filename=lr_model_name)

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Random forest with default hyper-parameters. The seed is pinned so that the
# bootstrap sampling -- and therefore the saved model and the training metrics
# computed from it -- are identical on every run of the notebook.
rf = RandomForestRegressor(random_state=42)

In [None]:
# Train the forest on the `target` column of y_train.
rf.fit(X_train, y_train['target'])

In [None]:
# Output file for the fitted random forest model.
rf_model_name = os.path.join(model_path, 'rf.pkl')

In [None]:
# Persist the fitted random forest model with joblib.
dump(value=rf, filename=rf_model_name)

# Train Metrics

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Predictions of both fitted models on the data they were trained on.
lr_train_preds, rf_train_preds = lr.predict(X_train), rf.predict(X_train)

In [None]:
# Mean squared error of each model's training-set predictions against y_train.
lr_mse, rf_mse = (
    mean_squared_error(y_train, train_preds)
    for train_preds in (lr_train_preds, rf_train_preds)
)

In [None]:
# Report both training errors, one per line, with two decimal places.
print(
    f"The MSE for the linear regression models is : {lr_mse: .2f}",
    f"The MSE for the random forest regression models is : {rf_mse: .2f}",
    sep="\n",
)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Destination of the training scatter plot. The images directory lives in the
# parent of the working directory and is created if it does not exist yet.
image_path = os.path.join(os.pardir, 'images')
error_plot_path = os.path.join(image_path, 'Train_Scatterplot.png')
os.makedirs(image_path, exist_ok=True)

In [None]:
# Plot each model's training predictions against the true values on a single
# square figure, save it to error_plot_path, then display it.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(y_train, lr_train_preds, c='crimson', label='Linear Regression')
ax.scatter(y_train, rf_train_preds, c='gold', label='RF Regression')

ax.set_xlabel('True Values', fontsize=15)
ax.set_ylabel('Predictions', fontsize=15)
ax.set_title('Training Error', fontsize=15)

ax.legend()
fig.tight_layout()
# Save before show(): some backends release the figure once it has been shown.
fig.savefig(error_plot_path)
plt.show()