## Inferential Visualizations
- Look at feature loadings.
- Look at how accurate your predictions are.
- Is there a pattern to your errors? Consider reworking your model to address this.

In [165]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [166]:
# Access scaled & Test/Train-Split variables from Notebooks 02 & 03
%store -r X_train
%store -r X_test
%store -r X_train_ss
%store -r X_test_ss
%store -r y_train
%store -r y_test

In [167]:
# List ID of X-features included in Linear Regression Model
features =['Year Remod/Add', 'Year Built', '1st Flr SF','Total Bsmt SF','Garage Area', 'Gr Liv Area', 'Overall Qual']

In [173]:
lm = LinearRegression()
lm.fit(X_train_ss,y_train)
# Create predictions using the `lm` object.
y_preds              = lm.predict(X_test_ss)
y_pred = [y_preds] #Create a list of y_pred, b/c I am trying to make it a column on the X_test_ss df, b/c I want to use sns.regplot since I don't know what to think of plt.plot, plt.scatter
X_test_ss['y_pred'] = y_pred #Also I dont know what to think of negative Sale Price on variance of error

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [172]:
y_pred = [y_pred]
y_preds

[array([105137.77192132, 249978.21906679, 125312.98423593, 114964.42950248,
        175218.62207898, 288730.26959908, 218586.16060605, 207855.87168281,
        241605.61689414, 141252.99166151, 212632.98391164, 157883.75187766,
        104635.43594749, 144026.5205928 , 234426.41068941, 285324.38749139,
        222048.43079443, 236065.62319341, 229700.09501989, 305634.44466132,
        314793.9577425 , 178277.279669  , 137378.75698444, 121190.6223421 ,
        136150.75845182, 186854.70419825, 109550.35013586, 315127.27329162,
        222301.49429076, 111226.30801512, 128423.43914874, 217834.57896742,
        170896.22728631, 275612.51320745, 128082.64166066, 271465.42396115,
        226451.21004422, 154602.78613748, 133245.4351014 , 292219.22696936,
        179659.2607017 , 197870.97281502, 109284.74437308, 158670.74785306,
        250073.86885788, 124592.76896395, 157004.17958716, 125556.31554382,
        110894.01227698, 123859.84234251, 123588.15516003, 153878.7635706 ,
        1001

In [None]:
# Verify dimensions in X and y represent an equal number of observations in each, n = No. of rows
print('X:        ', X_test.shape) # X.shape equals (n,p)
print('y_actual: ', y_test.shape) # y.shape equals (n, null)
print('y_pred: ', y_pred.shape)

In [None]:
y_pred;

### LINE Assumptions: Linearity
- y is linearly related to X

In [None]:
y_pred.shape

In [None]:
X_test.shape

In [None]:
X_test[['y_pred']]= y_pred

In [None]:
# Plot the predictions to gain a sense of the accuracy of the model
plt.scatter(X_test[:,1], y_test, s = 1);
plt.plot(X_test[:,1], y_pred, color = 'orange');
#plt.xlim(0, 50)
#plt.ylim(0, 10);
plt.xlabel(f'X-feature: Year Built');
plt.ylabel('Predicted Sale Price');
plt.title(f'Is this model accurate?');

In [None]:
sns.regplot('temp', y_test, elec, ci=None, 
            scatter_kws = {'s': 1}, 
            line_kws = {'color': 'orange'})

### LINE Assumptions: Normality of Errors

In [None]:
# N - Normality of errors
resids = y_test - y_pred
plt.figure(figsize = (10, 5));
plt.hist(resids, bins=100);
plt.axvline(0,color='red')
plt.xlabel(f'Residuals: (y_test - y_pred),  \u03BC = {round(resids.mean(),2)}');
plt.ylabel('Frequency');
plt.title(f'Normality of Errors');
print(f'Mean of resids: {round(resids.mean(),2)}')

### LINE Assumptions: Equal Variance of Errors

In [None]:
# E-Equal Variance of Errors: errors should have consistent pattern for all X-values, resids should have no relationship w/ X
plt.scatter(y_pred, resids, s=1, )
plt.axhline(resids.mean(), color="orange");
plt.xlabel(f'Residuals: (y_test - y_pred),  \u03BC = {round(resids.mean(),2)}');
plt.ylabel(f'Predicted Sale Price');
plt.title(f'Equal Variance of Errors');

### Root Mean Squared Error

In [None]:
# Score it: Evaluate the model locally with training values of Sale Price
# Mean Squared Error (MSE)
MSE = metrics.mean_squared_error(y_test,y_pred)
print(f'MSE: {MSE:,.2f}')
# Root Mean Squared Error (RMSE)
RMSE = metrics.mean_squared_error(y_test,y_pred,squared=False)
print(f'RMSE: {RMSE:,.2f}')