# Phase 2 Project Office Hours

## Building an Evaluation Function

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [None]:
# Load the housing data from the CSV file into a DataFrame.
DATA_PATH = 'data/kc_house_data.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Display the first five rows (the default for head()) as a quick look at the loaded data.
df.head()

In [None]:
# Split the frame into the target series (the `price` column) and a feature
# frame holding every other column.
target_col = 'price'
y = df[target_col]
X = df.drop(columns=[target_col])

In [None]:
# Hold out 33% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
splits = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
X_train, X_test, y_train, y_test = splits

In [None]:
# Fit a one-feature OLS model of price on sqft_living. A constant column is
# added explicitly so the model includes an intercept term.
train_design = sm.add_constant(X_train['sqft_living'])
lr_simple = sm.OLS(endog=y_train, exog=train_design).fit()

In [None]:
# Display the fitted model's statsmodels summary report.
lr_simple.summary()

In [None]:
# Predict on both splits. Each input gets the same constant column that was
# added when the model was fitted.
train_preds, test_preds = (
    lr_simple.predict(sm.add_constant(features['sqft_living']))
    for features in (X_train, X_test)
)

In [None]:
# Evaluate
# Report R2, MAE and RMSE on the train and test splits so the two can be
# compared side by side.
#
# RMSE is computed as sqrt(MSE) rather than with
# mean_squared_error(..., squared=False): the `squared` argument was
# deprecated in scikit-learn 1.4 and removed in 1.6, so passing it raises a
# TypeError on current releases. For a single target the two are equivalent.
train_rmse = np.sqrt(mean_squared_error(y_train, train_preds))
test_rmse = np.sqrt(mean_squared_error(y_test, test_preds))

print(f"Train R2 Score: {r2_score(y_train, train_preds):.4f}")
print(f"Train MAE Score: ${mean_absolute_error(y_train, train_preds):.4f}")
print(f"Train RMSE Score: ${train_rmse:.4f}")
print("*"*20)
print(f"Test R2 Score: {r2_score(y_test, test_preds):.4f}")
print(f"Test MAE Score: ${mean_absolute_error(y_test, test_preds):.4f}")
print(f"Test RMSE Score: ${test_rmse:.4f}")

In [None]:
# visualize residuals
# Plot residuals (actual minus predicted) against the predictions for each
# split, with a horizontal reference line at zero.
for split_label, actual, predicted in (
    ('Train', y_train, train_preds),
    ('Test', y_test, test_preds),
):
    plt.scatter(predicted, actual - predicted, label=split_label)

plt.axhline(y=0, color='red', label='0')
plt.ylabel('residuals')
plt.xlabel('predictions')
plt.legend()
plt.show()

## 3D Visualizations With Plotly

In [None]:
import plotly.express as px

# 3D scatter of the full dataset: sqft_living and bathrooms on the x and y
# axes, price on the z axis.
fig = px.scatter_3d(
    df,
    x='sqft_living',
    y='bathrooms',
    z='price',
)
fig.show()