# 第16章 モデルを評価する
## 16.1 残差

### データを読み込む

In [None]:
import polars as pl

# Load the housing CSV into a polars DataFrame and preview the first rows.
housing = pl.read_csv(source="../data/housing_renamed.csv")
housing.head(5)

### データを学習する

In [None]:
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Fit a GLM of value per square foot on unit count, square footage and
# borough. No `family` is passed, so statsmodels' default family is used
# (Gaussian according to the GLM documentation).
house1 = smf.glm(
    formula="value_per_sq_ft ~ units + sq_ft + boro",
    data=housing,
).fit()

# Display the fitted model's summary table.
house1.summary()

### 残差プロットを表示する

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Residual plot: fitted values on the x-axis against deviance residuals on
# the y-axis, drawn on an explicitly created Axes.
fig, ax = plt.subplots()
sns.scatterplot(x=house1.fittedvalues, y=house1.resid_deviance, ax=ax)

plt.show()

### 層別した残差プロットを描画する

In [None]:
# Colour assigned to each stratum (borough) in the stratified plot.
# A plain dict literal is used; wrapping it in dict(...) only made a
# redundant copy.
color_dict = {
    "Manhattan": "#d7191c",
    "Brooklyn": "#fdae61",
    "Queens": "#ffffbf",
    "Bronx": "#abdda4",
    "Staten Island": "#2b83ba",
}

# Same residual plot as before, with points coloured by borough.
# The black marker edge keeps the pale colours (e.g. "#ffffbf") visible.
fig, ax = plt.subplots()
sns.scatterplot(
    x=house1.fittedvalues,
    y=house1.resid_deviance,
    hue=housing["boro"],
    ax=ax,
    palette=color_dict,
    edgecolor="black",
)

plt.show()

### Q-Qプロット

In [None]:
from scipy import stats

# Work on a copy of the deviance residuals so the fitted result's own
# array is never touched by later cells.
resid = house1.resid_deviance.copy()

# Q-Q plot of the residuals. Use the public `sm.qqplot` entry point instead
# of reaching through `statsmodels.graphics.gofplots`: a bare
# `import statsmodels` does not load that submodule by itself, so the
# attribute path only worked as a side effect of `import statsmodels.api`.
# line="r" overlays a regression line fitted to the plotted points.
fig = sm.qqplot(resid, line="r")
plt.show()


### 残差のヒストグラム

In [None]:
# Standardise the residuals with scipy's z-score.
resid_std = stats.zscore(resid)

# Histogram of the standardised residuals on its own Axes.
fig, ax = plt.subplots()
sns.histplot(data=resid_std, ax=ax)
plt.show()