In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.preprocessing import scale
from sklearn.metrics import mean_squared_error

In [2]:
# Path to the Boston housing subset, relative to the notebook's directory.
data = "../7/boston_small.csv"
# The first 8 lines of the file are skipped before the header row
# (presumably a metadata/comment preamble -- confirm against the CSV).
# The first CSV column is the row index that was saved with the data;
# index_col=0 reads it back as the index instead of letting it appear as a
# spurious "Unnamed: 0" data column.
df = pd.read_csv(data, skiprows=8, index_col=0)
display(df.head())

Unnamed: 0,CRIM,INDUS,RM,MEDV
0,0.00632,2.31,6.575,24.0
1,0.02731,7.07,6.421,21.6
2,0.02729,7.07,7.185,34.7
3,0.03237,2.18,6.998,33.4
4,0.06905,2.18,7.147,36.2


In [3]:
# Explanatory variables (regressors) for the regression.
X = df.loc[:, ['CRIM', 'INDUS', 'RM']]
# Response variable.
y = df.loc[:, 'MEDV']

In [4]:
# statsmodels' OLS does not add an intercept on its own, so a constant
# column is prepended to the regressors before fitting.
X_c = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_c)
results = model.fit()

In [5]:
# Report the adjusted R-squared of the fitted intercept model, 3 decimals.
adj_r2 = results.rsquared_adj
print(f"\n自由度調整済み決定係数: {adj_r2:.3f}")


自由度調整済み決定係数: 0.807


In [6]:
# Partial regression coefficient of CRIM. In a linear model this is also the
# change in the predicted response for a one-unit increase in CRIM with the
# other regressors held fixed, hence the same value is printed twice.
crim_coef = results.params.loc['CRIM']
for label in ("CRIMの偏回帰係数", "CRIMの値を1増やした場合の目的変数の増分"):
    print(f"{label}: {crim_coef:.3f}")

CRIMの偏回帰係数: 0.296
CRIMの値を1増やした場合の目的変数の増分: 0.296


In [7]:
# Single hypothetical observation for which MEDV is predicted.
test_data = pd.DataFrame({
    'CRIM': [0.1],
    'INDUS': [4.0],
    'RM': [7.0]
})

# has_constant='add' forces the intercept column to be added: in a one-row
# frame every column looks constant, so the default ('skip') would add nothing.
# The columns are then put in the exact order the model was fitted with,
# because predict() on a plain (non-formula) model consumes them by position,
# not by name.
test_data_c = sm.add_constant(test_data, has_constant='add')[results.model.exog_names]
prediction = results.predict(test_data_c)
# Positional access: take the first (only) prediction regardless of how the
# input frame happens to be indexed.
print(f"CRIM=0.1, INDUS=4.0, RM=7.0のときのMEDVの予測値: {prediction.iloc[0]:.1f}")

CRIM=0.1, INDUS=4.0, RM=7.0のときのMEDVの予測値: 32.2


In [8]:
# Standardise the regressors and the response (zero mean, unit variance per
# column) so that the fitted coefficients are comparable across variables.
X_scaled_array = scale(X)
y_scaled_array = scale(y)

# scale() returns bare NumPy arrays. Restore the column/series names and the
# original row index, so the scaled objects stay aligned with each other and
# with the source frame instead of silently getting a fresh 0..n-1 index.
X_scaled = pd.DataFrame(X_scaled_array, columns=X.columns, index=X.index)
y_scaled = pd.Series(y_scaled_array, name=y.name, index=y.index)

In [9]:
# Fit on the standardised data WITHOUT an intercept column: both sides are
# centred, so the intercept would be zero anyway. Because no constant is
# present, the statsmodels summary labels R-squared as "uncentered".
model_scaled = sm.OLS(endog=y_scaled, exog=X_scaled)
results_scaled = model_scaled.fit()

scaled_summary = results_scaled.summary()
print(scaled_summary)

                                 OLS Regression Results                                
Dep. Variable:                   MEDV   R-squared (uncentered):                   0.809
Model:                            OLS   Adj. R-squared (uncentered):              0.807
Method:                 Least Squares   F-statistic:                              418.6
Date:                Mon, 23 Jun 2025   Prob (F-statistic):                   2.62e-106
Time:                        10:51:01   Log-Likelihood:                         -177.57
No. Observations:                 300   AIC:                                      361.1
Df Residuals:                     297   BIC:                                      372.3
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [11]:
# Rank the standardised coefficients by magnitude (sign is irrelevant for
# "how strongly does this variable move the response").
abs_params = results_scaled.params.abs()
coef_abs = abs_params.sort_values(ascending=False)
print(coef_abs)

# After the descending sort the first label is the most influential regressor.
top_feature = coef_abs.index[0]
print(f"\n目的変数への影響が最も大きい説明変数: {top_feature}")

RM       0.879515
INDUS    0.059635
CRIM     0.022335
dtype: float64

目的変数への影響が最も大きい説明変数: RM
