In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import scale

In [2]:
csv_in = 'condo.csv'
df = pd.read_csv(csv_in, sep=',', skiprows=0, header=0, encoding='shift-jis')

In [3]:
print(df.shape)
print(df.info())
display(df.head())

(25, 9)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   No         25 non-null     int64 
 1   minutes    25 non-null     int64 
 2   Price      25 non-null     int64 
 3   Area       25 non-null     int64 
 4   Year       25 non-null     int64 
 5   Structure  25 non-null     object
 6   BCR        25 non-null     int64 
 7   FAR        25 non-null     int64 
 8   Reformed   25 non-null     int64 
dtypes: int64(8), object(1)
memory usage: 1.9+ KB
None


Unnamed: 0,No,minutes,Price,Area,Year,Structure,BCR,FAR,Reformed
0,1,3,190000000,70,1,ＲＣ,80,500,0
1,2,5,350000000,200,39,ＳＲＣ,60,400,0
2,4,4,38000000,30,16,ＲＣ,80,700,0
3,5,2,30000000,20,17,ＳＲＣ,80,700,0
4,6,4,29000000,25,14,ＲＣ,80,600,0


In [4]:
X = df.drop(columns=['No', 'Price'])
y = df['Price']
print(X.shape)
display(X.head())
print(y.shape)
print(y.head())

(25, 7)


Unnamed: 0,minutes,Area,Year,Structure,BCR,FAR,Reformed
0,3,70,1,ＲＣ,80,500,0
1,5,200,39,ＳＲＣ,60,400,0
2,4,30,16,ＲＣ,80,700,0
3,2,20,17,ＳＲＣ,80,700,0
4,4,25,14,ＲＣ,80,600,0


(25,)
0    190000000
1    350000000
2     38000000
3     30000000
4     29000000
Name: Price, dtype: int64


In [5]:
X_dumm = pd.get_dummies(X, drop_first=True, dtype='uint8')
print('X_dumm:', X_dumm.shape)
display(X_dumm.head())

X_dumm: (25, 7)


Unnamed: 0,minutes,Area,Year,BCR,FAR,Reformed,Structure_ＳＲＣ
0,3,70,1,80,500,0,0
1,5,200,39,60,400,0,1
2,4,30,16,80,700,0,0
3,2,20,17,80,700,0,1
4,4,25,14,80,600,0,0


In [6]:
# 定数項追加・重回帰分析（標準化なし）
X_dumm_c = sm.add_constant(X_dumm)
model = sm.OLS(y, X_dumm_c)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                  Price   R-squared:                       0.950
Model:                            OLS   Adj. R-squared:                  0.930
Method:                 Least Squares   F-statistic:                     46.51
Date:                Mon, 22 Sep 2025   Prob (F-statistic):           7.36e-10
Time:                        17:28:06   Log-Likelihood:                -450.39
No. Observations:                  25   AIC:                             916.8
Df Residuals:                      17   BIC:                             926.5
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const          7.574e+07   9.08e+07      0.834

In [7]:
print('R2:', results.rsquared)
print('Adj R2:', results.rsquared_adj)
print(results.params)

R2: 0.9503795837148971
Adj R2: 0.9299476475975017
const            7.573668e+07
minutes         -2.274047e+06
Area             1.775590e+06
Year            -9.504725e+05
BCR             -2.994994e+05
FAR             -6.894131e+04
Reformed         7.205383e+06
Structure_ＳＲＣ    4.965924e+05
dtype: float64


In [8]:
# 標準化
X_scaled_ar = scale(X_dumm)
y_scaled_ar = scale(y)

In [9]:
X_scaled = pd.DataFrame(X_scaled_ar, columns=X_dumm.columns)
y_scaled = pd.Series(y_scaled_ar, name=y.name)
model = sm.OLS(y_scaled, X_scaled)
results_scaled = model.fit()
print(results_scaled.summary())

                                 OLS Regression Results                                
Dep. Variable:                  Price   R-squared (uncentered):                   0.950
Model:                            OLS   Adj. R-squared (uncentered):              0.931
Method:                 Least Squares   F-statistic:                              49.25
Date:                Mon, 22 Sep 2025   Prob (F-statistic):                    1.85e-10
Time:                        17:28:15   Log-Likelihood:                          2.0684
No. Observations:                  25   AIC:                                      9.863
Df Residuals:                      18   BIC:                                      18.40
Df Model:                           7                                                  
Covariance Type:            nonrobust                                                  
                    coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------

In [10]:
print(results_scaled.params.sort_values(key=np.abs, ascending=False))

Area             0.962703
Year            -0.172663
FAR             -0.101733
minutes         -0.051779
Reformed         0.036470
BCR             -0.022436
Structure_ＳＲＣ    0.003291
dtype: float64


In [11]:
# Structure_SRC: SRC=1, RC=0
# Reformed: 1=リフォームあり, 0=なし
new_data = pd.DataFrame({
    'minutes': [5, 15],
    'Area': [65, 75],
    'Year': [4, 10],
    'BCR': [75, 65],
    'FAR': [510, 410],
    'Reformed': [1, 0],
    'Structure_SRC': [1, 0]
})

for col in X_dumm.columns:
    if col not in new_data.columns:
        new_data[col] = 0
new_data = new_data[X_dumm.columns]
print(new_data)

   minutes  Area  Year  BCR  FAR  Reformed  Structure_ＳＲＣ
0        5    65     4   75  510         1              0
1       15    75    10   65  410         0              0


In [13]:
new_data_c = sm.add_constant(new_data)
pred_price = results.predict(new_data_c)
for i, price in enumerate(pred_price):
    print(f'{i+1}行目: {price:.2e}')

1行目: 1.26e+08
2行目: 1.18e+08
