In [1]:
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [126]:
df = pd.read_csv("data_merge.csv")
df['인구밀도'] = df['인구밀도'].str.replace(',', '').astype(float)
df['주유소_LPG개수'] = df['주유소_개수'] + df['LPG충전소_개수']


# 주유소_최소3개평균	주유소_최소5개평균	lpg_최소3개평균	lpg_최소5개평균	
X = df[['인구밀도', '혼잡시간강도', '공시지가중앙값', '상업수']]
# X = df[['총교통량', '버스교통량', '화물차교통량', '인구밀도', '혼잡시간강도', '공시지가중앙값', '교육', '상업수']]
y = df['주유소_최소5개평균']

In [127]:
df['주유소_LPG개수']

0       2
1       2
2       0
3       0
4       0
       ..
199    18
200    28
201     8
202     3
203     5
Name: 주유소_LPG개수, Length: 204, dtype: int64

In [128]:
X = sm.add_constant(X)

In [129]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             주유소_최소5개평균   R-squared:                       0.384
Model:                            OLS   Adj. R-squared:                  0.371
Method:                 Least Squares   F-statistic:                     30.98
Date:                Thu, 14 Nov 2024   Prob (F-statistic):           4.74e-20
Time:                        16:54:36   Log-Likelihood:                -1619.6
No. Observations:                 204   AIC:                             3249.
Df Residuals:                     199   BIC:                             3266.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       2712.4038    171.405     15.825      0.0

In [51]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=0)
model = LinearRegression()
model.fit(X_train, y_train)

# 결과 확인
print("회귀계수:", model.coef_)
print("절편:", model.intercept_)
print("R^2:", model.score(X_test, y_test))

회귀계수: [-0.85552208  2.38345769 -4.79801001 -7.82618098]
절편: 6358.883748400931
R^2: 0.23067740402297088


In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy import stats

In [52]:
model = LinearRegression()
model.fit(X, y)

# 예측값과 잔차 계산
y_pred = model.predict(X)
residuals = y - y_pred

# 표준 오차 계산
sse = np.sum(residuals**2)  # Sum of Squared Errors
sst = np.sum((y - np.mean(y))**2)  # Total Sum of Squares
mse = sse / (len(y) - len(X.columns) - 1)  # Mean Squared Error
se = np.sqrt(np.diagonal(mse * np.linalg.inv(X.T @ X)))  # Standard Error of coefficients

# t값과 p-value 계산
t_values = model.coef_ / se
p_values = [2 * (1 - stats.t.cdf(np.abs(t), df=len(X) - len(X.columns) - 1)) for t in t_values]

# 결과 출력
print("Coefficients:", model.coef_)
print("p-values:", p_values)

Coefficients: [ -1.00745739   2.6246415    9.16950183 -15.44740415]
p-values: [2.8071100999227383e-11, 7.599627949161913e-08, 0.4907008713736447, 0.08590559252293395]


In [142]:
seogu = df[df['구'] == '서구']

coefs = [-0.023, -19.1367, -0.0001, -0.1700]

seogu['입지선정지수'] = seogu['인구밀도'] * coefs[0] + seogu['혼잡시간강도'] * coefs[1] + seogu['공시지가중앙값'] * coefs[2] + seogu['상업수'] * coefs[3]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  seogu['입지선정지수'] = seogu['인구밀도'] * coefs[0] + seogu['혼잡시간강도'] * coefs[1] + seogu['공시지가중앙값'] * coefs[2] + seogu['상업수'] * coefs[3]


In [143]:
seogu['입지선정지수']

102   -2059.171684
103   -1766.302076
104   -2199.711195
105    -971.849631
106   -1338.511345
107   -1112.212793
108   -1737.284786
109   -1746.324786
110   -1748.504786
Name: 입지선정지수, dtype: float64

In [144]:
seogu

Unnamed: 0,지도코드,지도_동,시,구,동,행정코드,총교통량,개인승용차교통량,버스교통량,화물차교통량,...,부동산,소매,수리·개인,숙박,시설관리·임대,예술·스포츠,음식,상업수,주유소_LPG개수,입지선정지수
102,27170101,내당동,대구광역시,서구,내당동,2203051,8089.0,6807.0,437.5,844.0,...,71,444,256,17,50,61,531,1588,3,-2059.171684
103,27170102,비산동,대구광역시,서구,비산동,2203054,12772.2,10478.2,654.2,1640.2,...,68,663,366,44,61,92,884,2327,5,-1766.302076
104,27170103,평리동,대구광역시,서구,평리동,2203060,6963.8,5605.6,313.2,1045.2,...,97,557,358,40,88,105,830,2299,15,-2199.711195
105,27170104,상리동,대구광역시,서구,상중이동,2203066,21527.0,15375.0,391.0,5761.0,...,0,20,19,0,7,0,9,61,1,-971.849631
106,27170105,중리동,대구광역시,서구,상중이동,2203066,21527.0,15375.0,391.0,5761.0,...,26,231,94,3,54,28,181,725,6,-1338.511345
107,27170106,이현동,대구광역시,서구,상중이동,2203066,21527.0,15375.0,391.0,5761.0,...,10,88,94,0,31,6,56,320,11,-1112.212793
108,27170107,원대동1가,대구광역시,서구,원대동,2203068,8416.0,7047.0,386.0,984.0,...,8,45,22,0,1,2,42,133,1,-1737.284786
109,27170108,원대동2가,대구광역시,서구,원대동,2203068,8416.0,7047.0,386.0,984.0,...,0,10,7,0,2,1,24,57,0,-1746.324786
110,27170109,원대동3가,대구광역시,서구,원대동,2203068,8416.0,7047.0,386.0,984.0,...,9,62,26,3,9,7,81,213,0,-1748.504786


In [145]:
seogu = seogu.sort_values(by='입지선정지수', ascending=True)

In [146]:
seogu.to_csv("seogu_rank.csv", index=False, encoding='utf-8-sig')