In [163]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [164]:
# 1. 데이터 불러오기
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/kc_house_data.csv"
df = pd.read_csv(url)
print("✅ 데이터 로딩 완료")
print(df.head())

✅ 데이터 로딩 완료
           id             date     price  bedrooms  bathrooms  sqft_living  \
0  7129300520  20141013T000000  221900.0         3       1.00         1180   
1  6414100192  20141209T000000  538000.0         3       2.25         2570   
2  5631500400  20150225T000000  180000.0         2       1.00          770   
3  2487200875  20141209T000000  604000.0         4       3.00         1960   
4  1954400510  20150218T000000  510000.0         3       2.00         1680   

   sqft_lot  floors  waterfront  view  ...  grade  sqft_above  sqft_basement  \
0      5650     1.0           0     0  ...      7        1180              0   
1      7242     2.0           0     0  ...      7        2170            400   
2     10000     1.0           0     0  ...      6         770              0   
3      5000     1.0           0     0  ...      7        1050            910   
4      8080     1.0           0     0  ...      8        1680              0   

   yr_built  yr_renovated  zipcode    

In [165]:
print(df.columns.tolist())  # 컬럼 이름 확인
print(df.dtypes)            # 데이터 타입 확인

['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long', 'sqft_living15', 'sqft_lot15']
id                 int64
date              object
price            float64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object


In [166]:
# 문자열인 'date' 컬럼 제거
if 'date' in df.columns:
    df = df.drop(columns=['date'])

In [167]:
# 2. X, y 분리 (price가 예측할 대상)
X = df.drop(columns=['price'])
y = df['price']

In [168]:
# 3. 데이터 분할 (Train/Test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [169]:
# 4. 정규화 (스케일링)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [170]:
# 5. 딥러닝 모델 구성
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1)) # 회귀이므로 활성화함수 없음

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [171]:
# 6. 모델 컴파일
model.compile(optimizer='adam',loss='mse',metrics=['mae'])

In [172]:
# 7. 조기종료 콜백
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [173]:
# 8. 모델 학습
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/10


2025-04-02 15:24:22.947544: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - loss: 400362176512.0000 - mae: 522177.2188 - val_loss: 97168637952.0000 - val_mae: 243669.8438
Epoch 2/10
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - loss: 80436060160.0000 - mae: 211970.7656 - val_loss: 53722058752.0000 - val_mae: 173225.7031
Epoch 3/10
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - loss: 53228326912.0000 - mae: 166425.1250 - val_loss: 44026994688.0000 - val_mae: 152853.8281
Epoch 4/10
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - loss: 45876330496.0000 - mae: 147870.2656 - val_loss: 37747240960.0000 - val_mae: 136888.3906
Epoch 5/10
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - loss: 39969783808.0000 - mae: 132814.3125 - val_loss: 34197397504.0000 - val_mae: 127006.0625
Epoch 6/10
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - loss

In [189]:
# 9. 모델 평가
loss, mae = model.evaluate(X_test, y_test)
print(f"\n 🧪Test MAE (평균 절대 오차): {mae:.2f}")

[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 37034295296.0000 - mae: 116568.2188

 🧪Test MAE (평균 절대 오차): 114251.72


In [191]:
# 10. 예측 예시
y_pred = model.predict(X_test[:5])
print("\n 📈예측 결과 (상위 5개):")
for i in range(5):
    print(f"실제 가격: {y_test.iloc[i]:,.0f}원 / 예측 가격: {y_pred[i][0]:,.0f}원")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step

 📈예측 결과 (상위 5개):
실제 가격: 365,000원 / 예측 가격: 360,410원
실제 가격: 865,000원 / 예측 가격: 815,540원
실제 가격: 1,038,000원 / 예측 가격: 1,297,296원
실제 가격: 1,490,000원 / 예측 가격: 1,865,679원
실제 가격: 711,000원 / 예측 가격: 671,952원
