In [1]:
# 1. 라이브러리 로드
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from google.colab import files

# 2. 엑셀 파일 업로드
uploaded = files.upload()
df_all = pd.read_excel("천원의아침밥데이터셋.xlsx", sheet_name=None)

# 3. 시트 통합 및 전처리
data = []
for month, df in df_all.items():
    df = df.dropna(how='all').reset_index(drop=True)
    for _, row in df.iterrows():
        date = row.get('날짜')
        if pd.isna(date): continue
        for col in df.columns[1:6]:
            menu = row.get(col)
            if pd.notna(menu):
                data.append({'날짜': date, '월': month, '메뉴': menu})

df = pd.DataFrame(data)
df['날짜'] = pd.to_datetime(df['날짜'].astype(str), format='%y.%m.%d', errors='coerce')
df = df.dropna(subset=['날짜'])
df['요일'] = df['날짜'].dt.day_name()

# 4. 임의 생성: 식사 인원, 잔반량, 메뉴 선호도
np.random.seed(42)
df['식사 인원'] = np.random.randint(150, 300, size=len(df))
df['잔반량(kg)'] = np.round(np.random.uniform(3.0, 25.0, size=len(df)), 1)
df['메뉴 선호도(%)'] = np.round(np.random.uniform(60.0, 95.0, size=len(df)), 1)

# 5. 인코딩 및 X, y 생성
X_raw = df[['요일', '메뉴', '식사 인원', '메뉴 선호도(%)']]
y = df['잔반량(kg)']

encoder = OneHotEncoder(sparse_output=False)
X_cat = encoder.fit_transform(X_raw[['요일', '메뉴']])
X_num = X_raw[['식사 인원', '메뉴 선호도(%)']].values
X = np.hstack([X_cat, X_num])

# 6. train/test split (index 보존 방식)
indices = np.arange(len(X))
train_idx, test_idx = train_test_split(indices, test_size=0.2, random_state=42)
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# 7. 결과 저장용 리스트
results = []

# ✅ RandomForest
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)
results.append(['RandomForest',
                mean_absolute_error(y_test, rf_pred),
                np.sqrt(mean_squared_error(y_test, rf_pred)),
                r2_score(y_test, rf_pred)])

# ✅ XGBoost
xgb = XGBRegressor(random_state=42)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)
results.append(['XGBoost',
                mean_absolute_error(y_test, xgb_pred),
                np.sqrt(mean_squared_error(y_test, xgb_pred)),
                r2_score(y_test, xgb_pred)])

# ✅ LSTM
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
X_lstm = X_scaled.reshape(-1, 1, X_scaled.shape[1])
X_train_lstm, X_test_lstm = X_lstm[train_idx], X_lstm[test_idx]

lstm_model = Sequential()
lstm_model.add(Dense(64, activation='relu', input_shape=(1, X.shape[1])))
lstm_model.add(Dense(32, activation='relu'))
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')

lstm_model.fit(X_train_lstm, y_train, epochs=30, batch_size=16, verbose=0)
lstm_pred = lstm_model.predict(X_test_lstm).flatten()

results.append(['LSTM',
                mean_absolute_error(y_test, lstm_pred),
                np.sqrt(mean_squared_error(y_test, lstm_pred)),
                r2_score(y_test, lstm_pred)])

# 📊 최종 성능 비교표 출력
result_df = pd.DataFrame(results, columns=['모델', 'MAE', 'RMSE', 'R²'])
print("📊 성능 비교 결과:")
print(result_df)


Saving 천원의아침밥데이터셋.xlsx to 천원의아침밥데이터셋.xlsx


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
📊 성능 비교 결과:
             모델       MAE      RMSE        R²
0  RandomForest  5.278255  6.247936 -0.161998
1       XGBoost  5.138440  6.292629 -0.178682
2          LSTM  5.094613  6.254496 -0.164439
