In [1]:
import pandas as pd

import lightgbm as lgb
from lightgbm import LGBMRegressor

import catboost
from catboost import CatBoostRegressor

import xgboost as xgb
from xgboost import XGBRegressor

from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

In [2]:
# 데이터 불러오기
train_df = pd.read_csv('../data/CalorieConsumption/train.csv')
test_df = pd.read_csv('../data/CalorieConsumption/test.csv')
submission = pd.read_csv('../data/CalorieConsumption/sample_submission.csv')

# ID 열 제거
train_df = train_df.drop('ID', axis=1)
test_df = test_df.drop('ID', axis=1)

In [3]:
# Weight_Status, Gender 열을 숫자 데이터로 변환
train_df['Weight_Status'] = train_df['Weight_Status'].map({'Normal Weight': 0, 'Overweight': 1, 'Obese': 2})
train_df['Gender'] = train_df['Gender'].map({'M': 0, 'F': 1})
test_df['Weight_Status'] = test_df['Weight_Status'].map({'Normal Weight': 0, 'Overweight': 1, 'Obese': 2})
test_df['Gender'] = test_df['Gender'].map({'M': 0, 'F': 1})

In [4]:
train_df

Unnamed: 0,Exercise_Duration,Body_Temperature(F),BPM,Height(Feet),Height(Remainder_Inches),Weight(lb),Weight_Status,Gender,Age,Calories_Burned
0,26.0,105.6,107.0,5.0,9.0,154.3,0,1,45,166.0
1,7.0,103.3,88.0,6.0,6.0,224.9,1,0,50,33.0
2,7.0,103.3,86.0,6.0,3.0,218.3,1,0,29,23.0
3,17.0,104.0,99.0,5.0,6.0,147.7,0,1,33,91.0
4,9.0,102.7,88.0,5.0,10.0,169.8,0,0,38,32.0
...,...,...,...,...,...,...,...,...,...,...
7495,22.0,105.1,104.0,4.0,10.0,112.4,0,1,75,151.0
7496,20.0,105.3,104.0,5.0,8.0,147.7,0,1,21,114.0
7497,8.0,103.1,90.0,6.0,2.0,202.8,1,0,57,41.0
7498,12.0,104.4,97.0,5.0,9.0,167.6,1,0,35,57.0


In [5]:
# PolynomialFeatures를 사용하여 데이터 전처리
poly = PolynomialFeatures(degree=2, include_bias=False)
X = poly.fit_transform(train_df.drop('Calories_Burned', axis=1))
y = train_df['Calories_Burned']

In [6]:
# 표준화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
# train, valid 데이터 나누기
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [8]:
# 회귀 신경망 모델 학습
mlp = MLPRegressor(hidden_layer_sizes=(1024, 512,2), max_iter=500, activation='relu', solver='adam', random_state=42)
mlp.fit(X_train, y_train)

In [9]:
# valid 데이터 예측 및 평가
y_pred_valid = mlp.predict(X_valid)
rmse_valid = np.sqrt(mean_squared_error(y_valid, y_pred_valid))
print(f"Valid 데이터 RMSE: {rmse_valid:.3f}")

Valid 데이터 RMSE: 0.405


In [10]:
# test 데이터 예측
X_test = test_df.values
X_poly_test = poly.transform(X_test)
X_test_scaled = scaler.transform(X_poly_test)
y_pred_test = mlp.predict(X_test_scaled)



In [11]:
# 결과 저장
submission['Calories_Burned'] = y_pred_test
submission.to_csv('submission.csv', index=False)