In [16]:
# 라이브러리 불러오기
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

In [31]:
# Location of the insurance.csv dataset (raw file hosted on GitHub).
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"



# Read the data (pandas fetches the URL directly, so this cell needs network access)

df = pd.read_csv(url)

In [33]:
# Display the loaded DataFrame as the cell output.
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [35]:
# Replace the string categories in 'sex' with integer class codes (in place).
# The fitted encoder is discarded, so the code -> label mapping is not kept.
df['sex'] = LabelEncoder().fit_transform(df['sex'])  

In [37]:
# Replace the string categories in 'smoker' with integer class codes (in place).
# The fitted encoder is discarded, so the code -> label mapping is not kept.
df['smoker'] = LabelEncoder().fit_transform(df['smoker'])  

In [39]:
# Replace the string categories in 'region' with integer class codes (in place).
# NOTE(review): integer codes impose an ordering on the region names — confirm
# that is acceptable for the model, or switch to one-hot encoding.
df['region'] = LabelEncoder().fit_transform(df['region'])  

In [41]:
# Separate the target (BMI) from the features
X = df.drop(columns=['bmi']).values  # 6 features: age, sex, children, smoker, region, charges
y = df['bmi'].values.astype(np.float32)

In [43]:
# Standardize the features
# NOTE(review): the scaler is fitted on every row before the train/test split,
# so statistics from the future test rows influence the training inputs.
# Consider fitting on the training portion only.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [46]:
# Convert a table into sliding-window (sequence) form
def split_sequences(sequences, n_steps):
    """Cut a 2-D array into overlapping windows of ``n_steps`` consecutive rows.

    The last column of ``sequences`` is treated as the target and every other
    column as a feature. Window ``i`` covers rows ``i`` through
    ``i + n_steps - 1``; its target is the last-column value of the final row
    of that window.

    Args:
        sequences: 2-D NumPy array of shape (n_rows, n_features + 1).
        n_steps: Window length in rows; must be at least 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. When ``sequences`` has at least
        ``n_steps`` rows, ``X`` has shape
        (n_rows - n_steps + 1, n_steps, n_features) and ``y`` has shape
        (n_rows - n_steps + 1,). With fewer rows, both arrays are empty.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    # A non-positive window length would otherwise yield empty windows paired
    # with wrap-around targets (e.g. sequences[-1, -1]) instead of failing.
    if n_steps < 1:
        raise ValueError(f"n_steps must be >= 1, got {n_steps}")

    X, y = [], []
    # Number of complete windows; clamp at zero when the table is too short.
    n_windows = max(len(sequences) - n_steps + 1, 0)
    for start in range(n_windows):
        end_ix = start + n_steps
        X.append(sequences[start:end_ix, :-1])
        y.append(sequences[end_ix - 1, -1])
    return np.array(X), np.array(y)

In [48]:
# Append BMI as the last column, then cut the table into 5-row sliding windows
# NOTE(review): each window groups consecutive rows of the table; confirm that
# row order carries sequential meaning for this dataset before relying on it.
full_data = np.hstack((X, y.reshape(-1, 1)))
X_seq, y_seq = split_sequences(full_data, n_steps=5)

In [50]:
# Train/test split (20% held out, fixed random_state for a repeatable split)
# NOTE(review): the windows were built with a stride of one row, so a randomly
# chosen test window shares rows with training windows — confirm this overlap
# is acceptable for the evaluation.
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

In [52]:
# Convert the NumPy splits to float32 tensors.
# Features are reordered from (batch, seq_len, channels) to
# (batch, channels, seq_len), the layout nn.Conv1d consumes.
X_train, X_test = (
    torch.tensor(features, dtype=torch.float32).permute(0, 2, 1)
    for features in (X_train, X_test)
)
# Targets become column vectors of shape (batch, 1) to match the model output.
y_train, y_test = (
    torch.tensor(targets, dtype=torch.float32).view(-1, 1)
    for targets in (y_train, y_test)
)

In [54]:
# Wrap the feature and target tensors of each split in a TensorDataset.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

In [56]:
# Batch both datasets 32 samples at a time; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [60]:
# Model definition: Conv1D regression model
class BMICNN(nn.Module):
    """Small 1-D convolutional network that regresses a single value per window.

    Two Conv1d layers (kernel_size=3, padding=1, so the sequence length is
    preserved) are followed by two fully connected layers. The final layer has
    one output unit and no activation, i.e. a raw regression output.

    Args:
        in_channels: Number of feature channels per time step. Defaults to 6,
            the value previously hard-coded.
        seq_len: Number of time steps in each input window. Defaults to 5, the
            value previously hard-coded into the first linear layer's size.

    Input shape:  (batch, in_channels, seq_len)
    Output shape: (batch, 1)
    """

    def __init__(self, in_channels=6, seq_len=5):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        # The convolutions keep the length at seq_len, so the flattened feature
        # map holds 32 channels * seq_len positions.
        self.fc1 = nn.Linear(32 * seq_len, 64)
        self.fc2 = nn.Linear(64, 1)  # regression output

    def forward(self, x):
        """Map a (batch, in_channels, seq_len) tensor to (batch, 1) predictions."""
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # flatten channels and positions per sample
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

In [62]:
# Training and evaluation: build the model, loss and optimizer
# NOTE(review): no random seed is set in this cell, so weight initialization
# and batch order (and therefore the printed losses) differ between runs.
model = BMICNN()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train_losses = []  # per-sample mean training MSE, one entry per epoch

# Training loop
for epoch in range(30):
    model.train()
    running_loss = 0.0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        # MSELoss returns the batch mean; scale by the batch size so the
        # smaller final batch is not over-weighted in the epoch average.
        running_loss += loss.item() * len(xb)
    # Divide by the sample count so the value is a true per-sample MSE,
    # directly comparable with the test MSE computed over all test samples.
    train_losses.append(running_loss / len(train_loader.dataset))
    print(f"Epoch {epoch+1}, Loss: {train_losses[-1]:.4f}")

Epoch 1, Loss: 872.8678
Epoch 2, Loss: 196.1055
Epoch 3, Loss: 50.4778
Epoch 4, Loss: 45.5483
Epoch 5, Loss: 44.6400
Epoch 6, Loss: 42.0641
Epoch 7, Loss: 41.0382
Epoch 8, Loss: 40.2368
Epoch 9, Loss: 39.8537
Epoch 10, Loss: 39.9255
Epoch 11, Loss: 39.2438
Epoch 12, Loss: 38.3019
Epoch 13, Loss: 37.6389
Epoch 14, Loss: 37.3324
Epoch 15, Loss: 36.5953
Epoch 16, Loss: 36.8720
Epoch 17, Loss: 35.7923
Epoch 18, Loss: 35.4546
Epoch 19, Loss: 35.0303
Epoch 20, Loss: 35.1703
Epoch 21, Loss: 34.5249
Epoch 22, Loss: 34.1620
Epoch 23, Loss: 34.1083
Epoch 24, Loss: 34.4650
Epoch 25, Loss: 33.7818
Epoch 26, Loss: 33.2280
Epoch 27, Loss: 32.7736
Epoch 28, Loss: 33.0551
Epoch 29, Loss: 32.3780
Epoch 30, Loss: 32.6151


In [64]:
# Evaluation: collect predictions on the held-out windows and score them
model.eval()
preds = []
actuals = []
with torch.no_grad():
    for xb, yb in test_loader:
        out = model(xb)
        # Squeeze only the trailing size-1 dimension. A bare squeeze() would
        # also collapse a final batch of one sample to a 0-d tensor, and
        # list.extend() cannot iterate over the resulting 0-d array.
        preds.extend(out.squeeze(1).cpu().numpy())
        actuals.extend(yb.squeeze(1).cpu().numpy())

mse = mean_squared_error(actuals, preds)
r2 = r2_score(actuals, preds)
print(f"Test MSE: {mse:.4f}")
print(f"R² Score: {r2:.4f}")

Test MSE: 42.2169
R² Score: -0.1329
