In [1]:
import pandas as pd

# 데이터 읽기
df = pd.read_csv('BP_data.csv')

# 데이터 구조 보기
print(df.head())
print(df.info())
print(df.describe())

   Patient_Number  Blood_Pressure_Abnormality  Level_of_Hemoglobin  \
0               1                           1                11.28   
1               2                           0                 9.75   
2               3                           1                10.79   
3               4                           0                11.00   
4               5                           1                14.17   

   Genetic_Pedigree_Coefficient  Age  BMI  Sex  Pregnancy  Smoking  \
0                          0.90   34   23    1        1.0        0   
1                          0.23   54   33    1        NaN        0   
2                          0.91   70   49    0        NaN        0   
3                          0.43   71   50    0        NaN        0   
4                          0.83   52   19    0        NaN        0   

   Physical_activity  salt_content_in_the_diet  alcohol_consumption_per_day  \
0              45961                     48071                          NaN   


In [3]:
pip install scipy

Collecting scipy
  Downloading scipy-1.15.2-cp312-cp312-macosx_14_0_arm64.whl.metadata (61 kB)
Downloading scipy-1.15.2-cp312-cp312-macosx_14_0_arm64.whl (22.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.4/22.4 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: scipy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gensim 4.3.3 requires scipy<1.14.0,>=1.7.0, but you have scipy 1.15.2 which is incompatible.[0m[31m
[0mSuccessfully installed scipy-1.15.2
Note: you may need to restart the kernel to use updated packages.


In [4]:
# 결측치 확인
print(df.isnull().sum())

# 결측치 처리
# Pregnancy: 결측치 → 0 (남성 또는 임신하지 않은 경우로 간주)
df['Pregnancy'] = df['Pregnancy'].fillna(0)

# alcohol_consumption_per_day: 결측치 → 0 (술을 안 마시는 경우로 간주)
df['alcohol_consumption_per_day'] = df['alcohol_consumption_per_day'].fillna(0)

# 최종 결측치 확인
print(df.isnull().sum())

# 라벨(y)와 입력(X) 나누기
y = df['Blood_Pressure_Abnormality']   # Label (분류 목표)
X = df.drop(columns=['Blood_Pressure_Abnormality'])  # 입력 특성

# 데이터 표준화
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

Patient_Number                    0
Blood_Pressure_Abnormality        0
Level_of_Hemoglobin               0
Genetic_Pedigree_Coefficient     92
Age                               0
BMI                               0
Sex                               0
Pregnancy                         0
Smoking                           0
Physical_activity                 0
salt_content_in_the_diet          0
alcohol_consumption_per_day       0
Level_of_Stress                   0
Chronic_kidney_disease            0
Adrenal_and_thyroid_disorders     0
dtype: int64
Patient_Number                    0
Blood_Pressure_Abnormality        0
Level_of_Hemoglobin               0
Genetic_Pedigree_Coefficient     92
Age                               0
BMI                               0
Sex                               0
Pregnancy                         0
Smoking                           0
Physical_activity                 0
salt_content_in_the_diet          0
alcohol_consumption_per_day       0
Level_of_Stress

In [5]:
# train/test 나누기
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# CNN 입력을 위한 reshape
import numpy as np

# (샘플 수, 채널수, 높이, 너비) 형태로 바꾼다
X_train = np.expand_dims(X_train, axis=1)  # (N, 1, 1, feature수)
X_test = np.expand_dims(X_test, axis=1)

In [10]:
!pip install torch

Collecting torch
  Using cached torch-2.7.0-cp312-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.7.0-cp312-none-macosx_11_0_arm64.whl (68.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.6/68.6 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: sympy, torch
  Attempting uninstall: sympy
    Found existing installation: sympy 1.13.2
    Uninstalling sympy-1.13.2:
      Successfully uninstalled sympy-1.13.2
Successfully installed sympy-1.14.0 torch-2.7.0


In [11]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# numpy -> torch tensor 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)  # 분류니까 long
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# TensorDataset으로 묶기
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# DataLoader 만들기
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [18]:
import torch.nn as nn

class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=1,        # 입력 채널 1개
            out_channels=16,      # 필터 16개
            kernel_size=(1, 3),   # (높이=1, 너비=3)
            padding=(0, 1)        # 너비 방향 패딩
        )
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveAvgPool2d((1, 1))  # 최종 크기 (1,1)로 압축
        self.fc = nn.Linear(16, 2)  # 최종 클래스 수 (0,1)

    def forward(self, x):
        x = self.conv1(x)       # (N, 1, 1, 14) → (N, 16, 1, 14)
        x = self.relu(x)
        x = self.pool(x)        # (N, 16, 1, 1)
        x = x.view(x.size(0), -1)  # (N, 16)
        x = self.fc(x)          # (N, 2)
        return x

In [19]:
# 모델 인스턴스 생성
model = CNNClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [20]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)  # CrossEntropyLoss expects long
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {running_loss/len(train_loader):.4f}")

RuntimeError: Given groups=1, weight of size [16, 1, 1, 3], expected input[1, 32, 1, 14] to have 1 channels, but got 32 channels instead

In [21]:
print(X_train.shape)

(1600, 1, 14)


In [22]:
# CNN 입력 형태로 명확히 reshape
X_train = X_train.reshape(-1, 1, 1, X_train.shape[-1])
X_test = X_test.reshape(-1, 1, 1, X_test.shape[-1])

In [23]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)  # CrossEntropyLoss expects long
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {running_loss/len(train_loader):.4f}")

RuntimeError: Given groups=1, weight of size [16, 1, 1, 3], expected input[1, 32, 1, 14] to have 1 channels, but got 32 channels instead

In [24]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# numpy → torch tensor 변환 (reshape 이후!)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# TensorDataset 만들기
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [25]:
# reshape 완료 후 다시
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [26]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {avg_loss:.4f}")

[Epoch 1/10] Loss: nan
[Epoch 2/10] Loss: nan
[Epoch 3/10] Loss: nan
[Epoch 4/10] Loss: nan
[Epoch 5/10] Loss: nan
[Epoch 6/10] Loss: nan
[Epoch 7/10] Loss: nan
[Epoch 8/10] Loss: nan
[Epoch 9/10] Loss: nan
[Epoch 10/10] Loss: nan


In [27]:
# 모델 평가 모드로 전환
model.eval()

correct = 0
total = 0

# 평가할 때는 gradient 계산 끄기 (메모리 절약)
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)  # 가장 큰 값의 인덱스
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"✅ Test Accuracy: {accuracy:.4f}")

✅ Test Accuracy: 0.5575


In [28]:
import numpy as np

# 모델 평가 모드
model.eval()

# 클래스 별 총 개수와 맞춘 개수
n_classes = 2  # (0, 1 두 개)
correct_per_class = np.zeros(n_classes)
total_per_class = np.zeros(n_classes)

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        for label, prediction in zip(labels, predicted):
            total_per_class[label.item()] += 1
            if label.item() == prediction.item():
                correct_per_class[label.item()] += 1

# 클래스별 정확도 계산
class_accuracy = correct_per_class / total_per_class

print(f"Class-wise Accuracy: {class_accuracy}")

Class-wise Accuracy: [1. 0.]
