In [5]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

# 데이터 로드 및 전처리
train = pd.read_csv("./train.csv")
test = pd.read_csv("./test.csv")
data = train

# ID 컬럼 제거
data.drop(columns=['ID'], inplace=True)

# 레이블 인코딩
label_encoder = LabelEncoder()
data['SUBCLASS'] = label_encoder.fit_transform(data['SUBCLASS'])

# 특성과 레이블 분리
X = data.drop(columns=['SUBCLASS'])
y = data['SUBCLASS'].values

# 범주형 변수 원-핫 인코딩
categorical_columns = X.select_dtypes(include=['object', 'category']).columns
X_encoded = pd.get_dummies(X, columns=categorical_columns)

# 데이터 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_encoded)

# 학습 및 검증 데이터셋 분할
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

class CancerDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# 데이터로더 생성
train_dataset = CancerDataset(X_train, y_train)
val_dataset = CancerDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# 모델 정의
class SimpleNN(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# 하이퍼파라미터 설정
input_dim = X_train.shape[1]
num_classes = len(label_encoder.classes_)
learning_rate = 0.001
num_epochs = 20

# 모델 초기화
model = SimpleNN(input_dim=input_dim, num_classes=num_classes)

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 학습 루프
for epoch in range(num_epochs):
    model.train()
    for features, labels in train_loader:
        # 순전파
        outputs = model(features)
        loss = criterion(outputs, labels)

        # 역전파 및 최적화
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # 검증 루프
    model.eval()
    val_loss = 0.0
    correct_predictions = 0

    with torch.no_grad():
        for features, labels in val_loader:
            outputs = model(features)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted_labels = torch.max(outputs, 1)
            correct_predictions += (predicted_labels == labels).sum().item()

    val_loss /= len(val_loader)
    accuracy = correct_predictions / len(val_dataset)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}')

print("Training complete.")

Epoch [1/20], Loss: 4.1213, Val Loss: 9.9163, Accuracy: 0.2417
Epoch [2/20], Loss: 12.9398, Val Loss: 25.3781, Accuracy: 0.1668
Epoch [3/20], Loss: 10.7897, Val Loss: 20.7131, Accuracy: 0.1789
Epoch [4/20], Loss: 9.5192, Val Loss: 21.0344, Accuracy: 0.1934
Epoch [5/20], Loss: 14.3051, Val Loss: 39.0257, Accuracy: 0.2063
Epoch [6/20], Loss: 17.2286, Val Loss: 51.4325, Accuracy: 0.1265
Epoch [7/20], Loss: 22.0532, Val Loss: 68.9786, Accuracy: 0.1273
Epoch [8/20], Loss: 19.2266, Val Loss: 57.1641, Accuracy: 0.1402
Epoch [9/20], Loss: 18.9131, Val Loss: 56.5238, Accuracy: 0.1821
Epoch [10/20], Loss: 19.2349, Val Loss: 57.1661, Accuracy: 0.1241
Epoch [11/20], Loss: 19.6101, Val Loss: 57.2689, Accuracy: 0.1080
Epoch [12/20], Loss: 19.3308, Val Loss: 56.6712, Accuracy: 0.1096
Epoch [13/20], Loss: 19.0984, Val Loss: 56.3666, Accuracy: 0.1217
Epoch [14/20], Loss: 19.4066, Val Loss: 55.9429, Accuracy: 0.1168
Epoch [15/20], Loss: 18.6960, Val Loss: 54.8184, Accuracy: 0.1322
Epoch [16/20], Loss: 1

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# 데이터 로드 및 전처리
train = pd.read_csv("./train.csv")
data = train

# ID 컬럼 제거
data.drop(columns=['ID'], inplace=True)

# 레이블 인코딩
label_encoder = LabelEncoder()
data['SUBCLASS'] = label_encoder.fit_transform(data['SUBCLASS'])

# 특성과 레이블 분리
X = data.drop(columns=['SUBCLASS'])
y = data['SUBCLASS'].values

# 범주형 변수 원-핫 인코딩
categorical_columns = X.select_dtypes(include=['object', 'category']).columns
X_encoded = pd.get_dummies(X, columns=categorical_columns)

# 데이터 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_encoded)

# 학습 및 검증 데이터셋 분할
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 다양한 커널과 C 값을 사용한 SVM 모델 학습 및 평가
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
C_values = [0.005, 0.01, 0.1]

for kernel in kernels:
    for C in C_values:
        print(f"\nUsing {kernel} kernel with C={C}:")
        svm_model = SVC(kernel=kernel, C=C)
        svm_model.fit(X_train, y_train)
        
        y_pred_svm = svm_model.predict(X_val)
        
        # 결과 출력
        print("Accuracy:", accuracy_score(y_val, y_pred_svm))
        print("Classification Report:\n", classification_report(y_val, y_pred_svm))



Using linear kernel with C=0.005:
Accuracy: 0.05801772763900081
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        27
           2       0.04      0.01      0.01       149
           3       0.00      0.00      0.00        31
           4       0.00      0.00      0.00        30
           5       0.00      0.00      0.00        10
           6       0.00      0.00      0.00       116
           7       0.00      0.00      0.00        51
           8       0.05      0.02      0.03        86
           9       0.02      0.02      0.02        54
          10       0.70      0.36      0.47        39
          11       0.00      0.00      0.00        49
          12       0.00      0.00      0.00        34
          13       0.00      0.00      0.00        37
          14       0.00      0.00      0.00        39
          15       0.00      0.00      0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.058823529411764705
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        27
           2       0.05      0.01      0.01       149
           3       0.00      0.00      0.00        31
           4       0.00      0.00      0.00        30
           5       0.00      0.00      0.00        10
           6       0.00      0.00      0.00       116
           7       0.00      0.00      0.00        51
           8       0.06      0.02      0.03        86
           9       0.02      0.02      0.02        54
          10       0.70      0.36      0.47        39
          11       0.00      0.00      0.00        49
          12       0.00      0.00      0.00        34
          13       0.00      0.00      0.00        37
          14       0.00      0.00      0.00        39
          15       0.50      0.02      0.03        58
          16       0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.058823529411764705
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.00      0.00      0.00        27
           2       0.06      0.01      0.01       149
           3       0.00      0.00      0.00        31
           4       0.00      0.00      0.00        30
           5       0.00      0.00      0.00        10
           6       0.00      0.00      0.00       116
           7       0.00      0.00      0.00        51
           8       0.06      0.02      0.03        86
           9       0.02      0.02      0.02        54
          10       0.70      0.36      0.47        39
          11       0.00      0.00      0.00        49
          12       0.00      0.00      0.00        34
          13       0.00      0.00      0.00        37
          14       0.00      0.00      0.00        39
          15       0.14      0.02      0.03        58
          16       0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


: 