# Linear Classification

- 입력 변수 (Input variable) : 분류에 사용되는 데이터의 특징(feature)
- 가중치 (Weights) : 각 입력 변수의 영향력을 나타내는 값
- 편향 (Bias) : 모델의 적합도를 조정하는 상수항
- 시그모이드 함수 (Sigmoid function) : 입력 값을 0과 1사이로 변환하는 함수
- 로그 손실 함수 (Log loss function) : 모델의 오차를 계산하는 함수

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from torch.utils.data import Dataset, DataLoader


## create dataset

In [6]:
# 1000개의 데이터 feature 5개, label 0/1
x, y = make_classification(
    n_samples = 1000,         #생성할 데이터 수
    n_features = 5,           #독립변수 수(입력변수에 사용)/종속변수 라벨
    n_informative = 2,        #독립 변수 중에 유의미한 변수 계수
    n_redundant = 0,          #독립 변수 중에 불필요한 독립변수 계수
    n_clusters_per_class = 1,    #클래스당 클러스터 계수
    random_state = 42
)

#print(x,y)
print(x.shape)

(1000, 5)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)

print(len(X_train))
print(len(X_test))

800
200


## Customdataset

In [8]:
#Datasetc 클래스로 to tensor
class MyCustomDataset(Dataset):
    def __init__(self, x, y):
        #to tensor
        self.x = torch.tensor(x, dtype = torch.float32)
        self.y = torch.tensor(y, dtype = torch.float32)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)
    


In [12]:
#데이터셋 정의
train_dataset = MyCustomDataset(X_train, y_train)
test_dataset = MyCustomDataset(X_test, y_test)

#데이터 로드 정의
# batch 기반 딥러닝 학습을 위해 data slice한다
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size =32, shuffle=True)



## Define Model

In [16]:
#모델 정의
class LogisticRegression(nn.Module):
    def __init__(self, input_dim):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        out = self.linear(x)
        out = torch.sigmoid(out)

        return out

model = LogisticRegression(input_dim=5)
print(model)

LogisticRegression(
  (linear): Linear(in_features=5, out_features=1, bias=True)
)


## Loss function, Optimizer

In [20]:
#Loss function, optimizer 선언
criterion = nn.BCELoss()        # 0/1 이진분류 이기 때문에 BCELoss사용()
optimizer = optim.SGD(model.parameters(),
                    lr = 0.01,
                    weight_decay = 1e-5,
                    momentum = 0.9,
                    nesterov = True)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0.9
    nesterov: True
    weight_decay: 1e-05
)


## Train

In [21]:
# train loop
num_epochs = 100

for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        
        #optimizer 초기화
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))
        loss.backward()
        optimizer.step()

        if epoch%10 ==0:
            print(f"Epoch [{epoch+1}/{num_epochs}] , Loss: {loss.item():.4f}")

Epoch [1/100] , Loss: 0.7278
Epoch [1/100] , Loss: 0.7259
Epoch [1/100] , Loss: 0.7669
Epoch [1/100] , Loss: 0.6200
Epoch [1/100] , Loss: 0.6972
Epoch [1/100] , Loss: 0.5687
Epoch [1/100] , Loss: 0.5330
Epoch [1/100] , Loss: 0.7146
Epoch [1/100] , Loss: 0.7395
Epoch [1/100] , Loss: 0.5930
Epoch [1/100] , Loss: 0.5436
Epoch [1/100] , Loss: 0.5552
Epoch [1/100] , Loss: 0.5919
Epoch [1/100] , Loss: 0.5179
Epoch [1/100] , Loss: 0.5466
Epoch [1/100] , Loss: 0.5887
Epoch [1/100] , Loss: 0.5438
Epoch [1/100] , Loss: 0.5124
Epoch [1/100] , Loss: 0.4412
Epoch [1/100] , Loss: 0.4183
Epoch [1/100] , Loss: 0.4344
Epoch [1/100] , Loss: 0.4866
Epoch [1/100] , Loss: 0.5040
Epoch [1/100] , Loss: 0.4791
Epoch [1/100] , Loss: 0.4014
Epoch [11/100] , Loss: 0.1920
Epoch [11/100] , Loss: 0.3994
Epoch [11/100] , Loss: 0.4082
Epoch [11/100] , Loss: 0.2118
Epoch [11/100] , Loss: 0.2706
Epoch [11/100] , Loss: 0.1783
Epoch [11/100] , Loss: 0.3148
Epoch [11/100] , Loss: 0.3735
Epoch [11/100] , Loss: 0.1974
Epoch

## Test

In [24]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

model.eval()
with torch.no_grad():
    correct =0
    total = 0
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    print("Accuary %d%%"% (100*correct/total))

Using device: cpu
Accuary 51%
