# 선형 분류 - 로지스틱 회귀 (Logistic Regression)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from torch.utils.data import Dataset, DataLoader

### 데이터셋을 만들기 make_classificaiton 함수를 사용하여 무작위로 데이터셋을 만들기

In [2]:
x, y = make_classification(
    n_samples=3000, # 생성할 데이터 수 
    n_features=5,   # 독립변수 수 (입력변수에 사용되는 변수) / 종속변수 -> 라벨 
    n_informative=2,# 독립변수 수 중에서 실제로 유의미한 의미가 있는 변수 계수 
    n_redundant=0,  # 독립변수 중에 다른 독립 변수로부터 파생된 불필요한 독립변수 계수 
    n_clusters_per_class=1, # 클래스당 클러스트 계수 
    random_state=42         # 난수 생성 발생기의 시드값 
)

print(x, y)

[[-0.02439857 -0.57540077  1.26796049 -1.42222965 -0.9629849 ]
 [-1.07638119  0.3872175   1.08299994 -0.67379011 -2.65098736]
 [-1.12984986 -0.26922916  1.12735009 -0.82383687 -1.70574586]
 ...
 [-0.53797853  0.26401859 -0.48915618  0.4664446  -1.57451325]
 [ 0.01920342  0.9761859  -0.14717165 -1.51725386  2.31873002]
 [-0.37051336  0.93603022 -0.62133172 -0.23084897  1.66473405]] [0 0 0 ... 0 1 1]


### 이제 데이터셋을 Train set 과 Test set 으로 나누기

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

print("x_train >> " , len(x_train))
print("x_test >> ", len(x_test))
print("y_train >> ", len(y_train))
print("y_test >> ", len(y_test))

x_train >>  2400
x_test >>  600
y_train >>  2400
y_test >>  600


### Pytorc의 Dataset 과 Dataloader 를 사용하기 위한 Custom Dataset 구현

In [5]:
class MyCustomDataset(Dataset) :
    def __init__(self, x, y) : 
        # 텐서형태로 변환 
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        
        
    def __getitem__(self, index) :
        
        return self.x[index], self.y[index]
        
    
    def __len__(self) : 
        return len(self.x)

# 데이터셋 정의     
train_dataset = MyCustomDataset(x_train, y_train)
test_dataset = MyCustomDataset(x_test, y_test)

# 데이터 로드 정의 
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=False)

# ########## 디버깅 코드 ##############
test = MyCustomDataset(x_test, y_test)
for i in test : 
    print(i)
########## 디버깅 코드 ##############
for index, (datas, lables) in enumerate(test_loader) : 
    print(index, datas, lables)

(tensor([-0.1814,  0.9541, -0.8362,  0.9478,  1.1666]), tensor(1.))
(tensor([-1.0747, -0.5975, -0.0673, -0.7824, -1.8099]), tensor(0.))
(tensor([2.0192, 1.4252, 0.7749, 0.9027, 0.9276]), tensor(1.))
(tensor([0.4525, 0.4803, 0.6397, 0.7536, 2.2430]), tensor(1.))
(tensor([ 1.3994,  0.5470, -0.7957, -0.2365, -1.8805]), tensor(0.))
(tensor([-0.5218,  1.1701, -1.8481, -1.2085,  1.2183]), tensor(1.))
(tensor([-0.3237,  0.8182, -0.7245, -0.2665,  1.0007]), tensor(1.))
(tensor([ 1.8711,  1.4021, -0.5651, -0.8509, -1.1655]), tensor(0.))
(tensor([0.7552, 0.1501, 1.1108, 1.0845, 1.0893]), tensor(1.))
(tensor([ 0.3990,  2.0237, -0.6619,  1.7075, -0.4431]), tensor(1.))
(tensor([-0.2678,  0.3786,  1.6651,  1.3227, -0.5773]), tensor(0.))
(tensor([ 0.4529,  1.2332, -0.6631, -0.2662,  0.8212]), tensor(1.))
(tensor([ 0.4259, -0.3123,  0.6507, -1.0707, -0.7738]), tensor(0.))
(tensor([-1.3319,  0.8085, -0.7474,  1.3080,  0.7274]), tensor(1.))
(tensor([-1.4163,  0.0362,  0.1478, -0.6428, -2.0718]), tensor(

(tensor([ 0.9118,  1.0213,  0.7159, -0.9618,  1.1446]), tensor(1.))
(tensor([ 0.1420, -0.3127,  1.3723, -0.6729, -1.8925]), tensor(0.))
(tensor([0.2230, 0.3112, 0.7184, 0.0105, 1.0069]), tensor(1.))
(tensor([ 0.4304,  1.2057, -2.0614,  1.2747, -1.4673]), tensor(0.))
(tensor([-0.1897,  1.7786,  1.3602,  0.8496, -1.7764]), tensor(0.))
(tensor([-1.1222,  0.1443,  1.4503, -0.7270,  2.2018]), tensor(1.))
(tensor([-0.1928,  1.2278, -0.5432, -1.3395,  0.5451]), tensor(1.))
(tensor([ 1.3618,  1.3653,  1.1601, -0.5867,  1.6585]), tensor(1.))
(tensor([-0.5553,  0.5757, -0.1606,  0.4280,  0.0129]), tensor(0.))
(tensor([-0.2247, -0.0487, -1.1330,  1.0750,  1.7240]), tensor(1.))
(tensor([-1.4384,  0.7397, -0.7894, -1.2699,  1.1928]), tensor(1.))
(tensor([-0.7974,  0.5262,  0.3619,  1.0745, -1.6931]), tensor(0.))
(tensor([-1.4679,  0.9823,  0.1633,  0.4592,  1.0718]), tensor(1.))
(tensor([ 0.9955,  0.9400,  1.6772, -0.0815,  1.3100]), tensor(1.))
(tensor([-0.0379,  0.8997,  0.2873,  0.0985,  0.5027]

(tensor([ 0.1045,  1.4381, -0.9511, -0.1447,  0.1937]), tensor(1.))
(tensor([-1.0010,  1.9946,  1.2784,  1.0332, -0.2508]), tensor(0.))
(tensor([-0.1822,  1.5418, -1.7460,  0.6884,  0.8427]), tensor(1.))
(tensor([ 0.5521,  0.6356, -1.5256, -0.7804,  0.9907]), tensor(1.))
(tensor([0.0366, 1.3759, 2.1572, 1.8143, 1.0387]), tensor(1.))
(tensor([-0.5388,  1.8420,  1.3160, -0.1871, -2.5470]), tensor(0.))
(tensor([-0.5106,  0.8479, -0.6548,  0.0799, -0.3333]), tensor(0.))
(tensor([ 1.0064,  1.5792, -0.1465, -1.0578, -1.4011]), tensor(0.))
(tensor([ 2.1210,  0.8962, -0.7402,  0.9778, -1.0575]), tensor(0.))
(tensor([-0.5883,  0.7296, -1.3592,  0.9080,  1.1126]), tensor(1.))
(tensor([ 1.4930,  1.0919,  0.5337, -0.3910, -1.3053]), tensor(0.))
(tensor([-1.2844,  0.2508, -0.6802,  1.4759, -1.4264]), tensor(0.))
(tensor([ 0.4796,  1.2289,  0.2378, -0.4323, -1.2056]), tensor(0.))
(tensor([-0.3856,  1.1599,  0.4167,  0.1909,  1.1856]), tensor(1.))
(tensor([ 0.4398,  2.3036, -0.0109,  1.6764, -0.3672]

181 tensor([[-0.1702,  1.1591,  1.2841, -0.9549,  2.0663]]) tensor([1.])
182 tensor([[-1.1579,  0.7512,  0.6803, -0.0644,  0.9518]]) tensor([1.])
183 tensor([[ 0.5025,  1.5859, -0.4248, -0.8907, -0.3690]]) tensor([0.])
184 tensor([[0.7077, 0.4327, 0.7999, 0.5065, 1.2365]]) tensor([1.])
185 tensor([[ 0.1830,  0.8930,  0.0784, -0.8343, -0.4391]]) tensor([0.])
186 tensor([[-0.5896,  1.1070,  0.4501,  0.4868,  1.1624]]) tensor([1.])
187 tensor([[ 0.3324,  0.0970, -1.9899, -0.3764, -1.7757]]) tensor([0.])
188 tensor([[-0.3539,  2.4053, -0.4553,  2.1655, -1.9186]]) tensor([0.])
189 tensor([[ 1.0500,  1.0478,  0.3990,  0.3247, -2.1275]]) tensor([0.])
190 tensor([[-1.2666,  2.1139,  1.6776, -0.9897, -0.1027]]) tensor([0.])
191 tensor([[-0.8456,  1.2121,  0.4050, -1.6103, -0.6363]]) tensor([0.])
192 tensor([[-0.3863,  0.2963,  0.3019, -2.2053, -1.0281]]) tensor([0.])
193 tensor([[1.4330, 1.8176, 0.7665, 0.7986, 0.4991]]) tensor([1.])
194 tensor([[-0.6809,  1.5896, -0.8387,  0.6820,  2.3272]]) t

459 tensor([[-1.2464,  1.0699, -0.0488, -1.1420,  0.8567]]) tensor([1.])
460 tensor([[ 0.6184,  0.1865, -0.1601,  1.4556, -2.5318]]) tensor([0.])
461 tensor([[ 0.5569,  0.6339, -0.2659,  1.8640, -1.6151]]) tensor([0.])
462 tensor([[-0.6073,  0.8321,  0.5210,  0.4277,  0.9233]]) tensor([1.])
463 tensor([[ 0.9341,  0.8066, -0.9033,  0.4335,  1.1985]]) tensor([1.])
464 tensor([[ 1.6688,  1.2611,  0.2186, -1.5286,  1.3228]]) tensor([1.])
465 tensor([[ 0.3737,  1.7768,  0.2944, -0.2626,  0.3112]]) tensor([1.])
466 tensor([[ 0.9276,  0.9775,  1.6777, -0.6304,  0.0429]]) tensor([1.])
467 tensor([[ 1.7303,  1.8610, -0.0953, -2.2456,  0.2030]]) tensor([0.])
468 tensor([[-0.4802,  0.5575,  1.1952,  0.2689, -0.5381]]) tensor([0.])
469 tensor([[ 0.2819,  0.8825, -0.5488,  0.4565,  0.5273]]) tensor([1.])
470 tensor([[-2.9118,  1.4655, -0.3997, -1.4446,  1.3603]]) tensor([1.])
471 tensor([[ 0.4154,  1.5857,  0.3595, -2.3831,  1.9951]]) tensor([1.])
472 tensor([[-1.3212,  1.5729,  0.7471,  3.0966,  1

### 모델 정의 

In [6]:
class LogisticRegression(nn.Module) : 
    def __init__(self, input_dim) : 
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
        
    def forward(self, x) :
        out = self.linear(x)
        out = torch.sigmoid(out)
        
        return out
    
model = LogisticRegression(input_dim=5)
print(model)

LogisticRegression(
  (linear): Linear(in_features=5, out_features=1, bias=True)
)


### 모델을 학습시키기 전에 학습에 필요한 Loss function, optimizer 선언

In [12]:
from adamp import SGDP
"""
# define your params
optimizer = SGDP(params, lr=0.1, weight_decay=1e-5, momentum=0.9, nesterov=True)
"""
criterion = nn.BCELoss() # 사용한 이유는 0과 1 분류 이진분류 하기 떄문입니다. 
# optimizer = SGDP(model.parameters(), lr=0.25, weight_decay=1e-5, momentum=0.9, nesterov=True)
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=True)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.01
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 1e-05
)


### tarin loop 구현

In [13]:
num_epochs = 100

for epoch in range(num_epochs) : 
    for inputs, targets in train_loader : 
#         print(targets)
#         print("unsqueeze >> ", targets.unsqueeze(1))
        # optimizer 초기화 진행 
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))
        loss.backward()
        
        optimizer.step()
    
    if epoch % 10 == 0 :
        print(f"Epoch : [{epoch+1}/{num_epochs}], Loss : [{loss.item():.4f}]")

Epoch : [1/100], Loss : [0.2157]
Epoch : [11/100], Loss : [0.4489]
Epoch : [21/100], Loss : [0.0921]
Epoch : [31/100], Loss : [0.2294]
Epoch : [41/100], Loss : [0.2072]
Epoch : [51/100], Loss : [0.2995]
Epoch : [61/100], Loss : [0.3193]
Epoch : [71/100], Loss : [0.3698]
Epoch : [81/100], Loss : [0.3441]
Epoch : [91/100], Loss : [0.2457]


### 평가 코드 작성 

In [9]:
# device on GPU 인지 혹은 CPU 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device >> ", device)

model.eval()
with torch.no_grad() : 
    correct = 0
    total = 0
    for test_inputs, test_targets in test_loader : 
#         print(test_inputs, test_targets)
        test_input, test_target = test_inputs.to(device), test_targets.to(device)
        outputs_test = model(test_input)
        _, pred_test = torch.max(outputs_test, 1)
        total += test_targets.size(0)
        correct += (pred_test == test_targets).sum().item()
        
    print("Acc >> %d%%"%(100 * correct / total))

Using device >>  cpu
Acc >> 48%
