### 비선형 회귀


In [1]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader

In [2]:
# 사용자 정의 데이터세트

class CustomDataset(Dataset):
    def __init__(self, file_path):
        df = pd.read_csv(file_path)
        self.x = df.iloc[:, 0].values
        self.y = df.iloc[:, 1].values
        self.length = len(df)

    def __getitem__(self, index):
        x = torch.FloatTensor([self.x[index] ** 2, self.x[index]])
        y = torch.FloatTensor([self.y[index]])
        return x, y
    
    def __len__(self):
        return self.length

In [3]:
# 사용자 정의 모델
class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)
    
    def forward(self, x):
        x = self.layer(x)
        return x

In [4]:
train_dataset = CustomDataset('./dataset/non_linear.csv')
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=True)

In [5]:
(train_dataset)

<__main__.CustomDataset at 0x1052564c0>

In [6]:
device = torch.device("mps")
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0001)

In [7]:
for epoch in range(10000):
    cost = 0.0

    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        cost += loss
    
    cost = cost / len(train_dataloader)

    if (epoch + 1) % 1000 == 0:
        print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}")

Epoch : 1000, Model : [Parameter containing:
tensor([[ 3.1166, -1.7004]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.5088], device='mps:0', requires_grad=True)], Cost : 0.515
Epoch : 2000, Model : [Parameter containing:
tensor([[ 3.1149, -1.7022]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.4234], device='mps:0', requires_grad=True)], Cost : 0.436
Epoch : 3000, Model : [Parameter containing:
tensor([[ 3.1141, -1.7025]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.3455], device='mps:0', requires_grad=True)], Cost : 0.374
Epoch : 4000, Model : [Parameter containing:
tensor([[ 3.1128, -1.7024]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.2739], device='mps:0', requires_grad=True)], Cost : 0.358
Epoch : 5000, Model : [Parameter containing:
tensor([[ 3.1115, -1.7022]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.2086], device='mps:0', requires_grad=True)], Cos

#### 모델평가


In [8]:
with torch.no_grad(): # no_grad클래스: 기울기 계산을 비활성화 하는 클래스 (메모릴 사용량을 줄여 추론에 적합한 상태로 변경)
    model.eval()
    inputs = torch.FloatTensor(
        [
            [1 ** 2, 1],
            [5 ** 2, 5],
            [11 ** 2, 11]
        ]
    ).to(device)
    output = model(inputs)
    print(output)

tensor([[  1.4480],
        [ 69.2124],
        [357.2975]], device='mps:0')


In [27]:
#### 모델 저장

torch.save(
    model,
    "./models/model.pt"    
)

torch.save(
    model.state_dict(),
    "./models/model_state_dict.pt"
)

#### 데이터세트 분리

In [13]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split

class CustomDataset(Dataset):
    def __init__(self, file_path):
        df = pd.read_csv(file_path)
        self.x = df.iloc[:, 0].values
        self.y = df.iloc[:, 1].values
        self.length = len(df)
    
    def __getitem__(self, index):
        x = torch.FloatTensor([self.x[index]**2, self.x[index]])
        y = torch.FloatTensor([self.y[index]])
        return x, y
    
    def __len__(self):
        return self.length

class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)

    def forward(self, x):
        x = self.layer(x)
        return x

In [14]:
dataset = CustomDataset('./dataset/non_linear.csv')
dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
validation_size = int(dataset_size * 0.1)
test_size = dataset_size - train_size - validation_size

train_dataset, validation_dataset, test_dataset = random_split(dataset, [train_size, validation_size, test_size])
print(f"Training Data Size : {len(train_dataset)}")
print(f"Validation Data Size : {len(validation_dataset)}")
print(f"Training Data Size : {len(test_dataset)}")

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=4, shuffle=True, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True, drop_last=True)

Training Data Size : 160
Validation Data Size : 20
Training Data Size : 20


In [15]:
device = torch.device("mps")
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0001)

In [16]:
for epoch in range(10000):
    cost = 0.0

    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        cost += loss

    cost = cost / len(train_dataloader)

    if (epoch + 1) % 1000 == 0:
        print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}")

Epoch : 1000, Model : [Parameter containing:
tensor([[ 3.1034, -1.7045]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.2969], device='mps:0', requires_grad=True)], Cost : 0.096
Epoch : 2000, Model : [Parameter containing:
tensor([[ 3.1008, -1.7042]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4179], device='mps:0', requires_grad=True)], Cost : 0.082
Epoch : 3000, Model : [Parameter containing:
tensor([[ 3.1002, -1.7042]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4678], device='mps:0', requires_grad=True)], Cost : 0.078
Epoch : 4000, Model : [Parameter containing:
tensor([[ 3.0999, -1.7042]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4884], device='mps:0', requires_grad=True)], Cost : 0.076
Epoch : 5000, Model : [Parameter containing:
tensor([[ 3.0997, -1.7041]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4969], device='mps:0', requires_grad=True)], Cost : 0

In [17]:
with torch.no_grad():
    model.eval()
    
    for x, y in validation_dataloader:
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        print(f"x: {x}")
        print(f"y: {y}")
        print(f'Outputs: {output}')
        print('--------------------------------')

x: tensor([[31.3600,  5.6000],
        [77.4400, -8.8000],
        [53.2900, -7.3000],
        [51.8400,  7.2000]], device='mps:0')
y: tensor([[ 88.2700],
        [256.0100],
        [177.7300],
        [149.1800]], device='mps:0')
Outputs: tensor([[ 88.1566],
        [255.5120],
        [178.1066],
        [148.9051]], device='mps:0')
--------------------------------
x: tensor([[  0.4900,   0.7000],
        [100.0000, -10.0000],
        [  0.6400,  -0.8000],
        [  0.4900,  -0.7000]], device='mps:0')
y: tensor([[  0.8600],
        [327.7900],
        [  3.8700],
        [  2.9800]], device='mps:0')
Outputs: tensor([[  0.8288],
        [327.4783],
        [  3.8496],
        [  3.2143]], device='mps:0')
--------------------------------
x: tensor([[ 5.2900,  2.3000],
        [21.1600, -4.6000],
        [96.0400, -9.8000],
        [14.4400, -3.8000]], device='mps:0')
y: tensor([[ 13.0200],
        [ 73.9400],
        [314.4800],
        [ 52.0000]], device='mps:0')
Outputs: tensor([[

In [33]:
# 데이터 세트 분리 방법

dataset = CustomDataset("./dataset/non_linear.csv")
dataset_size = len(dataset)

train_size = int(dataset_size * 0.8)
validation_size = int(dataset_size * 0.1)
test_size = dataset_size - train_size - validation_size

train_dataset, validation_dataset, test_dataset = random_split(dataset, [train_size, validation_size, test_size])
print(f'train_size : {len(train_dataset)}')
print(f'validation_size : {len(validation_dataset)}')
print(f'test_size : {len(test_dataset)}')

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=4, shuffle=True, drop_last=True)
train_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True, drop_last=True)

train_size : 160
validation_size : 20
test_size : 20


In [36]:
with torch.no_grad():
    model.eval()
    for x, y in validation_dataloader:
        x = x.to(device)
        y = y.to(device)

        outputs = model(x)
        print(f'x: {x}')

        print(f'y: {y}')
        print(f'output:{output}')
        print('--------------------------------')

x: tensor([[36.0000, -6.0000],
        [23.0400,  4.8000],
        [ 4.4100,  2.1000],
        [43.5600, -6.6000]], device='mps:0')
y: tensor([[122.6500],
        [ 63.3300],
        [ 10.4700],
        [146.3200]], device='mps:0')
output:tensor([[302.4977],
        [  2.3788],
        [ 37.6930],
        [180.4726]], device='mps:0')
--------------------------------
x: tensor([[38.4400, -6.2000],
        [ 0.0000,  0.0000],
        [ 2.8900,  1.7000],
        [ 9.0000,  3.0000]], device='mps:0')
y: tensor([[130.4100],
        [  0.1500],
        [  6.6200],
        [ 23.6100]], device='mps:0')
output:tensor([[302.4977],
        [  2.3788],
        [ 37.6930],
        [180.4726]], device='mps:0')
--------------------------------
x: tensor([[46.2400,  6.8000],
        [46.2400, -6.8000],
        [62.4100,  7.9000],
        [20.2500,  4.5000]], device='mps:0')
y: tensor([[132.0400],
        [155.4300],
        [180.9000],
        [ 55.3500]], device='mps:0')
output:tensor([[302.4977],
   

In [35]:
for x, y in validation_dataloader:
    print(x)
    print(y)
    break

tensor([[36.0000, -6.0000],
        [23.0400,  4.8000],
        [ 4.4100,  2.1000],
        [43.5600, -6.6000]])
tensor([[122.6500],
        [ 63.3300],
        [ 10.4700],
        [146.3200]])
