- - -
[1] 모듈 로딩 및 데이터 준비

In [70]:
#모듈 로딩
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optimizer
from torchmetrics.classification import MulticlassF1Score
from torchinfo import summary

from torch.utils.data import Dataset,DataLoader

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [43]:
torch.manual_seed(1)

<torch._C.Generator at 0x1ba67c787d0>

In [44]:
data_file='../data/iris.csv'

In [45]:
irisdf=pd.read_csv(data_file)
irisdf.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


- - -
[2] 사용자 정의 데이터셋 클래스 생성

In [46]:
class custom_dataset(Dataset):
    #데이터 로딩 및 전처리 진행, 인스턴스 생성 메서드
    def __init__(self,feature_df,target_df):

        #부모 클래스 생성
        super().__init__()
        self.feature_df=feature_df
        self.target_df=target_df
        
        self.n_rows=feature_df.shape[0]
        self.n_features=feature_df.shape[1]

    #데이터 개수 반환
    def __len__(self):
        return self.n_rows

    #특정 index의 데이터와 타겟 반환 메서드
    def __getitem__(self, idx):

        feature_ts=torch.FloatTensor(self.feature_df.iloc[idx].values)
        target_ts=torch.FloatTensor(self.target_df[idx])
        
        return feature_ts,target_ts

In [47]:
class mcf_model(nn.Module):
    def __init__(self):
        super().__init__()

        self.in_layer=nn.Linear(4, 10)
        self.hidden_layer_1st=nn.Linear(10,30)
        self.hidden_layer_2nd=nn.Linear(30,15)
        self.hidden_layer_3rd=nn.Linear(15,10)
        self.out_layer=nn.Linear(10,3)

    def forward(self,input_data):

        y=F.relu(self.in_layer(input_data))
        y=F.relu(self.hidden_layer_1st(y))
        y=F.relu(self.hidden_layer_2nd(y))
        y=F.relu(self.hidden_layer_3rd(y))

        return self.out_layer(y)

In [48]:
model=mcf_model()

print(model)

mcf_model(
  (in_layer): Linear(in_features=4, out_features=10, bias=True)
  (hidden_layer_1st): Linear(in_features=10, out_features=30, bias=True)
  (hidden_layer_2nd): Linear(in_features=30, out_features=15, bias=True)
  (hidden_layer_3rd): Linear(in_features=15, out_features=10, bias=True)
  (out_layer): Linear(in_features=10, out_features=3, bias=True)
)


- - -
[3] 데이터셋 인스턴스 생성

In [49]:
#feature/target 추출
feature_df, target_df=irisdf[irisdf.columns[:-1]],irisdf[[irisdf.columns[-1]]]
                                                # = irisdf[irisdf.columns[-1:]]                          
print(f'feature_df: {feature_df.shape}')
print(f'target_df: {target_df.shape}')

#iris 데이터셋 인스턴스 생성
irisds=custom_dataset(feature_df, target_df)

feature_df: (150, 4)
target_df: (150, 1)


In [50]:
irisds.feature_df

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [51]:
irisds.target_df

Unnamed: 0,variety
0,Setosa
1,Setosa
2,Setosa
3,Setosa
4,Setosa
...,...
145,Virginica
146,Virginica
147,Virginica
148,Virginica


In [52]:
#타겟 컬럼 인코딩  w. OneHotEncoder
encoder=OneHotEncoder(sparse_output=False)
irisds.target_df=encoder.fit_transform(irisds.target_df)
irisds.target_df.shape

(150, 3)

In [53]:
print(irisds.target_df)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0.

In [54]:
irisds.n_features, irisds.n_rows

(4, 150)

- - -
[4] DataLoader 인스턴스 생성
- dataset 인스턴스, 배치 사이즈 필요

In [62]:
irisdl=DataLoader(irisds)     #batch_size=32가 성능이 젤 좋다

In [63]:
for data_ts,target_ts in irisdl:
    print(data_ts.shape,target_ts.shape,data_ts,target_ts)
    break

torch.Size([1, 4]) torch.Size([1, 3]) tensor([[5.1000, 3.5000, 1.4000, 0.2000]]) tensor([[1., 0., 0.]])


In [67]:
EPOCH=1
BATCH_SIZE=32
BATCH_CNT=irisdf.shape[0]//BATCH_SIZE
DEVICE='cuda' if torch.cuda.is_available() else 'cpu'
LR=0.001

print(BATCH_CNT)

4


In [71]:
x_train,x_test,y_train,y_test=train_test_split(feature_df,target_df,random_state=2)
x_train,x_val,y_train,y_val=train_test_split(x_train,y_train,random_state=2)

print(f'x_train: {x_train.shape}, x_test: {x_test.shape}, x_val: {x_val.shape}')
print(f'y_train: {y_train.shape}, y_test: {y_test.shape}, y_val: {y_val.shape}')
print(f'{type(x_train)}, {type(x_test)}, {type(x_val)}')

x_train: (84, 4), x_test: (38, 4), x_val: (28, 4)
y_train: (84, 1), y_test: (38, 1), y_val: (28, 1)
<class 'pandas.core.frame.DataFrame'>, <class 'pandas.core.frame.DataFrame'>, <class 'pandas.core.frame.DataFrame'>


In [72]:
train_ds=custom_dataset(x_train,y_train)
val_ds=custom_dataset(x_val,y_val)
test_ds=custom_dataset(x_test,y_test)

irisdl=DataLoader(train_ds,batch_size=BATCH_SIZE)

- - -
[6] 학습

In [73]:
optimizer=optimizer.Adam(model.parameters(),lr=LR)

In [76]:
loss_history=[[],[]]
score_history=[[],[]]

for epoch in range(EPOCH):        #epoch수만큼 반복
    
    model.train()
    
    loss_total=0
    score_total=0

    #배치 크기만큼 데이터와 타겟 추출해서 학습 진행
    for data_ts,target_ts in irisdl:

        #배치 크기만큼 학습 진행
        pre_y=model(data_ts)
        #print(pre_y.shape,target_ts.reshape(-1).shape)

        #손실 계산
        loss=nn.CrossEntropyLoss()(pre_y,target_ts)
        loss_total+=loss.item()

        #성능평가
        score=MulticlassF1Score(num_classes=3)(pre_y,target_ts)
        score_total+=score.item()

        #최적화
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()

    with torh.no_grad():
        val_feature=torch.FloatTensor(val_ds.feature_df.values)
        val_target=torch.FloatTensor(val_ds.target_df.values)

        pre_val=model(val_feature)

        val_loss=nn.CrossEntropyLoss(pre_val,val_target.reshape(-1).long())

        val_score=MulticlassF1Score(num_classes=3)(pre_val,val_target.reshape(-1))

    loss_history[0].append(loss_total/len(irisdl))
    score_history[0].append(score_total/len(irisdl))

    loss_history[1].append(val_loss)
    score_history[1].append(val_score)

    print(f'[{epoch}/{EPOCH}]\n- Train Loss : {loss_history[0][-1]} Score : {score_history[0][-1]}')
    print(f'- Val Loss : {loss_history[1][-1]} Score : {score_history[1][-1]}')

KeyError: 0

- - -
[7] 시각화