In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import os

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms 
import torch.nn.functional as F

# 데이터 수집

In [2]:
# Load the raw iris table from disk.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable constant so the notebook can run on other machines.
iris_data = pd.read_csv('/home/hts/A_project/hts_pytorch/data/iris_flower_data/164AB90F4B127F491C.csv')

# Map every species name to an integer class id, in order of first appearance.
hot_encode_num = {name: i for i, name in enumerate(iris_data['Species'].unique())}
iris_data_y = [hot_encode_num[i] for i in iris_data['Species']]

iris_data_x = iris_data.drop('Species', axis=1)

# Choose the training rows BEFORE standardising. Sampling depends only on the
# number of rows and the seed, so this is the same 80/20 split as sampling the
# normalised frame would give.
train_index = iris_data.sample(frac=0.8, random_state=42).index

# Fit mean/std on the training rows only, then apply them to every row, so no
# statistics of the held-out rows leak into the training features.
train_features = iris_data_x.loc[train_index]
iris_data_x = (iris_data_x - train_features.mean()) / train_features.std()

# NOTE(review): 'caseno' is standardised and kept as a model input. It looks
# like a row identifier; if the file is ordered by species it leaks the label.
# Confirm, and drop it together with the model's input size if so.
iris_data[['caseno','SepalLength','SepalWidth','PetalLength','PetalWidth']] = iris_data_x
iris_data['Species'] = iris_data_y

# Rows come back in sampled order for train; the remaining rows form the test set.
train_iris_data = iris_data.loc[train_index]
test_iris_data = iris_data.drop(train_index)

# 데이터셋 제작

In [3]:
class flower_dataset(torch.utils.data.Dataset):
    """Map-style dataset yielding one (features, label) pair per DataFrame row.

    Args:
        phase: 'train' selects ``train_iris_data``; 'test' selects
            ``test_iris_data``.
        train_iris_data: DataFrame with a 'Species' column holding integer
            class ids; every other column is treated as a numeric feature.
        test_iris_data: DataFrame with the same layout as ``train_iris_data``.

    Raises:
        ValueError: if ``phase`` is neither 'train' nor 'test'.
    """

    def __init__(self, phase, train_iris_data, test_iris_data):
        frames = {'train': train_iris_data, 'test': test_iris_data}
        # Fail fast: an unknown phase would otherwise leave phase_data unset
        # and only surface later as an AttributeError.
        if phase not in frames:
            raise ValueError(f"phase must be 'train' or 'test', got {phase!r}")
        self.phase_data = frames[phase]

    def __len__(self):
        """Return the number of rows in the selected split."""
        return len(self.phase_data)

    def x_y_data_collect(self, data):
        """Split a single row (a Series) into a feature array and a class id.

        Returns:
            Tuple ``(x_data, y_data)``: a float32 numpy array of every value
            except 'Species', and the 'Species' value as a Python int.
        """
        # Force float32 so a row that pandas upcast to object/float64 still
        # converts cleanly to a float tensor.
        x_data = data.drop('Species').to_numpy(dtype='float32')
        # int() works for numpy and plain Python scalars alike, whereas
        # scalar .astype() exists only on numpy scalars.
        y_data = int(data['Species'])

        return x_data, y_data

    def __getitem__(self, index):
        """Return ``(features, label)`` tensors for the row at position ``index``."""
        x_data, y_data = self.x_y_data_collect(self.phase_data.iloc[index])

        # torch.tensor copies, so the result never aliases pandas-owned memory.
        x_data = torch.tensor(x_data, dtype=torch.float32)
        y_data = torch.tensor(y_data, dtype=torch.long)

        return x_data, y_data



In [4]:
def collate_fn(batch):
    """Stack a list of (features, label) samples into two batch tensors.

    Args:
        batch: iterable of 2-tuples ``(x, y)`` of tensors; all ``x`` must share
            one shape and all ``y`` must share one shape.

    Returns:
        Tuple ``(xs, ys)`` where each is the samples stacked along a new
        leading dimension.
    """
    # Materialise once so the input is traversed a single time.
    samples = [(features, label) for features, label in batch]

    stacked_features = torch.stack([features for features, _ in samples])
    stacked_labels = torch.stack([label for _, label in samples])

    return stacked_features, stacked_labels

In [5]:
def build_dataloader(train_batch_size, val_batch_size, num_workers=8):
    """Build the training and validation DataLoaders for the iris splits.

    Reads the module-level ``train_iris_data`` / ``test_iris_data`` frames.

    Args:
        train_batch_size: batch size of the training loader.
        val_batch_size: batch size of the validation loader.
        num_workers: worker processes per loader (default 8, the value that
            was previously hard-coded).

    Returns:
        dict with keys 'train' and 'val' mapping to DataLoader objects.
    """
    train_data_set = flower_dataset(phase='train', train_iris_data=train_iris_data, test_iris_data=test_iris_data)
    test_data_set = flower_dataset(phase='test', train_iris_data=train_iris_data, test_iris_data=test_iris_data)

    shared_options = dict(
        collate_fn=collate_fn,
        num_workers=num_workers,
        # Pinned host memory only helps host-to-GPU copies; requesting it
        # without CUDA just produces a warning.
        pin_memory=torch.cuda.is_available(),
    )

    dataloaders = {}
    dataloaders['train'] = DataLoader(
        train_data_set, batch_size=train_batch_size, shuffle=True, drop_last=True, **shared_options
    )
    # Validation must see every sample exactly once: no shuffling and no
    # dropping of a trailing partial batch.
    dataloaders['val'] = DataLoader(
        test_data_set, batch_size=val_batch_size, shuffle=False, drop_last=False, **shared_options
    )

    return dataloaders

In [6]:
# dataloaders = build_dataloader(train_batch_size=10,val_batch_size =5)
# temp = iter(dataloaders['train'])
# temp2 = next(temp)

In [7]:
# dataloaders = build_dataloader(train_batch_size=3,val_batch_size =5)
# for index, batch in enumerate(dataloaders['train']):
#     print(index)
#     print("AAAAAAAAAAAAAAA")
#     print(batch[1])
#     break

# 모델

In [8]:
class NeuralNetwork(nn.Module):
    """Three-layer fully connected classifier with ReLU activations.

    Args:
        in_features: number of input features per sample (default 5).
        hidden1: width of the first hidden layer (default 16).
        hidden2: width of the second hidden layer (default 32).
        num_classes: number of output logits (default 3).

    The defaults reproduce the original fixed 5-16-32-3 architecture, and the
    layer attribute names are unchanged, so previously saved state dicts load.
    """

    def __init__(self, in_features=5, hidden1=16, hidden2=32, num_classes=3):
        super().__init__()
        self.input_layer = nn.Linear(in_features, hidden1)
        self.hidden_layer1 = nn.Linear(hidden1, hidden2)
        self.output_layer = nn.Linear(hidden2, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw (un-normalised) class logits for ``x``."""
        out = self.relu(self.input_layer(x))
        out = self.relu(self.hidden_layer1(out))
        # No softmax here: the training code applies F.cross_entropy to the logits.
        out = self.output_layer(out)
        return out


# 학습 함수

In [9]:
def train_one_epoch(dataloaders, model, optimizer, device):
    """Run one training pass followed by one validation pass.

    Args:
        dataloaders: mapping with 'train' and 'val' entries, each an iterable
            of batches where ``batch[0]`` is the input tensor and ``batch[1]``
            the batched target tensor.
        model: module mapping inputs to class logits.
        optimizer: optimizer stepped once per training batch.
        device: device the batches are moved to before the forward pass.

    Returns:
        dict ``{'train': float, 'val': float}`` holding the per-sample mean
        cross-entropy of each phase, or NaN for a phase that yielded no batch.
    """
    total_loss = {'train': 0, 'val': 0}

    for phase in ['train', 'val']:
        is_training = phase == 'train'
        # train(True) enables training mode; train(False) is the same as eval().
        model.train(is_training)

        loss_sum = 0.0
        sample_count = 0

        for batch in dataloaders[phase]:
            x_data = batch[0].to(device)
            y_data = batch[1].to(device)

            # Gradients are tracked only during the training phase.
            with torch.set_grad_enabled(is_training):
                predict = model(x_data)

                loss = F.cross_entropy(predict, y_data, reduction='mean')

                if is_training:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

            # Weight each batch mean by its size so a smaller final batch does
            # not get the same influence as a full one.
            batch_size = y_data.size(0)
            loss_sum += loss.item() * batch_size
            sample_count += batch_size

        # An empty loader (e.g. drop_last with too few samples) used to raise
        # ZeroDivisionError; report NaN instead.
        total_loss[phase] = loss_sum / sample_count if sample_count else float('nan')
    return total_loss

# 시작 전 파라미터 및 호출

In [10]:
# Keep the original preference for the second GPU, but fall back cleanly:
# 'cuda:1' does not exist on a single-GPU host even though
# torch.cuda.is_available() returns True there.
if torch.cuda.device_count() > 1:
    device = 'cuda:1'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

dataloaders = build_dataloader(train_batch_size=7, val_batch_size=3)
model = NeuralNetwork().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)
num_epoch = 300

train_loss = []
val_loss = []
# Start from +inf so the first finite validation loss always becomes the best,
# whatever its magnitude.
best_loss = float('inf')

# Create the checkpoint directory once rather than on every improvement.
save_dir = './trained_model/'
os.makedirs(save_dir, exist_ok=True)
best_model_path = os.path.join(save_dir, "bestmodel.pt")

for epoch in range(num_epoch):
    loss = train_one_epoch(dataloaders=dataloaders, model=model, optimizer=optimizer, device=device)
    train_loss.append(loss['train'])
    val_loss.append(loss['val'])
    print(f"{epoch+1}/{num_epoch}--train_loss : {loss['train']}, val_loss : {loss['val']}")

    # Checkpoint whenever the validation loss improves.
    if (loss['val']<best_loss):
        best_loss = loss['val']
        # Deep copy so later optimizer steps do not mutate the kept snapshot.
        best_model = copy.deepcopy(model.state_dict())
        torch.save(best_model, best_model_path, _use_new_zipfile_serialization=False)

print(f"bestmodel : {best_loss}")

1/300--train_loss : 1.0725281378802132, val_loss : 1.0427854120731355
2/300--train_loss : 1.0640213805086471, val_loss : 1.0369484305381775
3/300--train_loss : 1.060296020087074, val_loss : 1.0315228164196015
4/300--train_loss : 1.0533619733417736, val_loss : 1.0254252254962921
5/300--train_loss : 1.0454545652165133, val_loss : 1.0195705592632294
6/300--train_loss : 1.0410201935207142, val_loss : 1.0137321054935455
7/300--train_loss : 1.0330483492682963, val_loss : 1.0077040433883666


Exception ignored in: <function _releaseLock at 0x7fb65806c670>
Traceback (most recent call last):
  File "/home/hts/.conda/envs/hts_car/lib/python3.8/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


8/300--train_loss : 1.0294412514742683, val_loss : 1.0017593383789063
9/300--train_loss : 1.022147304871503, val_loss : 0.9958597898483277
10/300--train_loss : 1.016039886895348, val_loss : 0.9891881287097931
11/300--train_loss : 1.0105285013423246, val_loss : 0.9834442138671875
12/300--train_loss : 1.0043346987051123, val_loss : 0.9765957534313202
13/300--train_loss : 0.9945420763071846, val_loss : 0.9697878539562226
14/300--train_loss : 0.9886540244607365, val_loss : 0.9628623187541961
15/300--train_loss : 0.9812769363908207, val_loss : 0.955944961309433
16/300--train_loss : 0.972610196646522, val_loss : 0.948851215839386
17/300--train_loss : 0.9654985561090357, val_loss : 0.9412339150905609
18/300--train_loss : 0.9573509307468638, val_loss : 0.9335070013999939
19/300--train_loss : 0.9489740799455082, val_loss : 0.9259948909282685
20/300--train_loss : 0.9417938905603745, val_loss : 0.9183698654174804
21/300--train_loss : 0.9329359636587256, val_loss : 0.9105330348014832
22/300--train

# 테스트

In [None]:
# Rebuild the network on the CPU and restore the best checkpoint.
test_model = NeuralNetwork()
# map_location='cpu' lets a checkpoint saved from a GPU load on hosts where
# that GPU is not available; the evaluation below runs on the CPU anyway.
test_model.load_state_dict(torch.load('./trained_model/bestmodel.pt', map_location='cpu'))
test_model.eval()

total_score = 0
test_count = 0

# Inference only, so skip building the autograd graph.
with torch.no_grad():
    for batch in dataloaders['val']:
        x_data = batch[0]
        y_data = batch[1]

        # Predicted class is the index of the largest logit.
        pred = torch.argmax(test_model(x_data), dim=1)

        # Count the matches in this batch in a single tensor operation.
        total_score += (pred == y_data).sum().item()
        test_count += y_data.size(0)

# Accuracy over every evaluated sample; NaN when the loader yielded nothing.
print(total_score/test_count if test_count else float('nan'))

    

0.9333333333333333
