In [None]:
import numpy as np
import random
import os
import math
import argparse
import time
import matplotlib.pyplot as plt

from glob import glob
import pandas as pd
import cv2 as cv
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch import optim

import torchvision.models as models
from torchvision import transforms

import gzip
import pickle

In [None]:
%cd "/content/drive/MyDrive/Colab Notebooks/DACON_PLANT"

/content/drive/MyDrive/Colab Notebooks/DACON_PLANT


In [None]:
#train의 leaf ratio
train_ratio = pd.read_csv('xgboost/area_ratio_test.txt', header=None)
train_ratio.head(50)

Unnamed: 0,0
0,001 0.148
1,002 0.537
2,003 0.016
3,004 0.151
4,005 0.167
5,006 0.164
6,007 0.081
7,008 0.171
8,009 0.004
9,010 0.182


In [None]:
#meta data
with gzip.open("./meta.score", 'rb') as f:
	meta = pickle.load(f)

In [None]:
#순서 data
with gzip.open("./order.score", 'rb') as f:
  order = pickle.load(f)

In [None]:
order_dict = dict()

for i in range(len(order)):
  order_dict[i] = order[i]

order_dict

{0: 53,
 1: 31,
 2: 7,
 3: 44,
 4: 13,
 5: 52,
 6: 43,
 7: 8,
 8: 71,
 9: 74,
 10: 69,
 11: 32,
 12: 63,
 13: 56,
 14: 3,
 15: 41,
 16: 61,
 17: 72,
 18: 51,
 19: 67,
 20: 73,
 21: 27,
 22: 49,
 23: 22,
 24: 29,
 25: 21,
 26: 65,
 27: 60,
 28: 48,
 29: 64,
 30: 59,
 31: 11,
 32: 9,
 33: 16,
 34: 58,
 35: 46,
 36: 68,
 37: 54,
 38: 42,
 39: 23,
 40: 20,
 41: 55,
 42: 34,
 43: 75,
 44: 50,
 45: 26,
 46: 24,
 47: 2,
 48: 6,
 49: 1,
 50: 30,
 51: 14,
 52: 40,
 53: 15,
 54: 36,
 55: 38,
 56: 70,
 57: 37,
 58: 4,
 59: 45,
 60: 35,
 61: 66,
 62: 47,
 63: 33,
 64: 62,
 65: 19,
 66: 28,
 67: 17,
 68: 12,
 69: 5,
 70: 25,
 71: 39,
 72: 57,
 73: 18,
 74: 10}

In [None]:
weight_label = [0 for _ in range(len(meta))]

for i in range(len(meta)):
  #meta는 뒤죽박죽, leaf ratio는 정렬되었으므로 weight_label을 정렬해야함.
  # order_dict[0] = 53, meta[0] = CASE_53
  weight_label[order_dict[i]-1] = np.array(meta[i].loc[18])


In [None]:
segmentation = list()
for i in range(len(train_ratio)):
  segmentation.append(float(train_ratio[0][i].split(' ')[1]))

In [None]:
index = 0
segmentation = list()
train_df = list()
for i in range(len(weight_label)):
  #각 CASE 별 length
  length = len(weight_label[i])
  label = weight_label[i]
  
  area = list()
  for j in range(index, index+length):
    area.append(float(train_ratio[0][j].split(' ')[1]))
    segmentation.append(float(train_ratio[0][j].split(' ')[1]))
  index += length
  area = np.array(area)

  df = pd.DataFrame({'area' : area, 'label' : label})

  train_df.append(df)

print(index)

1592


In [None]:
with gzip.open("./xgboost/segmentation_test.score", 'w') as f:
  pickle.dump(segmentation, f)

In [None]:
meta[14].loc[18]

0    11.337
0    15.728
0    21.492
0    27.091
0    32.278
Name: 18, dtype: object

In [None]:
train_df = np.array(train_df).flatten()

# 면적-중량 mlp

In [None]:
#train_df 순서는 CASE 순서와 같음.
for i in range(len(train_df)):
  if i == 0:
    total = train_df[0]
  else:
    total = pd.concat([total, train_df[i]])

len(total)

1592

In [None]:
total = total.reset_index(drop = True)

In [None]:
train = total[:1592]
train = train.reset_index(drop = True)
dev = total[1200:]
dev = dev.reset_index(drop=True)

In [None]:
dev

Unnamed: 0,area,label
0,0.329,183.925
1,0.352,200.585
2,0.376,214.632
3,0.395,222.381
4,0.405,227.741
...,...,...
387,0.393,211.497
388,0.395,214.116
389,0.398,214.293
390,0.370,210.872


In [None]:
batch_size = 1

In [None]:
class LeafDataset(Dataset):
  
  def __init__(self, area, label, train_mode=True):
     self.area = area
     self.label = label
     self.train_mode = train_mode

  def __len__(self):
    return len(self.area)

  def __getitem__(self, idx):

    if self.train_mode == True:
      area = self.area[idx]
      label = self.label[idx]
      area = torch.tensor(area, dtype=torch.float32)
      label = torch.tensor(label, dtype=torch.float32)
      return dict(area=area, label=label)
      
    else:
      area = self.area[idx]
      area = torch.tensor(area, dtype=torch.float32)
      return dict(area=area)

In [None]:
train_dataset = LeafDataset(np.array(train['area']), np.array(train['label']))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle = False, num_workers=0)

In [None]:
dev_dataset = LeafDataset(np.array(dev['area']), np.array(dev['label']))
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle = False, num_workers=0)

In [None]:
for batch in dev_loader:
  print(batch)

# Model

In [None]:
epochs = 50
learning_rate = 0.0005

#epochs = 50
#learning_rate = 0.0002

In [None]:
class Regressor(nn.Module):

  def __init__(self):
    super().__init__()

    self.fc1 = nn.Linear(1, 3)
    self.fc2 = nn.Linear(3, 3) 
    self.fc3 = nn.Linear(3, 1)
    self.dropout = nn.Dropout(0.2)
    

  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = self.dropout(F.relu(self.fc2(x)))
    x = self.fc3(x)

    return x

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [None]:
seed_everything(42)

In [None]:
model = Regressor()
model = model.cuda()
criterion = nn.MSELoss()
scheduler = None
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-7)

In [None]:
def train(model, optimizer, train_loader, def_loader, scheduler, device):
  
  best_loss = 9999

  for epoch in range(epochs):
    model.train()
    train_loss = []
    for batch in tqdm(iter(train_loader)):

      area, label = batch['area'].float().to(device), batch['label'].float().to(device)

      optimizer.zero_grad()

      logit = model(area)
      loss = criterion(logit, label)

      loss.backward()
      optimizer.step()

      train_loss.append(loss.item())

    if scheduler is not None:
      scheduler.step()

    dev_loss = validation(model, dev_loader, criterion, device)

    print(f'Epoch [{epoch}] Train LOSS : [{np.mean(train_loss):.5f}] Validation LOSS : [{dev_loss:.5f}]\n')
    
    if best_loss > dev_loss:
      best_loss = dev_loss
      torch.save(model.state_dict(), './saved/best_model.pth')
      print('Model Saved')

def validation(model, dev_loader, criterion, device):
  model.eval()
  dev_loss = []
  with torch.no_grad():
    for batch in tqdm(iter(dev_loader)):
      area, label = batch['area'].float().to(device), batch['label'].float().to(device)

      logit = model(area)
      loss = criterion(logit, label)

      dev_loss.append(loss.item())

  dev_mean_loss = np.mean(dev_loss)
  return dev_mean_loss

In [None]:
train(model, optimizer, train_loader, dev_loader, scheduler, device)

  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [0] Train LOSS : [16987.87311] Validation LOSS : [15569.82739]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [1] Train LOSS : [16051.90617] Validation LOSS : [13741.68279]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [2] Train LOSS : [13614.32421] Validation LOSS : [10793.16670]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [3] Train LOSS : [11223.68115] Validation LOSS : [8699.66435]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [4] Train LOSS : [10327.20241] Validation LOSS : [7652.88779]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [5] Train LOSS : [9322.52820] Validation LOSS : [7054.80426]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [6] Train LOSS : [9113.45317] Validation LOSS : [6672.06074]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [7] Train LOSS : [8663.36204] Validation LOSS : [6363.24380]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [8] Train LOSS : [8308.34549] Validation LOSS : [6049.07222]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [9] Train LOSS : [8031.73724] Validation LOSS : [5754.61304]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [10] Train LOSS : [7876.05197] Validation LOSS : [5438.39055]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [11] Train LOSS : [7581.07892] Validation LOSS : [5095.75268]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [12] Train LOSS : [6832.76024] Validation LOSS : [4719.70058]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [13] Train LOSS : [6612.53007] Validation LOSS : [4318.93079]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [14] Train LOSS : [5958.26018] Validation LOSS : [3869.28059]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [15] Train LOSS : [5558.63689] Validation LOSS : [3425.41399]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [16] Train LOSS : [5044.23201] Validation LOSS : [2932.80244]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [17] Train LOSS : [4313.91174] Validation LOSS : [2460.24619]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [18] Train LOSS : [3926.44836] Validation LOSS : [1998.40361]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [19] Train LOSS : [3453.91954] Validation LOSS : [1548.97667]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [20] Train LOSS : [3259.77559] Validation LOSS : [1188.90867]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [21] Train LOSS : [2500.26299] Validation LOSS : [917.35135]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [22] Train LOSS : [2635.70249] Validation LOSS : [701.83975]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [23] Train LOSS : [2617.85776] Validation LOSS : [596.86131]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [24] Train LOSS : [2121.37722] Validation LOSS : [537.45733]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [25] Train LOSS : [2460.64143] Validation LOSS : [509.26978]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [26] Train LOSS : [2337.55762] Validation LOSS : [442.77228]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [27] Train LOSS : [2128.60443] Validation LOSS : [447.31034]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [28] Train LOSS : [2086.22100] Validation LOSS : [456.65622]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [29] Train LOSS : [1937.80181] Validation LOSS : [401.67775]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [30] Train LOSS : [2330.77613] Validation LOSS : [394.15633]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [31] Train LOSS : [2390.99806] Validation LOSS : [412.62341]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [32] Train LOSS : [1972.08834] Validation LOSS : [371.47187]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [33] Train LOSS : [2373.18528] Validation LOSS : [414.26665]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [34] Train LOSS : [2259.45874] Validation LOSS : [417.67806]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [35] Train LOSS : [2008.56628] Validation LOSS : [422.25874]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [36] Train LOSS : [2162.95879] Validation LOSS : [395.93692]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [37] Train LOSS : [2357.23895] Validation LOSS : [400.13128]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [38] Train LOSS : [2352.63318] Validation LOSS : [403.96947]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [39] Train LOSS : [2081.57849] Validation LOSS : [358.03703]

Model Saved


  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [40] Train LOSS : [1992.07292] Validation LOSS : [416.08625]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [41] Train LOSS : [2028.27279] Validation LOSS : [463.19936]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [42] Train LOSS : [2060.07803] Validation LOSS : [455.08382]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [43] Train LOSS : [2279.46764] Validation LOSS : [426.22498]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [44] Train LOSS : [2842.73826] Validation LOSS : [429.62077]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [45] Train LOSS : [2050.95599] Validation LOSS : [417.72847]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [46] Train LOSS : [2285.13416] Validation LOSS : [425.75595]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [47] Train LOSS : [2339.79312] Validation LOSS : [398.49340]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [48] Train LOSS : [1809.42326] Validation LOSS : [413.42261]



  0%|          | 0/1592 [00:00<?, ?it/s]

  0%|          | 0/392 [00:00<?, ?it/s]

Epoch [49] Train LOSS : [2136.44647] Validation LOSS : [420.54401]



# Test

In [None]:
#test leaf area data

#순서 뒤죽박죽
leaf_area =  pd.read_csv('leaf_area.csv', header=None)
test = list()
order = list()
for i in range(len(leaf_area)):
  test.append(float(leaf_area[0][i].split(' ')[1]))
  order.append(int(leaf_area[0][i].split(' ')[0]))

In [None]:
#순서 원래대로 맞춰주기
order_dict = dict()
for i in range(len(order)):
  order_dict[i] = order[i]

test_sorted = [0 for _ in range(len(test))]
for i in range(len(test)):
  test_sorted[order[i]-1] = test[i]


In [None]:
label = []
test_dataset = LeafDataset(np.array(np.array(test_sorted)), label, train_mode=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle = False, num_workers=0)

In [None]:
print(len(test_loader))

460


In [None]:
def evaluate(model, test_loader, device):
  model.eval()
  prediction = []
  with torch.no_grad():
    for batch in tqdm(iter(test_loader)):
      area = batch['area'].float().to(device)

      logit = model(area)
      #loss = criterion(logit, label)

      prediction.append(logit.detach().cpu().item())

  return prediction

In [None]:
prediction = evaluate(model, test_loader, device)

  0%|          | 0/460 [00:00<?, ?it/s]

In [None]:
len(prediction)

460

In [None]:
with gzip.open("./weight.score", 'w') as f:
  pickle.dump(prediction, f)

In [None]:
with gzip.open("./weight.score", 'rb') as f:
  weight = pickle.load(f)
len(weight)

460