In [1]:
%cd "/content/drive/MyDrive/Colab Notebooks/DACON_PLANT"

/content/drive/MyDrive/Colab Notebooks/DACON_PLANT


In [23]:
import numpy as np
import random
import os
import math
import argparse
import time
import matplotlib.pyplot as plt

from glob import glob
import pandas as pd
import cv2
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import torchvision.models as models
from torchvision import transforms
import gzip
import pickle

In [None]:
def get_label_data(data_dir):
    img_path_list = []
    label_list = []
    for case_name in tqdm(os.listdir(data_dir), desc='train_data'):
        current_path = os.path.join(data_dir, case_name)
        print(current_path)
        if os.path.isdir(current_path):
            # get image path
            #img_path_list.extend(glob(os.path.join(current_path, 'image', '*.jpg')))
            #img_path_list.extend(glob(os.path.join(current_path, 'image', '*.png')))
            
            # get label
            label_df = pd.read_csv(current_path+'/label.csv')
            label_list.append(label_df['leaf_weight'])
                
    return img_path_list, label_list

In [None]:
img_path, label = get_label_data('./dataset/train')

train_data:   0%|          | 0/75 [00:00<?, ?it/s]

./dataset/train/CASE53
./dataset/train/CASE31
./dataset/train/CASE07
./dataset/train/CASE44
./dataset/train/CASE13
./dataset/train/CASE52
./dataset/train/CASE43
./dataset/train/CASE08
./dataset/train/CASE71
./dataset/train/CASE74
./dataset/train/CASE69
./dataset/train/CASE32
./dataset/train/CASE63
./dataset/train/CASE56
./dataset/train/CASE03
./dataset/train/CASE41
./dataset/train/CASE61
./dataset/train/CASE72
./dataset/train/CASE51
./dataset/train/CASE67
./dataset/train/CASE73
./dataset/train/CASE27
./dataset/train/CASE49
./dataset/train/CASE22
./dataset/train/CASE29
./dataset/train/CASE21
./dataset/train/CASE65
./dataset/train/CASE60
./dataset/train/CASE48
./dataset/train/CASE64
./dataset/train/CASE59
./dataset/train/CASE11
./dataset/train/CASE09
./dataset/train/CASE16
./dataset/train/CASE58
./dataset/train/CASE46
./dataset/train/CASE68
./dataset/train/CASE54
./dataset/train/CASE42
./dataset/train/CASE23
./dataset/train/CASE20
./dataset/train/CASE55
./dataset/train/CASE34
./dataset/t

In [None]:
def get_meta_data(data_dir):
    img_path_list = []
    meta_list = []
    for case_name in tqdm(os.listdir(data_dir), desc='train_data'):
        #current_path ./dataset/train/CASE53
        current_path = os.path.join(data_dir, case_name, 'meta')
        print(current_path)
        if os.path.isdir(current_path):
            for case_num in tqdm(os.listdir(current_path), desc='meta_data'):
            # get label
                open_path = os.path.join(current_path, case_num)
                df = pd.read_csv(open_path)

                #각 이미지마다 nan 없는 하나만 뽑기
                df = df.drop(['청색광추정광량'], axis=1)
                df = df.dropna(axis=0)
                df = df.reset_index(drop=True)

                #빈 df일 경우 0으로 대체
                if len(df) == 0:
                    df.loc[0] = [0 for _ in range(18)]
                
                #첫번째일 경우
                if case_num[-6:-4] == '01':
                    meta = pd.DataFrame(df.iloc[0])
                else:
                    meta = pd.concat([meta, df.iloc[0]], axis = 1)

            meta_list.append(meta)


    return meta_list

In [None]:
meta = get_meta_data('./dataset/train')

train_data:   0%|          | 0/75 [00:00<?, ?it/s]

./dataset/train/CASE53/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE31/meta


meta_data:   0%|          | 0/9 [00:00<?, ?it/s]

./dataset/train/CASE07/meta


meta_data:   0%|          | 0/9 [00:00<?, ?it/s]

./dataset/train/CASE44/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE13/meta


meta_data:   0%|          | 0/41 [00:00<?, ?it/s]

./dataset/train/CASE52/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE43/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE08/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE71/meta


meta_data:   0%|          | 0/32 [00:00<?, ?it/s]

./dataset/train/CASE74/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE69/meta


meta_data:   0%|          | 0/22 [00:00<?, ?it/s]

./dataset/train/CASE32/meta


meta_data:   0%|          | 0/26 [00:00<?, ?it/s]

./dataset/train/CASE63/meta


meta_data:   0%|          | 0/24 [00:00<?, ?it/s]

./dataset/train/CASE56/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE03/meta


meta_data:   0%|          | 0/5 [00:00<?, ?it/s]

./dataset/train/CASE41/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE61/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE72/meta


meta_data:   0%|          | 0/10 [00:00<?, ?it/s]

./dataset/train/CASE51/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE67/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE73/meta


meta_data:   0%|          | 0/34 [00:00<?, ?it/s]

./dataset/train/CASE27/meta


meta_data:   0%|          | 0/5 [00:00<?, ?it/s]

./dataset/train/CASE49/meta


meta_data:   0%|          | 0/38 [00:00<?, ?it/s]

./dataset/train/CASE22/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE29/meta


meta_data:   0%|          | 0/8 [00:00<?, ?it/s]

./dataset/train/CASE21/meta


meta_data:   0%|          | 0/8 [00:00<?, ?it/s]

./dataset/train/CASE65/meta


meta_data:   0%|          | 0/2 [00:00<?, ?it/s]

./dataset/train/CASE60/meta


meta_data:   0%|          | 0/34 [00:00<?, ?it/s]

./dataset/train/CASE48/meta


meta_data:   0%|          | 0/2 [00:00<?, ?it/s]

./dataset/train/CASE64/meta


meta_data:   0%|          | 0/23 [00:00<?, ?it/s]

./dataset/train/CASE59/meta


meta_data:   0%|          | 0/33 [00:00<?, ?it/s]

./dataset/train/CASE11/meta


meta_data:   0%|          | 0/41 [00:00<?, ?it/s]

./dataset/train/CASE09/meta


meta_data:   0%|          | 0/9 [00:00<?, ?it/s]

./dataset/train/CASE16/meta


meta_data:   0%|          | 0/4 [00:00<?, ?it/s]

./dataset/train/CASE58/meta


meta_data:   0%|          | 0/33 [00:00<?, ?it/s]

./dataset/train/CASE46/meta


meta_data:   0%|          | 0/14 [00:00<?, ?it/s]

./dataset/train/CASE68/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE54/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE42/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE23/meta


meta_data:   0%|          | 0/9 [00:00<?, ?it/s]

./dataset/train/CASE20/meta


meta_data:   0%|          | 0/12 [00:00<?, ?it/s]

./dataset/train/CASE55/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE34/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE75/meta


meta_data:   0%|          | 0/2 [00:00<?, ?it/s]

./dataset/train/CASE50/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE26/meta


meta_data:   0%|          | 0/47 [00:00<?, ?it/s]

./dataset/train/CASE24/meta


meta_data:   0%|          | 0/9 [00:00<?, ?it/s]

./dataset/train/CASE02/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE06/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE01/meta


meta_data:   0%|          | 0/9 [00:00<?, ?it/s]

./dataset/train/CASE30/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE14/meta


meta_data:   0%|          | 0/41 [00:00<?, ?it/s]

./dataset/train/CASE40/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE15/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE36/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE38/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE70/meta


meta_data:   0%|          | 0/24 [00:00<?, ?it/s]

./dataset/train/CASE37/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE04/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE45/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE35/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE66/meta


meta_data:   0%|          | 0/1 [00:00<?, ?it/s]

./dataset/train/CASE47/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE33/meta


meta_data:   0%|          | 0/25 [00:00<?, ?it/s]

./dataset/train/CASE62/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE19/meta


meta_data:   0%|          | 0/5 [00:00<?, ?it/s]

./dataset/train/CASE28/meta


meta_data:   0%|          | 0/10 [00:00<?, ?it/s]

./dataset/train/CASE17/meta


meta_data:   0%|          | 0/3 [00:00<?, ?it/s]

./dataset/train/CASE12/meta


meta_data:   0%|          | 0/42 [00:00<?, ?it/s]

./dataset/train/CASE05/meta


meta_data:   0%|          | 0/23 [00:00<?, ?it/s]

./dataset/train/CASE25/meta


meta_data:   0%|          | 0/20 [00:00<?, ?it/s]

./dataset/train/CASE39/meta


meta_data:   0%|          | 0/28 [00:00<?, ?it/s]

./dataset/train/CASE57/meta


meta_data:   0%|          | 0/27 [00:00<?, ?it/s]

./dataset/train/CASE18/meta


meta_data:   0%|          | 0/11 [00:00<?, ?it/s]

./dataset/train/CASE10/meta


meta_data:   0%|          | 0/42 [00:00<?, ?it/s]

In [None]:
meta_new = meta.copy()

In [None]:
#meta_new = meta
meta = meta_new.copy()

for i in range(75):
    ret_list = list()
    for j in range(len(label[i].tolist())):
        ret_list.append(label[i][j])
      
    meta[i].loc[len(meta[i])] = ret_list

meta[0]

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,...,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19
시간,2021-06-04 00:00,2021-06-04 08:59,2021-06-05 08:59,2021-06-06 08:59,2021-06-07 08:59,2021-06-08 08:59,2021-06-09 08:59,2021-06-10 08:59,2021-06-11 08:59,2021-06-12 08:59,...,2021-06-20 08:59,2021-06-21 08:59,2021-06-22 08:59,2021-06-23 08:59,2021-06-24 08:59,2021-06-25 08:59,2021-06-26 08:59,2021-06-27 08:59,2021-06-28 09:00,2021-06-29 09:00
내부온도관측치,31.0,29.6,33.400002,27.700001,27.700001,30.6,27.6,30.299999,29.299999,30.6,...,28.5,28.6,27.799999,27.200001,27.200001,27.9,27.9,27.5,27.9,28.299999
외부온도관측치,32.200001,22.299999,32.299999,22.200001,22.6,25.4,24.299999,23.4,26.4,27.799999,...,27.4,27.700001,26.9,26.1,26.299999,27.0,26.799999,26.9,27.299999,27.9
내부습도관측치,58.599998,48.599998,46.799999,56.200001,50.700001,58.900002,50.900002,52.700001,60.5,53.799999,...,59.299999,59.799999,62.200001,60.299999,60.900002,62.200001,65.900002,64.099998,64.099998,63.400002
외부습도관측치,43.0,43.0,24.700001,47.700001,53.599998,44.900002,46.799999,45.200001,56.900002,55.099998,...,48.099998,45.0,46.799999,50.799999,50.5,52.900002,58.400002,53.200001,52.099998,50.599998
CO2관측치,617.0,594.0,619.0,602.0,647.0,611.0,632.0,646.0,599.0,608.0,...,631.0,610.0,602.0,604.0,609.0,643.0,666.0,630.0,615.0,622.0
EC관측치,1.595598,1.598559,1.600533,1.689364,1.665676,1.604481,1.679494,1.635078,1.654819,1.636065,...,1.900585,2.06443,1.672585,0.022319,1.689364,1.692325,1.758455,1.874923,2.075287,0.221647
최근분무량,0.0,769.0,769.0,769.0,769.0,769.0,769.0,769.0,769.0,769.0,...,769.0,762.57,769.0,762.57,769.0,769.0,762.57,769.0,769.0,769.0
화이트 LED동작강도,0.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
레드 LED동작강도,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0


In [None]:
from pandas.core.dtypes.cast import maybe_cast_to_datetime
import gzip
import pickle

with gzip.open("./meta.score", mode='w') as out:
        pickle.dump(meta, out)

In [None]:
df = pd.read_csv('dataset/train/CASE31/meta/CASE31_01.csv')

In [None]:
df = df.drop(['청색광추정광량'], axis=1)
df = df.dropna(axis=0)
df = df.reset_index(drop=True)
df

Unnamed: 0,시간,내부온도관측치,외부온도관측치,내부습도관측치,외부습도관측치,CO2관측치,EC관측치,최근분무량,화이트 LED동작강도,레드 LED동작강도,블루 LED동작강도,냉방온도,냉방부하,난방온도,난방부하,총추정광량,백색광추정광량,적색광추정광량


In [None]:
df.loc[0] = [0 for _ in range(18)]
df

Unnamed: 0,시간,내부온도관측치,외부온도관측치,내부습도관측치,외부습도관측치,CO2관측치,EC관측치,최근분무량,화이트 LED동작강도,레드 LED동작강도,블루 LED동작강도,냉방온도,냉방부하,난방온도,난방부하,총추정광량,백색광추정광량,적색광추정광량
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Test data

In [46]:
#test meta data
def get_meta_data(data_dir):
    order_list = []
    meta_list = []
    for case_name in tqdm(os.listdir(data_dir), desc='train_data'):
        #case_name 418.csv
        
         # get label
        open_path = os.path.join(data_dir, case_name)
        df = pd.read_csv(open_path)

        #각 이미지마다 nan 없는 하나만 뽑기
        df = df.drop(['청색광추정광량'], axis=1)
        df = df.dropna(axis=0)
        df = df.reset_index(drop=True)

        #빈 df일 경우 0으로 대체
        if len(df) == 0:
              df.loc[0] = [0 for _ in range(18)]
                
        meta = pd.DataFrame(df.iloc[0])

        meta_list.append(meta)
        order_list.append(int(case_name[-8:-4]))


    return order_list, meta_list

In [47]:
order_list, meta_data = get_meta_data('./dataset/test/meta')

train_data:   0%|          | 0/460 [00:00<?, ?it/s]

In [50]:
#순서 원래대로 맞춰주기
order_dict = dict()
for i in range(len(order_list)):
  order_dict[i] = order_list[i]

meta_sorted = [0 for _ in range(len(meta_data))]
for i in range(len(meta_data)):
  meta_sorted[order_list[i]-1] = meta_data[i]

In [44]:
with gzip.open("./weight.score", 'rb') as f:
  weight = pickle.load(f)

In [51]:
meta = meta_sorted.copy()

for i in range(len(meta)):
     
    meta[i].loc[len(meta[i])] = weight[i]

meta[0]

Unnamed: 0,0
시간,2021-05-05 08:59
내부온도관측치,24.9
외부온도관측치,22.1
내부습도관측치,51.599998
외부습도관측치,47.299999
CO2관측치,348.0
EC관측치,0.0
최근분무량,4607.57
화이트 LED동작강도,100.0
레드 LED동작강도,10.0


In [52]:
with gzip.open("./meta_test.score", 'w') as f:
  pickle.dump(meta, f)