### 청경채 성장 예측 AI 경진 대회
- 환경 데이터로부터 청경채의 일별 잎면적 증감률을 예측하는 AI 알고리즘 개발
- 각 청경채 케이스의 생육 기간동안 1분 간격으로 수집된 환경 데이터로부터 일별 잎면적 증감률을 예측
- 평가 산식 : 100*RMSE

In [1]:
# !pip install koreanize_matplotlib

### 라이브러리 호출

In [2]:
import numpy as np
import pandas as pd
import glob
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import koreanize_matplotlib

### 데이터 로드

In [3]:
# Training input CSV paths, ordered by file name so positions are stable.
train_input = sorted(glob.glob('train_input/*.csv'))
print(len(train_input))
train_input[:5]

58


['train_input\\CASE_01.csv',
 'train_input\\CASE_02.csv',
 'train_input\\CASE_03.csv',
 'train_input\\CASE_04.csv',
 'train_input\\CASE_05.csv']

In [4]:
# Training target CSV paths, ordered by file name so positions are stable.
train_target = sorted(glob.glob('train_target/*.csv'))
print(len(train_target))
train_target[:5]

58


['train_target\\CASE_01.csv',
 'train_target\\CASE_02.csv',
 'train_target\\CASE_03.csv',
 'train_target\\CASE_04.csv',
 'train_target\\CASE_05.csv']

In [5]:
# Test input CSV paths, ordered by file name so positions are stable.
test_input = sorted(glob.glob('test_input/*.csv'))
print(len(test_input))
test_input[:5]

6


['test_input\\TEST_01.csv',
 'test_input\\TEST_02.csv',
 'test_input\\TEST_03.csv',
 'test_input\\TEST_04.csv',
 'test_input\\TEST_05.csv']

In [6]:
# Test target CSV paths, ordered by file name so positions are stable.
test_target = sorted(glob.glob('test_target/*.csv'))
print(len(test_target))
test_target[:5]

6


['test_target\\TEST_01.csv',
 'test_target\\TEST_02.csv',
 'test_target\\TEST_03.csv',
 'test_target\\TEST_04.csv',
 'test_target\\TEST_05.csv']

### 데이터 확인 Dataset Info.
- train_input - 총 58개 청경채 케이스
    - 각 청경채 케이스 별 환경 데이터 (1분 간격)
- train_target - 총 58개 청경채 케이스
    - rate : 각 청경채 케이스 별 잎 면적 증감률 (1일 간격)
- test_input - 총 6개 청경채 케이스
    - 각 청경채 케이스 별 환경 데이터 (1분 간격)
- test_target - 총 6개 청경채 케이스
  - rate : 각 청경채 케이스 별 잎 면적 증감률 (1일 간격)

In [7]:
# Lift the column limit so wide DataFrames are displayed without truncation.
pd.set_option('display.max_columns', None)

In [15]:
# Preview the first five rows of the first training input file.
pd.read_csv(train_input[0]).head(n=5)


Unnamed: 0,시간,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,펌프작동남은시간,최근분무량,일간누적분무량,냉방상태,냉방작동남은시간,난방상태,난방작동남은시간,내부유동팬상태,내부유동팬작동남은시간,외부환기팬상태,외부환기팬작동남은시간,화이트 LED상태,화이트 LED작동남은시간,화이트 LED동작강도,레드 LED상태,레드 LED작동남은시간,레드 LED동작강도,블루 LED상태,블루 LED작동남은시간,블루 LED동작강도,카메라상태,냉방온도,난방온도,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량
0,2021-02-17 00:00:00,24.799999,40.5,361.0,0.059069,20.299999,33.200001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,394.0,100.0,201.0,370.0,10.0,201.0,372.0,25.0,0.0,25.0,23.0,24.0,4.5,0.0,0.0,363.554,16.548,37.596,363.554,309.41,16.548,37.596
1,2021-02-17 00:01:00,20.5,44.099998,355.0,0.910134,19.700001,10.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,62.0,0.0,201.0,64.0,0.0,201.0,66.0,0.0,0.0,20.0,18.0,19.0,0.0,2.5,0.0,0.0,0.0,0.0
2,2021-02-17 00:02:00,20.6,44.299999,360.0,0.910134,19.799999,10.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,179.0,0.0,201.0,182.0,0.0,201.0,184.0,0.0,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0
3,2021-02-17 00:03:00,20.6,44.5,359.0,0.908579,19.799999,10.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,117.0,0.0,201.0,120.0,0.0,201.0,122.0,0.0,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0
4,2021-02-17 00:04:00,20.6,44.5,357.0,0.910134,19.799999,10.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,57.0,0.0,201.0,60.0,0.0,201.0,62.0,0.0,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0


In [16]:
# Preview the first five rows of the first training target file.
pd.read_csv(train_target[0]).head(n=5)


Unnamed: 0,시간,rate
0,2021-02-18 00:00:00,0.5
1,2021-02-19 00:00:00,0.66667
2,2021-02-20 00:00:00,0.6
3,2021-02-21 00:00:00,-0.125
4,2021-02-22 00:00:00,1.42857


In [9]:
# Union of every column label that occurs in any training input file.
# Only the header is parsed (nrows=0): the labels are all that is needed here,
# so loading the data rows of every file would be wasted work.
unique_columns = set()
for path in train_input:
    unique_columns.update(pd.read_csv(path, nrows=0).columns)

# Sorted so the listing is reproducible; plain set order can change per run.
column_names = sorted(unique_columns)
column_names

['난방상태',
 '외부온도관측치',
 '펌프일간누적분무량',
 '냉방온도',
 '내부습도관측치',
 '펌프상태',
 '외부환기팬상태',
 '기준온도',
 '총추정광량',
 '내부온도관측치',
 '냉방상태',
 '난방온도',
 '펌프최근분무량',
 '내부유동팬상태',
 '외부온도추정관측치',
 '외부습도추정관측치',
 '블루 LED상태',
 '외부환기팬작동남은시간',
 '냉방작동남은시간',
 '화이트 LED동작강도',
 '카메라상태',
 '펌프작동남은시간',
 '화이트 LED작동남은시간',
 '일간누적분무량',
 '블루 LED작동남은시간',
 '레드 LED상태',
 'CO2관측치',
 '시간',
 '난방작동남은시간',
 'EC관측치',
 '내부유동팬작동남은시간',
 '냉방부하',
 '청색광추정광량',
 '펌프작동남은시간.1',
 '난방부하',
 '레드 LED작동남은시간',
 '백색광추정광량',
 '적색광추정광량',
 '최근분무량',
 '외부습도관측치',
 '화이트 LED상태',
 '블루 LED동작강도',
 '레드 LED동작강도']

In [41]:
# Pair every known column label with a starting count of zero.
column_counts = list(zip(column_names, [0] * len(column_names)))
column_counts

[('난방온도', 0),
 ('레드 LED작동남은시간', 0),
 ('외부습도추정관측치', 0),
 ('외부온도관측치', 0),
 ('외부온도추정관측치', 0),
 ('내부유동팬작동남은시간', 0),
 ('최근분무량', 0),
 ('시간', 0),
 ('난방작동남은시간', 0),
 ('펌프상태', 0),
 ('화이트 LED동작강도', 0),
 ('내부습도관측치', 0),
 ('청색광추정광량', 0),
 ('블루 LED상태', 0),
 ('블루 LED동작강도', 0),
 ('냉방온도', 0),
 ('기준온도', 0),
 ('냉방상태', 0),
 ('화이트 LED작동남은시간', 0),
 ('레드 LED동작강도', 0),
 ('냉방작동남은시간', 0),
 ('펌프최근분무량', 0),
 ('적색광추정광량', 0),
 ('펌프작동남은시간.1', 0),
 ('냉방부하', 0),
 ('레드 LED상태', 0),
 ('총추정광량', 0),
 ('난방상태', 0),
 ('백색광추정광량', 0),
 ('펌프일간누적분무량', 0),
 ('블루 LED작동남은시간', 0),
 ('카메라상태', 0),
 ('화이트 LED상태', 0),
 ('내부유동팬상태', 0),
 ('난방부하', 0),
 ('EC관측치', 0),
 ('내부온도관측치', 0),
 ('일간누적분무량', 0),
 ('외부환기팬상태', 0),
 ('외부습도관측치', 0),
 ('CO2관측치', 0),
 ('펌프작동남은시간', 0),
 ('외부환기팬작동남은시간', 0)]

In [17]:
# One set of column labels per training file, in the same order as train_input.
# nrows=0 parses just the header instead of reading each file in full.
column_names = [set(pd.read_csv(path, nrows=0).columns) for path in train_input]

In [18]:
# Number of columns in each file; printing the distinct values shows whether
# every file has the same width.
col_len = [len(cols) for cols in column_names]
print(set(col_len))

{38}


In [19]:
from collections import Counter

# Flatten the per-file column collections into one list of labels.
whole_col = [label for cols in column_names for label in cols]

# (label, occurrences) pairs. Counter tallies in a single pass instead of
# rescanning whole_col once per distinct label with list.count.
col_cnt = list(Counter(whole_col).items())

col_cnt

[('난방상태', 58),
 ('외부온도관측치', 33),
 ('펌프일간누적분무량', 13),
 ('냉방온도', 58),
 ('내부습도관측치', 58),
 ('펌프상태', 58),
 ('외부환기팬상태', 58),
 ('기준온도', 58),
 ('총추정광량', 58),
 ('내부온도관측치', 58),
 ('냉방상태', 58),
 ('난방온도', 58),
 ('펌프최근분무량', 13),
 ('내부유동팬상태', 58),
 ('외부온도추정관측치', 25),
 ('외부습도추정관측치', 25),
 ('블루 LED상태', 58),
 ('외부환기팬작동남은시간', 58),
 ('냉방작동남은시간', 58),
 ('화이트 LED동작강도', 58),
 ('카메라상태', 58),
 ('펌프작동남은시간', 58),
 ('화이트 LED작동남은시간', 58),
 ('일간누적분무량', 43),
 ('블루 LED작동남은시간', 58),
 ('레드 LED상태', 58),
 ('CO2관측치', 58),
 ('시간', 58),
 ('난방작동남은시간', 58),
 ('EC관측치', 58),
 ('내부유동팬작동남은시간', 58),
 ('냉방부하', 58),
 ('청색광추정광량', 58),
 ('펌프작동남은시간.1', 2),
 ('난방부하', 58),
 ('레드 LED작동남은시간', 58),
 ('백색광추정광량', 58),
 ('적색광추정광량', 58),
 ('최근분무량', 45),
 ('외부습도관측치', 33),
 ('화이트 LED상태', 58),
 ('블루 LED동작강도', 58),
 ('레드 LED동작강도', 58)]

In [20]:
# Print the columns that are not present in every training file.
# The expected count comes from the file list rather than a hard-coded 58,
# so the check stays correct if the number of training files changes.
n_files = len(train_input)
for name, count in col_cnt:
    if count != n_files:
        print(name, count)

외부온도관측치 33
펌프일간누적분무량 13
펌프최근분무량 13
외부온도추정관측치 25
외부습도추정관측치 25
일간누적분무량 43
펌프작동남은시간.1 2
최근분무량 45
외부습도관측치 33


In [22]:
# Show every training file that has a '펌프작동남은시간.1' column
# (pandas appends '.1' when a header name is repeated within a file).
# The header alone (nrows=0) decides whether a file matches, so the full
# contents are loaded once and only for the matching files.
for path in train_input:
    if '펌프작동남은시간.1' in pd.read_csv(path, nrows=0).columns:
        print(path)
        display(pd.read_csv(path))

train_input\CASE_10.csv


Unnamed: 0,시간,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,펌프작동남은시간,펌프작동남은시간.1,최근분무량,냉방상태,냉방작동남은시간,난방상태,난방작동남은시간,내부유동팬상태,내부유동팬작동남은시간,외부환기팬상태,외부환기팬작동남은시간,화이트 LED상태,화이트 LED작동남은시간,화이트 LED동작강도,레드 LED상태,레드 LED작동남은시간,레드 LED동작강도,블루 LED상태,블루 LED작동남은시간,블루 LED동작강도,카메라상태,냉방온도,난방온도,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량
0,2021-04-06 00:00:00,19.900000,53.400002,393.0,0.921792,20.200001,31.900000,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,203.0,0.0,201.0,184.0,0.0,201.0,223.0,0.0,0.0,20.0,18.0,19.0,0.0,0.000000,0.0,0.0,0.0,0.0
1,2021-04-06 00:01:00,20.000000,53.400002,393.0,0.921792,20.200001,31.900000,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,142.0,0.0,201.0,124.0,0.0,201.0,163.0,0.0,0.0,20.0,18.0,19.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,2021-04-06 00:02:00,20.100000,53.299999,395.0,0.922569,20.200001,31.900000,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,379.0,0.0,201.0,366.0,0.0,201.0,399.0,0.0,0.0,20.0,18.0,19.0,0.0,0.500002,0.0,0.0,0.0,0.0
3,2021-04-06 00:03:00,20.200001,53.299999,396.0,0.922569,20.200001,31.900000,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,319.0,0.0,201.0,298.0,0.0,201.0,333.0,0.0,0.0,20.0,18.0,19.0,0.0,1.000004,0.0,0.0,0.0,0.0
4,2021-04-06 00:04:00,20.200001,53.200001,397.0,0.923347,20.200001,31.900000,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,262.0,0.0,201.0,267.0,0.0,201.0,273.0,0.0,0.0,20.0,18.0,19.0,0.0,1.250000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60475,2021-05-17 23:55:00,24.200001,84.400002,467.0,0.247158,24.900000,43.599998,0.0,0.0,769.0,10753.14,201.0,198.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,257.0,0.0,201.0,243.0,0.0,201.0,237.0,0.0,0.0,20.0,18.0,19.0,0.0,21.000004,0.0,0.0,0.0,0.0
60476,2021-05-17 23:56:00,24.200001,84.599998,468.0,0.247158,24.900000,43.599998,0.0,0.0,769.0,10753.14,201.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,195.0,0.0,201.0,182.0,0.0,201.0,175.0,0.0,0.0,20.0,18.0,19.0,0.0,21.000004,0.0,0.0,0.0,0.0
60477,2021-05-17 23:57:00,24.100000,84.699997,468.0,0.247936,24.900000,43.599998,0.0,0.0,769.0,10753.14,201.0,77.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,134.0,0.0,201.0,127.0,0.0,201.0,115.0,0.0,0.0,20.0,18.0,19.0,0.0,20.500002,0.0,0.0,0.0,0.0
60478,2021-05-17 23:58:00,24.200001,84.699997,466.0,0.248713,24.900000,43.599998,0.0,0.0,769.0,10753.14,201.0,198.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,373.0,0.0,201.0,362.0,0.0,201.0,355.0,0.0,0.0,20.0,18.0,19.0,0.0,21.000004,0.0,0.0,0.0,0.0


train_input\CASE_18.csv


Unnamed: 0,시간,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,펌프작동남은시간,펌프작동남은시간.1,최근분무량,냉방상태,냉방작동남은시간,난방상태,난방작동남은시간,내부유동팬상태,내부유동팬작동남은시간,외부환기팬상태,외부환기팬작동남은시간,화이트 LED상태,화이트 LED작동남은시간,화이트 LED동작강도,레드 LED상태,레드 LED작동남은시간,레드 LED동작강도,블루 LED상태,블루 LED작동남은시간,블루 LED동작강도,카메라상태,냉방온도,난방온도,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량
0,2021-06-03 00:00:00,21.9,80.699997,412.0,0.113475,22.799999,57.000000,0.0,0.0,0.00,0.00,201.0,104.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,283.0,0.0,201.0,285.0,0.0,201.000000,288.0,0.0,0.0,20.0,18.0,19.0,0.0,19.499998,0.0,0.0,0.0,0.0
1,2021-06-03 00:01:00,22.0,80.599998,412.0,0.111921,22.500000,58.299999,0.0,0.0,0.00,0.00,201.0,128.5,0.0,0.0,0.0,0.0,0.0,0.0,201.0,222.0,0.0,201.0,224.0,0.0,201.000000,227.0,0.0,0.0,20.0,18.0,19.0,0.0,20.000000,0.0,0.0,0.0,0.0
2,2021-06-03 00:02:00,22.0,80.500000,417.0,0.111921,22.400000,57.599998,0.0,0.0,0.00,0.00,201.0,153.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,162.0,0.0,201.0,164.0,0.0,201.000000,167.0,0.0,0.0,20.0,18.0,19.0,0.0,20.000000,0.0,0.0,0.0,0.0
3,2021-06-03 00:03:00,21.9,80.400002,413.0,0.112698,22.299999,57.900002,0.0,0.0,0.00,0.00,201.0,91.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,100.0,0.0,201.0,102.0,0.0,201.000000,105.0,0.0,0.0,20.0,18.0,19.0,0.0,20.000000,0.0,0.0,0.0,0.0
4,2021-06-03 00:04:00,21.9,80.300003,408.0,0.112698,22.299999,59.200001,0.0,0.0,0.00,0.00,201.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,340.0,0.0,201.0,342.0,0.0,201.000000,344.0,0.0,0.0,20.0,18.0,19.0,0.0,19.499998,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60475,2021-07-14 23:55:00,23.1,84.300003,540.0,0.199748,24.200001,48.500000,0.0,0.0,762.57,12021.08,201.0,56.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,112.0,0.0,201.0,113.0,0.0,201.000000,163.0,0.0,0.0,20.0,18.0,19.0,0.0,25.500002,0.0,0.0,0.0,0.0
60476,2021-07-14 23:56:00,23.1,84.300003,542.0,0.198970,24.200001,47.900002,0.0,0.0,762.57,12021.08,201.0,177.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,352.0,0.0,201.0,354.0,0.0,201.000000,399.0,0.0,0.0,20.0,18.0,19.0,0.0,25.500002,0.0,0.0,0.0,0.0
60477,2021-07-14 23:57:00,23.1,84.300003,550.0,0.198970,24.200001,48.099998,0.0,0.0,762.57,12021.08,201.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,290.0,0.0,201.0,292.0,0.0,201.000000,337.0,0.0,0.0,20.0,18.0,19.0,0.0,25.500002,0.0,0.0,0.0,0.0
60478,2021-07-14 23:58:00,23.1,84.300003,547.0,0.199748,24.200001,47.799999,0.0,0.0,762.57,12021.08,201.0,54.0,0.0,0.0,0.0,0.0,0.0,107.5,201.0,227.0,0.0,201.0,229.0,0.0,201.000000,276.0,0.0,0.0,20.0,18.0,19.0,0.0,25.500002,0.0,0.0,0.0,0.0


In [28]:
# Tenth file in the sorted list: compare the two pump-time columns with the
# recent-spray column.
CASE_10 = pd.read_csv(train_input[9])
CASE_10.loc[:, ['펌프작동남은시간', '펌프작동남은시간.1', '최근분무량']]

Unnamed: 0,펌프작동남은시간,펌프작동남은시간.1,최근분무량
0,0.0,0.0,0.00
1,0.0,0.0,0.00
2,0.0,0.0,0.00
3,0.0,0.0,0.00
4,0.0,0.0,0.00
...,...,...,...
60475,0.0,769.0,10753.14
60476,0.0,769.0,10753.14
60477,0.0,769.0,10753.14
60478,0.0,769.0,10753.14


In [29]:
# Second file in the sorted list, used as a reference layout for the pump and
# spray columns.
CASE_02 = pd.read_csv(train_input[1])
CASE_02.loc[:, ['펌프작동남은시간', '최근분무량', '일간누적분무량']]

Unnamed: 0,펌프작동남은시간,최근분무량,일간누적분무량
0,0.0,0.0,0.00
1,0.0,0.0,0.00
2,0.0,0.0,0.00
3,0.0,0.0,0.00
4,0.0,0.0,0.00
...,...,...,...
41755,0.0,769.0,9971.28
41756,0.0,769.0,9971.28
41757,0.0,769.0,9971.28
41758,0.0,769.0,9971.28



결론

-> '펌프작동남은시간.1' 이 있는 데이터들 (case10, case18)의 경우,

'펌프작동남은시간.1'을 '최근분무량'으로, '최근분무량'을 '일간누적분무량'으로 바꾸어주어야 한다.

In [30]:
# Load every training file, tag it with a 1-based case number and bring its
# column labels onto one common naming scheme so the frames can be stacked.

# Column that marks a file whose spray columns are shifted by one position.
SHIFT_MARKER = '펌프작동남은시간.1'

# The "estimated" outside temperature/humidity labels are folded into the
# plain observation labels (this notebook treats them as the same series).
outside_renames = {'외부온도추정관측치': '외부온도관측치',
                   '외부습도추정관측치': '외부습도관측치'}
# Files that prefix the spray columns with '펌프'.
pump_prefixed_renames = {'펌프일간누적분무량': '일간누적분무량',
                         '펌프최근분무량': '최근분무량'}
# Files with the shift marker: the marker column holds the recent spray amount
# and the column labelled recent spray holds the daily cumulative amount.
shifted_renames = {'최근분무량': '일간누적분무량',
                   SHIFT_MARKER: '최근분무량'}

train_datasets = []
for n, i in enumerate(train_input):
    data = pd.read_csv(i)
    data['case'] = n + 1
    # Decide by the presence of the marker column rather than by hard-coded
    # list positions, so the fix does not depend on how the files are ordered.
    if SHIFT_MARKER in data.columns:
        mapping = {**shifted_renames, **outside_renames}
    else:
        mapping = {**pump_prefixed_renames, **outside_renames}
    train_datasets.append(data.rename(columns=mapping))

# Sanity check: number of distinct column labels left after renaming.
check_col = []
for j in train_datasets:
    check_col.extend(j.columns)
len(set(check_col))


39

### data concat

In [33]:
# Stack all per-case frames row-wise into one table with a fresh integer index.
X = pd.concat(train_datasets, axis=0, ignore_index=True)
print(X.shape)
X.head(n=5)

(2611507, 39)


Unnamed: 0,시간,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,펌프작동남은시간,최근분무량,일간누적분무량,냉방상태,냉방작동남은시간,난방상태,난방작동남은시간,내부유동팬상태,내부유동팬작동남은시간,외부환기팬상태,외부환기팬작동남은시간,화이트 LED상태,화이트 LED작동남은시간,화이트 LED동작강도,레드 LED상태,레드 LED작동남은시간,레드 LED동작강도,블루 LED상태,블루 LED작동남은시간,블루 LED동작강도,카메라상태,냉방온도,난방온도,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량,case
0,2021-02-17 00:00:00,24.799999,40.5,361.0,0.059069,20.299999,33.200001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,394.0,100.0,201.0,370.0,10.0,201.0,372.0,25.0,0.0,25.0,23.0,24.0,4.5,0.0,0.0,363.554,16.548,37.596,363.554,309.41,16.548,37.596,1
1,2021-02-17 00:01:00,20.5,44.099998,355.0,0.910134,19.700001,10.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,62.0,0.0,201.0,64.0,0.0,201.0,66.0,0.0,0.0,20.0,18.0,19.0,0.0,2.5,0.0,0.0,0.0,0.0,1
2,2021-02-17 00:02:00,20.6,44.299999,360.0,0.910134,19.799999,10.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,179.0,0.0,201.0,182.0,0.0,201.0,184.0,0.0,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0,1
3,2021-02-17 00:03:00,20.6,44.5,359.0,0.908579,19.799999,10.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,117.0,0.0,201.0,120.0,0.0,201.0,122.0,0.0,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0,1
4,2021-02-17 00:04:00,20.6,44.5,357.0,0.910134,19.799999,10.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,201.0,57.0,0.0,201.0,60.0,0.0,201.0,62.0,0.0,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0,1


In [37]:
# Read every target file, tag it with its 1-based case number, and stack the
# frames row-wise into one table with a fresh integer index.
target_data = []
for case_no, path in enumerate(train_target, start=1):
    frame = pd.read_csv(path)
    frame['case'] = case_no
    target_data.append(frame)

y = pd.concat(target_data, axis=0, ignore_index=True)
print(y.shape)
y.head(n=5)

(1813, 3)


Unnamed: 0,시간,rate,case
0,2021-02-18 00:00:00,0.5,1
1,2021-02-19 00:00:00,0.66667,1
2,2021-02-20 00:00:00,0.6,1
3,2021-02-21 00:00:00,-0.125,1
4,2021-02-22 00:00:00,1.42857,1


## EDA

In [40]:
# Number of missing values in each column of the combined input table.
X.isnull().sum(axis=0)

시간                    0
내부온도관측치           73322
내부습도관측치           73322
CO2관측치            73322
EC관측치             73322
외부온도관측치          412858
외부습도관측치          412858
펌프상태              73322
펌프작동남은시간          73322
최근분무량             73322
일간누적분무량           73322
냉방상태              73322
냉방작동남은시간          73322
난방상태              73322
난방작동남은시간          73322
내부유동팬상태           73322
내부유동팬작동남은시간       73322
외부환기팬상태           73322
외부환기팬작동남은시간       73322
화이트 LED상태         73322
화이트 LED작동남은시간     73322
화이트 LED동작강도       73322
레드 LED상태          73322
레드 LED작동남은시간      73322
레드 LED동작강도        73322
블루 LED상태          73322
블루 LED작동남은시간      73322
블루 LED동작강도        73322
카메라상태             73322
냉방온도              73322
난방온도              73322
기준온도              73322
난방부하              73322
냉방부하              73322
총추정광량             73322
백색광추정광량           73322
적색광추정광량           73322
청색광추정광량           73322
case                  0
dtype: int64