In [1]:
import pandas as pd
pd.set_option('mode.chained_assignment', None)
import numpy as np
from numpy import nan
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('dark')

import warnings
warnings.filterwarnings('ignore')

import torch
from torch import nn
from torchvision import models
from torch.utils.data import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import platform
import sys
import os
import sklearn 
import cv2
import json
from glob import glob

print(f"- os: {platform.platform()}")
print(f"- python: {sys.version}")
print(f"- pandas: {pd.__version__}")
print(f"- numpy: {np.__version__}")
print(f"- sklearn: {sklearn.__version__}")
print(f"- pytorch: {torch.__version__}")
print(f"- opencv: {cv2.__version__}")

- os: Windows-10-10.0.19041-SP0
- python: 3.7.11 (default, Jul 27 2021, 09:42:29) [MSC v.1916 64 bit (AMD64)]
- pandas: 1.3.0
- numpy: 1.19.1
- sklearn: 0.23.2
- pytorch: 1.7.0
- opencv: 4.5.5


In [2]:
from pycaret.classification import * 

# csv분석

In [5]:
TRAIN_PATH = 'C:/git/jonie_github/VISION/_data/train/'
TEST_PATH = 'C:/git/jonie_github/VISION/_data/test/'
DATASET_NAME = 'CROP_DISEASE_5folds'
BATCH_SIZE = 128
EPOCHS = 150
USE_FOLD = True
SEED = 42
NUM_FOLD = 2

FLIP_IMG = True
BLUR_IMG = True
USE_AIHUB_DATA = False

# image resize
IMG_SIZE = 128

In [6]:
train_csv = sorted(glob(TRAIN_PATH +'*/*.csv'))
train_json = sorted(glob(TRAIN_PATH +'*/*.json'))

test_csv = sorted(glob(TEST_PATH +'*/*.csv'))
test_json = sorted(glob(TEST_PATH +'*/*.json'))

In [7]:
def json_data_load(train_json):
    crops = []
    areas = []
    diseases = []
    risks = []
    labels = []
    img_names = []
    parts=[]
    objects = []
    img_w = []
    img_h = []
    grows = []
    img_dirs = []
    for i in range(len(train_json)):
        with open(train_json[i], 'r') as f:
            sample = json.load(f)
            
            crop = sample['annotations']['crop']
            area = sample['annotations']['area']
            disease = sample['annotations']['disease']
            grow = sample['annotations']['grow']
            risk = sample['annotations']['risk']
            label=f"{crop}_{disease}_{risk}"        
            img_name = sample['description']['image'].replace('.jpg','')
            img_dir = train_json[i].replace('.json', '.jpg')
            width = sample['description']['width']
            height = sample['description']['height']

            crops.append(crop)
            areas.append(area)
            diseases.append(disease)
            risks.append(risk)
            labels.append(label)
            img_names.append(img_name)
            img_w.append(width)
            img_h.append(height)
            img_dirs.append(img_dir)
            grows.append(grow)
            r = sample['annotations']['bbox'][0]
            temp = [int(r['x']), int(r['y']), int(r['w']), int(r['h'])]
            objects.append(temp)    
            
            p = []
            
            for part in sample['annotations']['part']:
                p.append([int(part['x']), int(part['y']), int(part['w']), int(part['h'])])
            parts.append(p)

    df = pd.DataFrame({'image':img_names, 'img_w':img_w, 'img_h':img_h, 'crops':crops,'areas':areas,'diseases':diseases,'risks':risks, 
                       'grows':grows, 'bbox':objects, 'parts':parts, 'labels':labels, 'dir':img_dirs})
    df['crop_area'] = df.crops.astype(str) + '_' + df.areas.astype(str)
    df['disease_risk'] = df.diseases.astype(str) + '_' + df.risks.astype(str)

    df['x'] = df.bbox.apply(lambda x: x[0])
    df['y'] = df.bbox.apply(lambda x: x[1])
    df['w'] = df.bbox.apply(lambda x: x[2])
    df['h'] = df.bbox.apply(lambda x: x[3])
    df['xc'] = df.x + df.w.apply(lambda x:x//2)
    df['yc'] = df.y + df.h.apply(lambda x:x//2)
    
    df.dir = df.dir.apply(lambda x: x.replace('\\','/'))
    df = df[['image','crops','areas','diseases','risks','grows','xc','yc','w','h', 'img_w', 'img_h', 'parts', 'dir']]

    return df

In [27]:
from datetime import datetime, timedelta
def get_df_csv(csv_dir):

    df_csv = pd.DataFrame(columns={'image','month','mean_temp', 'high_temp', 'low_temp', 'mean_humi', 'high_humi', 'low_humi',  
                                    'mean_dew', 'high_dew', 'low_dew', 'temp_cond_1','temp_cond_2','temp_cond_3','temp_cond_4',
                                    'humidity_70','humidity_80','humidity_90','humidity_95',
                                    'RH_70','RH_95','RH_100'})

    for i, csv in enumerate(tqdm(csv_dir)):
        data = pd.read_csv(f'{csv}')
        data = data[['측정시각', '내부 온도 1 평균', '내부 온도 1 최고', '내부 온도 1 최저','내부 습도 1 평균', '내부 습도 1 최고',
           '내부 습도 1 최저','내부 이슬점 평균', '내부 이슬점 최고', '내부 이슬점 최저']]
        data = data.drop_duplicates(subset=['측정시각'])
        # data = data.sort_values(by='측정시각')
        data['측정시각'] = data['측정시각'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
        data = data.replace('0', np.nan).dropna()
        data = data.replace('-', np.nan).dropna()
        

        img_name = csv_dir[i].split('/')[-1].replace('.csv','')
        month = data.측정시각.dt.month.mean()
        lenth = len(data)

        mean_temp = data['내부 온도 1 평균'].median()
        high_temp = data['내부 온도 1 최고'].median()
        low_temp = data['내부 온도 1 최저'].median()
        mean_humi = data['내부 습도 1 평균'].median()
        high_humi = data['내부 습도 1 최고'].median()
        low_humi = data['내부 습도 1 최저'].median()
        mean_dew = data['내부 이슬점 평균'].median()
        high_dew = data['내부 이슬점 최고'].median()
        low_dew = data['내부 이슬점 최저'].median()

        temp_cond_1 = len(data[(data['내부 온도 1 평균']>=20)&(data['내부 온도 1 평균']<=25)])/lenth
        temp_cond_2 = len(data[(data['내부 온도 1 최저']<=13)])/lenth
        temp_cond_3 = len(data[(data['내부 온도 1 최고']>=32) & (data['내부 온도 1 최고']<35)])/lenth
        temp_cond_4 = len(data[(data['내부 온도 1 최고']>=35)])/lenth

        humidity_70 = len(data[(data['내부 습도 1 평균']>=70)& (data['내부 습도 1 평균']<80)])/lenth
        humidity_80 = len(data[(data['내부 습도 1 평균']>=80)& (data['내부 습도 1 평균']<90)])/lenth
        humidity_90 = len(data[(data['내부 습도 1 평균']>=90)& (data['내부 습도 1 평균']<95)])/lenth
        humidity_95 = len(data[(data['내부 습도 1 평균']>=95)])/lenth

        TD = data['내부 이슬점 평균'].astype(float)
        T = data['내부 온도 1 평균'].astype(float)
        RH = 100*(np.exp((17.625*TD)/(243.04+TD))/np.exp((17.625*T)/(243.04+T)))

        RH_70 = len(RH[(RH>=70)& (RH<95)])/lenth
        RH_95 = len(RH[(RH>=95)& (RH<100)])/lenth
        RH_100 = len(RH[(RH>=100)])/lenth

        csv_series = pd.Series({'image':img_name, 'month':month,'mean_temp':mean_temp, 'high_temp':high_temp, 'low_temp':low_temp, 
                                'mean_humi':mean_humi, 'high_humi':high_humi, 'low_humi':low_humi,  
                                'mean_dew':mean_dew, 'high_dew':high_dew, 'low_dew':low_dew,
                                'temp_cond_1':temp_cond_1,'temp_cond_2':temp_cond_2,'temp_cond_3':temp_cond_3,'temp_cond_4':temp_cond_4,
                                'humidity_70':humidity_70,'humidity_80':humidity_80,'humidity_90':humidity_90,'humidity_95':humidity_95,
                                 'RH_70':RH_70,'RH_95':RH_95,'RH_100':RH_100})
        df_csv.loc[i] = csv_series
    return df_csv


In [None]:
df_train = json_data_load(train_json)
csv_train = get_df_csv(train_csv)
csv_test = get_df_csv(test_csv)

print(len(csv_train))
print(len(csv_test))
csv_train

100%|██████████████████████████████████████████████████████████████████████████████| 5767/5767 [02:32<00:00, 37.93it/s]
 41%|██████████████████████████████▉                                             | 21132/51906 [09:46<14:28, 35.44it/s]

In [15]:
df_train['label'] = df_train.crops.astype(str) +'_'+ df_train.diseases +'_'+ df_train.risks.astype(str)
train1 = pd.merge(df_train[['image','label']], csv_train, on='image', how='outer')
train2= pd.merge(df_train[['image','crops','label']], csv_train, on='image', how='outer')
train3= pd.merge(df_train[['image','crops','areas','grows','label']], csv_train, on='image', how='outer')

train1 = train1[['image','month','temp_cond_1','temp_cond_2','temp_cond_3','temp_cond_4',
                    'humidity_70','humidity_80','humidity_90','humidity_95',
                     'RH_70','RH_95','RH_100','label']]
train2 = train2[['image','crops','month','temp_cond_1','temp_cond_2','temp_cond_3','temp_cond_4',
                    'humidity_70','humidity_80','humidity_90','humidity_95',
                     'RH_70','RH_95','RH_100','label']]

train3 = train3[['image','crops','areas','grows','month','temp_cond_1','temp_cond_2','temp_cond_3','temp_cond_4',
                    'humidity_70','humidity_80','humidity_90','humidity_95',
                     'RH_70','RH_95','RH_100','label']]
train1

Unnamed: 0,image,month,temp_cond_1,temp_cond_2,temp_cond_3,temp_cond_4,humidity_70,humidity_80,humidity_90,humidity_95,RH_70,RH_95,RH_100,label
0,10027,11.000000,0.000000,0.727891,0.000000,0.000000,0.115646,0.578231,0.064626,0.000000,0.755102,0.000000,0.000000,3_b7_1
1,10037,9.000000,0.292517,0.000000,0.000000,0.000000,0.146259,0.350340,0.210884,0.000000,0.642857,0.064626,0.000000,3_00_0
2,10043,6.000000,0.408163,0.000000,0.000000,0.000000,0.244898,0.251701,0.282313,0.000000,0.741497,0.027211,0.000000,3_00_0
3,10045,9.000000,0.217687,0.000000,0.000000,0.000000,0.081633,0.391156,0.163265,0.000000,0.632653,0.003401,0.000000,3_00_0
4,10063,6.000000,0.408163,0.000000,0.000000,0.000000,0.244898,0.251701,0.282313,0.000000,0.741497,0.027211,0.000000,3_00_0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5762,67640,9.000000,0.707483,0.000000,0.000000,0.000000,0.095238,0.190476,0.374150,0.251701,0.530612,0.391156,0.000000,3_00_0
5763,67644,9.000000,0.204082,0.000000,0.003401,0.000000,0.081633,0.469388,0.010204,0.000000,0.571429,0.000000,0.000000,1_00_0
5764,67647,10.000000,0.214286,0.251701,0.061224,0.037415,0.139456,0.159864,0.166667,0.472789,0.517007,0.384354,0.040816,2_a5_2
5765,67649,10.000000,0.129252,0.000000,0.006803,0.000000,0.071429,0.125850,0.459184,0.010204,0.588435,0.081633,0.000000,1_00_0


In [20]:
train_img_names1 = train1.pop('image')
train_img_names2 = train2.pop('image')
train_img_names3 = train3.pop('image')
test_img_names = csv_test.pop('image')

In [None]:
X_test = csv_test

In [21]:
ignore_features = []
categorical_features = []
numerical_features = ['month','temp_cond_1','temp_cond_2','temp_cond_3','temp_cond_4',
                    'humidity_70','humidity_80','humidity_90','humidity_95',
                     'RH_70','RH_95','RH_100']

In [22]:
clf = setup(data=train1, 
            target='label', 
            ignore_features=ignore_features,           
            categorical_features=categorical_features, 
            numeric_features=numerical_features,
            #remove_outliers=True,
            #outliers_threshold=0.1,
            normalize=True,                            
            #handle_unknown_categorical=True,
#             imputation_type='iterative',              
#             iterative_imputation_iters=10,            
#             categorical_iterative_imputer='lightgbm',
            #polynomial_features=True,
            session_id=SEED, 
            silent=True,
           # profile=True
            ) 

Unnamed: 0,Description,Value
0,session_id,42
1,Target,label
2,Target Type,Multiclass
3,Label Encoded,"1_00_0: 0, 2_00_0: 1, 2_a5_2: 2, 3_00_0: 3, 3_a9_1: 4, 3_a9_2: 5, 3_a9_3: 6, 3_b3_1: 7, 3_b6_1: 8, 3_b7_1: 9, 3_b8_1: 10, 4_00_0: 11, 5_00_0: 12, 5_a7_2: 13, 5_b6_1: 14, 5_b7_1: 15, 5_b8_1: 16, 6_00_0: 17, 6_a11_1: 18, 6_a11_2: 19, 6_a12_1: 20, 6_a12_2: 21, 6_b4_1: 22, 6_b4_3: 23, 6_b5_1: 24"
4,Original Data,"(5767, 13)"
5,Missing Values,False
6,Numeric Features,12
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


In [23]:
best_models = compare_models(sort='f1', n_select=3, fold=5)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.948,0.9988,0.8669,0.9431,0.9416,0.9411,0.9415,0.726
et,Extra Trees Classifier,0.9482,0.9971,0.8707,0.9421,0.9415,0.9414,0.9418,0.204
dt,Decision Tree Classifier,0.948,0.9961,0.8703,0.9419,0.9412,0.9411,0.9416,0.016
rf,Random Forest Classifier,0.948,0.9983,0.8667,0.9419,0.9412,0.9411,0.9416,0.226
gbc,Gradient Boosting Classifier,0.9457,0.9971,0.8558,0.9414,0.9395,0.9386,0.939,3.05
knn,K Neighbors Classifier,0.9391,0.9934,0.8291,0.9383,0.9337,0.931,0.9315,0.624
lr,Logistic Regression,0.8516,0.9898,0.7116,0.8533,0.8335,0.8315,0.8329,1.26
svm,SVM - Linear Kernel,0.7852,0.0,0.6052,0.7705,0.7604,0.7549,0.7589,0.042
nb,Naive Bayes,0.6789,0.9826,0.688,0.8106,0.6862,0.6441,0.6572,0.016
lda,Linear Discriminant Analysis,0.6437,0.9664,0.6474,0.6926,0.6427,0.6035,0.6095,0.014


In [24]:
tuned_top3 = [tune_model(i) for i in best_models]
blender = blend_models(tuned_top3)
best_auc_model = automl(optimize = 'F1')
finalized_model = finalize_model(best_auc_model)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9505,0.9989,0.8836,0.9378,0.9401,0.944,0.9446
1,0.9455,0.999,0.8496,0.9413,0.9389,0.9383,0.9389
2,0.9406,0.9986,0.832,0.9289,0.9326,0.9327,0.9332
3,0.9554,0.9988,0.8505,0.9435,0.9479,0.9495,0.9499
4,0.948,0.999,0.8396,0.9351,0.9399,0.9412,0.9415
5,0.9381,0.9989,0.8704,0.9409,0.9353,0.9299,0.9304
6,0.9578,0.9992,0.8677,0.9522,0.9509,0.9522,0.9528
7,0.9355,0.9986,0.7675,0.9345,0.9259,0.927,0.9279
8,0.9355,0.9986,0.8356,0.9294,0.9288,0.927,0.9273
9,0.9529,0.999,0.8708,0.9464,0.945,0.9466,0.9473


In [104]:
predictions = predict_model(data=test_df_csv, estimator=finalized_model)

In [105]:
predictions

Unnamed: 0,RH_95,humidity_95,RH_70,humidity_70,month,temp_cond_2,temp_cond_1,humidity_80,temp_cond_3,RH_100,temp_cond_4,humidity_90,Label,Score
0,0.206186,0.333333,0.419244,0.175258,7.000000,0.000000,0.316151,0.178694,0.233677,0.134021,0.089347,0.075601,6_00_0,1.0
1,0.000000,0.000000,0.731293,0.122449,10.738095,0.537415,0.064626,0.602041,0.000000,0.000000,0.000000,0.000000,5_00_0,1.0
2,0.000000,0.000000,0.932432,0.175676,10.000000,0.000000,0.324324,0.621622,0.000000,0.000000,0.000000,0.135135,1_00_0,1.0
3,0.003401,0.000000,0.571429,0.095238,9.000000,0.000000,0.183673,0.234694,0.000000,0.000000,0.000000,0.244898,3_00_0,1.0
4,0.000000,0.000000,0.741497,0.119048,10.500000,0.554422,0.064626,0.619048,0.000000,0.000000,0.000000,0.000000,5_00_0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51901,0.098639,0.003401,0.561224,0.030612,10.000000,0.000000,0.102041,0.122449,0.020408,0.000000,0.000000,0.496599,2_a5_2,1.0
51902,0.000000,0.000000,0.751701,0.098639,11.000000,0.717687,0.000000,0.588435,0.000000,0.000000,0.000000,0.061224,3_b7_1,1.0
51903,0.207483,0.411565,0.193878,0.068027,7.000000,0.000000,0.421769,0.074830,0.047619,0.207483,0.350340,0.051020,6_00_0,1.0
51904,0.469388,0.574830,0.428571,0.112245,10.000000,0.282313,0.088435,0.139456,0.040816,0.061224,0.030612,0.132653,3_00_0,1.0


In [177]:
submission= pd.read_csv('C:/Users/jeong/Projects/dacon/병해진단/data/submission5.csv')

submission

Unnamed: 0,image,label
0,10000,6_00_0
1,10001,5_b6_1
2,10002,4_00_0
3,10003,3_00_0
4,10004,3_b8_1
...,...,...
51901,67673,4_00_0
51902,67674,3_b7_1
51903,67675,6_00_0
51904,67676,2_a5_2


In [171]:
submission.isna().sum()

image     0
label    40
dtype: int64

In [178]:
submission = submission.fillna('-')

submission['crops'] = submission.label.astype(str).apply(lambda x: x[0])
submission['diseases'] = submission.label.astype(str).apply(lambda x: x[2:4])
submission['risks'] = submission.label.astype(str).apply(lambda x: x[-1])
submission

Unnamed: 0,image,label,crops,diseases,risks
0,10000,6_00_0,6,00,0
1,10001,5_b6_1,5,b6,1
2,10002,4_00_0,4,00,0
3,10003,3_00_0,3,00,0
4,10004,3_b8_1,3,b8,1
...,...,...,...,...,...
51901,67673,4_00_0,4,00,0
51902,67674,3_b7_1,3,b7,1
51903,67675,6_00_0,6,00,0
51904,67676,2_a5_2,2,a5,2


In [181]:
len(submission[submission.label=='-'])

40

In [107]:
predictions['crops'] = predictions.Label.apply(lambda x: x.split('_')[0])
predictions['diseases']= predictions.Label.apply(lambda x: x.split('_')[1])
predictions['risks']= predictions.Label.apply(lambda x: x.split('_')[2])

In [108]:
predictions = predictions[['crops','diseases','risks','Label']]
predictions['image']= test_images


In [182]:
predictions

Unnamed: 0,crops,diseases,risks,Label,image
0,6,00,0,6_00_0,10000
1,5,00,0,5_00_0,10001
2,1,00,0,1_00_0,10002
3,3,00,0,3_00_0,10003
4,5,00,0,5_00_0,10004
...,...,...,...,...,...
51901,2,a5,2,2_a5_2,67673
51902,3,b7,1,3_b7_1,67674
51903,6,00,0,6_00_0,67675
51904,3,00,0,3_00_0,67676


In [183]:
submission.image = submission.image.astype(int)
predictions.image = predictions.image.astype(int)

submissions = pd.merge(submission, predictions, on='image',how='outer')
submissions

Unnamed: 0,image,label,crops_x,diseases_x,risks_x,crops_y,diseases_y,risks_y,Label
0,10000,6_00_0,6,00,0,6,00,0,6_00_0
1,10001,5_b6_1,5,b6,1,5,00,0,5_00_0
2,10002,4_00_0,4,00,0,1,00,0,1_00_0
3,10003,3_00_0,3,00,0,3,00,0,3_00_0
4,10004,3_b8_1,3,b8,1,5,00,0,5_00_0
...,...,...,...,...,...,...,...,...,...
51901,67673,4_00_0,4,00,0,2,a5,2,2_a5_2
51902,67674,3_b7_1,3,b7,1,3,b7,1,3_b7_1
51903,67675,6_00_0,6,00,0,6,00,0,6_00_0
51904,67676,2_a5_2,2,a5,2,3,00,0,3_00_0


In [184]:
submissions[submissions.crops_x!=submissions.crops_y]

Unnamed: 0,image,label,crops_x,diseases_x,risks_x,crops_y,diseases_y,risks_y,Label
2,10002,4_00_0,4,00,0,1,00,0,1_00_0
4,10004,3_b8_1,3,b8,1,5,00,0,5_00_0
5,10005,6_00_0,6,00,0,5,00,0,5_00_0
7,10007,5_b7_1,5,b7,1,3,b7,1,3_b7_1
13,10013,3_a9_1,3,a9,1,1,00,0,1_00_0
...,...,...,...,...,...,...,...,...,...
51894,67666,1_00_0,1,00,0,4,00,0,4_00_0
51898,67670,2_00_0,2,00,0,4,00,0,4_00_0
51900,67672,6_00_0,6,00,0,5,00,0,5_00_0
51901,67673,4_00_0,4,00,0,2,a5,2,2_a5_2


In [112]:
submissions[submissions.label!=submissions.Label]

Unnamed: 0,image,label,crops,diseases,risks,Label
1,10001,5_b6_1,5,00,0,5_00_0
2,10002,4_00_0,1,00,0,1_00_0
4,10004,3_b8_1,5,00,0,5_00_0
5,10005,6_00_0,5,00,0,5_00_0
7,10007,5_b7_1,3,b7,1,3_b7_1
...,...,...,...,...,...,...
51898,67670,2_00_0,4,00,0,4_00_0
51899,67671,6_b5_1,6,a11,1,6_a11_1
51900,67672,6_00_0,5,00,0,5_00_0
51901,67673,4_00_0,2,a5,2,2_a5_2


In [81]:
submission['crops'] = submission.label.apply(lambda x: x.split('_')[0])
submission['diseases']= submission.label.apply(lambda x: x.split('_')[1])
submission['risks']= submission.label.apply(lambda x: x.split('_')[2])
submission

AttributeError: 'float' object has no attribute 'split'

In [158]:
# print(f'temp_cond_1:{temp_cond_1}')
# print(f'temp_cond_2:{temp_cond_2}')
# print(f'temp_cond_3:{temp_cond_3}')
# print(f'temp_cond_4:{temp_cond_4}')
# print(f'humidity_70:{humidity_70}')
# print(f'humidity_80:{humidity_80}')
# print(f'humidity_90:{humidity_90}')
# print(f'humidity_95:{humidity_95}')
# print(f'humidity_100:{humidity_100}')
# print(f'RH_70:{RH_70}')
# print(f'RH_95:{RH_95}')
# print(f'RH_100:{RH_100}')
# print(f'month:{month}')

# b= pd.Series({'month':month,'temp_cond_1':temp_cond_1,'temp_cond_2':temp_cond_2,'temp_cond_3':temp_cond_3,'temp_cond_4':temp_cond_4,
#                     'humidity_70':humidity_70,'humidity_80':humidity_80,'humidity_90':humidity_90,'humidity_95':humidity_95,'humidity_100':humidity_100,
#                      'RH_70':RH_70,'RH_95':RH_95,'RH_100':RH_100})

temp_cond_1:0.5136054421768708
temp_cond_2:0.0
temp_cond_3:0.1326530612244898
temp_cond_4:0.0
humidity_70:0.08163265306122448
humidity_80:0.17346938775510204
humidity_90:0.2619047619047619
humidity_95:0.09863945578231292
humidity_100:0.0
RH_70:0.5238095238095238
RH_95:0.06802721088435375
RH_100:0.027210884353741496
month:8


In [22]:
print(np.any([1,1,1]))
print(np.any([1,0,0,1]))

True
True


## 포도노균병
- 균사 생육온도 5∼21℃, 
- 분생포자 발아적온 **20∼22℃ **
- 발병조건 : 고온 다습
- 분생포자 형성 환경 : 기온 12℃, 습도 95∼100%
- 분생포자, 난포자 발아 적온 : 20∼25℃
- 주로 습기가 많고, 그늘지고, 통풍이 불량한 과원에서 많이 발병

## 포도탄저병
- 5월 월동 병반에서 분생포자 형성, 강우로 미숙과에서 분산, 전파 - 생육온도 7∼37℃, 발병적온 28℃
- 고온 다습(성숙기) 포도에 치명적인 피해

## 포도흰가루
- 고온건조(개화기부터 10월 상순)
- 최저 15℃, 최적 25∼30℃, 70%이상
- 특히 13℃ 이하의 저온, 32℃ 이상의 고온 병의 진전 둔화
- 5월 중순부터 발병하여 6∼7월에 많이 발병
- 특히 이른봄부터 여름사이에 기온이 높은 해에 심하게 발병

## 포도일소피해
- 과립온도 35℃이상이면 호흡이상으로 과실내 알데히드 축적, 세포가 죽음으로써 발생

## 포도 축과병
- 직사광선이 강하게 닿지 않는 과방의 속이나 아랫부분에 발생
- 장마기 또는 직후 온도 및 토양수분의 변화가 급격히 일어나 수분흡수와 증산의 균형이 깨져 일어남
- 비가 내린후 맑은 날씨가 되었을 때 심하게 발생


In [85]:
train_csv[0]

'C:/Users/jeong/Projects/dacon/병해진단/data/train\\10027\\10027.csv'

In [68]:
csv_data[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 588 entries, 0 to 587
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   측정시각        588 non-null    object 
 1   내부 온도 1 평균  588 non-null    float64
 2   내부 온도 1 최고  588 non-null    float64
 3   내부 온도 1 최저  588 non-null    float64
 4   내부 습도 1 평균  588 non-null    float64
 5   내부 습도 1 최고  588 non-null    float64
 6   내부 습도 1 최저  588 non-null    float64
 7   내부 이슬점 평균   588 non-null    float64
 8   내부 이슬점 최고   588 non-null    float64
 9   내부 이슬점 최저   588 non-null    float64
dtypes: float64(9), object(1)
memory usage: 46.1+ KB


In [None]:
crop_dict = {1:'딸기', 
             2:'토마토',
             3:'파프리카',
             4:'오이', 
             5:'고추',
             6: '시설포도'}

area_dict = {1:'열매',
             2:'꽃',
             3:'잎',
             4:'가지',
             5:'줄기',
             6:'뿌리',
             7:'해충'}

task_dict = {0:'정상',
             1:'병해',
             2:'생리장애',
             3:'보호제처리반응'}

disease_dict = {'00' : '정상',
                
                # 병해 a
                'a1' : '딸기잿빛곰팡이병',
                'a2' : '딸기흰가루병',
                'a3' : '오이노균병',
                'a4' : '오이흰가루병',
                'a5' : '토마토흰가루병',
                'a6' : '토마토잿빛곰팡이병',
                'a7' : '고추탄저병',
                'a8' : '고추흰가루병',
                'a9' : '파프리카흰가루병',
                'a10' : '파프리카잘록병',
                'a11' : '시설포도탄저병',
                'a12' : '시설포도노균병',
                
                # 생리장애 b
                'b1' : '냉해피해',
                'b2' : '열과',
                'b3' : '칼슘결핍',
                'b4' : '일소피해',
                'b5' : '축과병',
                'b6' : '다량원소결핍 (N)',
                'b7' : '다량원소결핍 (P)',
                'b8' : '다량원소결핍 (K)',
                
                # 보호제 처리반응 c
                'c1' : '딸기잿빛곰팡이병반응',
                'c2' : '딸기흰가루병반응',
                'c3' : '오이노균병반응',
                'c4' : '오이흰가루병반응',
                'c5' : '토마토흰가루병반응',
                'c6' : '토마토잿빛곰팡이병반응',
                'c7' : '고추탄저병반응',
                'c8' : '고추흰가루병반응',
                'c9' : '파프리카흰가루병반응',
                'c10' : '파프리카잘록병반응',
                'c11' : '시설포도탄저병반응',
                'c12' : '시설포도노균병반응'
               }

            # 시설 과채류
grow_dict = {11:'유묘기',
             12:'생장기',
             13:'착화/과실기',
             
             # 시설 과수류 (포도)
             21:'발아기',
             22:'개화기',
             23:'신초생장기',
             24:'과실성숙기',
             25:'수확기',
             26:'휴면기'}

risk_dict = {0:'정상',
             1:'초기',
             2:'중기',
             3:'말기'}