In [1]:
import pandas as pd
pd.set_option('mode.chained_assignment', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('dark')

import warnings
warnings.filterwarnings('ignore')

import torch
from torch import nn
from torchvision import models
from torch.utils.data import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import platform
import sys
import os
import sklearn 
import cv2
import json
from glob import glob

print(f"- os: {platform.platform()}")
print(f"- python: {sys.version}")
print(f"- pandas: {pd.__version__}")
print(f"- numpy: {np.__version__}")
print(f"- sklearn: {sklearn.__version__}")
print(f"- pytorch: {torch.__version__}")
print(f"- opencv: {cv2.__version__}")

- os: Windows-10-10.0.19041-SP0
- python: 3.7.11 (default, Jul 27 2021, 09:42:29) [MSC v.1916 64 bit (AMD64)]
- pandas: 1.3.0
- numpy: 1.19.1
- sklearn: 1.0.2
- pytorch: 1.7.0
- opencv: 4.5.5


In [2]:
import torch
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Setup complete. Using torch 1.7.0 (NVIDIA GeForce GTX 1060 6GB)


In [3]:
from shutil import copyfile
from sklearn.model_selection import StratifiedKFold

#customize iPython writefile so we can write variables
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell):
    with open(line, 'w') as f:
        f.write(cell.format(**globals()))

In [4]:
# !pip install wandb
# !wandb login

In [104]:
TRAIN_PATH = 'C:/git/jonie_github/VISION/_data/train/'
BATCH_SIZE = 128
EPOCHS = 150
USE_FOLD = True
SEED = 42
NUM_FOLD = 5

FLIP_IMG = True
BLUR_IMG = True
USE_AIHUB_DATA = False

# image resize
IMG_SIZE = 128

In [132]:
def print_prams():
    print(f'BATCH_SIZE: {BATCH_SIZE}')
    print(f'EPOCHS: {EPOCHS}')
    print(f'SEED: {SEED}')
    print(f'NUM_FOLD: {NUM_FOLD}')
    print(f'DATASET_NAME: {DATASET_NAME}')
    print(f'IMG_SIZE: {IMG_SIZE}')
    print(f'FLIP_IMG: {FLIP_IMG}')
    print(f'BLUR_IMG: {BLUR_IMG}')
    print(f'USE_AIHUB_DATA: {USE_AIHUB_DATA}')

In [67]:
train_csv = sorted(glob(TRAIN_PATH +'*/*.csv'))
train_jpg = sorted(glob(TRAIN_PATH +'*/?????.jpg'))
train_json = sorted(glob(TRAIN_PATH +'*/*.json'))

# test_csv = sorted(glob(TRAIN_PATH +'*/*.csv'))
# test_jpg = sorted(glob(TRAIN_PATH +'*/*.jpg'))
# test_json = sorted(glob(TRAIN_PATH +'*/*.json'))

In [6]:
def img_load(path):
    img = cv2.imread(path)
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

imgs = [img_load(k) for k in tqdm(train_jpg)]

100%|█████████████████████████████████████████████████████████████████████████████| 5767/5767 [00:51<00:00, 112.44it/s]


In [83]:
crops = []
areas = []
diseases = []
risks = []
labels = []
img_names = []
parts=[]
objects = []
img_w = []
img_h = []
grows = []

for i in range(len(train_json)):
    with open(train_json[i], 'r') as f:
        sample = json.load(f)
        
        crop = sample['annotations']['crop']
        area = sample['annotations']['area']
        disease = sample['annotations']['disease']
        grow = sample['annotations']['grow']
        risk = sample['annotations']['risk']
        label=f"{crop}_{disease}_{risk}"        
        img_name = sample['description']['image'].replace('.jpg','')
        width = sample['description']['width']
        height = sample['description']['height']

        crops.append(crop)
        areas.append(area)
        diseases.append(disease)
        risks.append(risk)
        labels.append(label)
        img_names.append(img_name)
        img_w.append(width)
        img_h.append(height)
        grows.append(grow)
        r = sample['annotations']['bbox'][0]
        temp = [int(r['x']), int(r['y']), int(r['w']), int(r['h'])]
        objects.append(temp)    
        
        p = []
        
        for part in sample['annotations']['part']:
            p.append({
                'bbox': [
                    int(part['x']), int(part['y']), int(part['w']), int(part['h'])
                ],
                'id': part['id']
            })
        parts.append(p)

In [84]:
df = pd.DataFrame({'image':img_names, 'img_w':img_w, 'img_h':img_h, 'crops':crops,'areas':areas,'diseases':diseases,'risks':risks, 'grows':grows, 'bbox':objects, 'labels':labels, 'dir':train_jpg})
df['crop_area'] = df.crops.astype(str) + '_' + df.areas.astype(str)
df['disease_risk'] = df.diseases.astype(str) + '_' + df.risks.astype(str)

df['x'] = df.bbox.apply(lambda x: x[0])
df['y'] = df.bbox.apply(lambda x: x[1])
df['w'] = df.bbox.apply(lambda x: x[2])
df['h'] = df.bbox.apply(lambda x: x[3])
df['xc'] = df.x + df.w.apply(lambda x:x//2)
df['yc'] = df.y + df.h.apply(lambda x:x//2)
df.dir = df.dir.apply(lambda x: x.replace('\\','/'))

df = df[['image','crops','areas','diseases','risks','grows','xc','yc','w','h', 'img_w', 'img_h', 'dir']]
df

Unnamed: 0,image,crops,areas,diseases,risks,grows,xc,yc,w,h,img_w,img_h,dir
0,10027,3,3,b7,1,13,171,280,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...
1,10037,3,5,00,0,11,190,159,48,315,384,512,C:/git/jonie_github/VISION/_data/train/10037/1...
2,10043,3,1,00,0,13,191,251,272,271,384,512,C:/git/jonie_github/VISION/_data/train/10043/1...
3,10045,3,3,00,0,11,214,319,172,263,384,512,C:/git/jonie_github/VISION/_data/train/10045/1...
4,10063,3,1,00,0,13,184,287,285,283,384,512,C:/git/jonie_github/VISION/_data/train/10063/1...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5762,67640,3,5,00,0,11,131,185,160,370,384,512,C:/git/jonie_github/VISION/_data/train/67640/6...
5763,67644,1,3,00,0,11,401,258,560,488,682,512,C:/git/jonie_github/VISION/_data/train/67644/6...
5764,67647,2,3,a5,2,12,374,303,322,174,682,512,C:/git/jonie_github/VISION/_data/train/67647/6...
5765,67649,1,3,00,0,12,261,154,220,239,384,512,C:/git/jonie_github/VISION/_data/train/67649/6...


## 학습 이미지 생성

In [81]:
# Gaussian이미지 생성
def add_gaussian_img(df:pd.DataFrame, jpg_dir):
    
    for img_dir in tqdm(jpg_dir):
        img = cv2.imread(img_dir)
        temp_dir = img_dir.replace('.jpg', f'_blur.jpg')
        gaussian_img = cv2.GaussianBlur(img, (0, 0), 1)
        cv2.imwrite(temp_dir, gaussian_img)
        
    df_blur = df.copy()
    df_blur.dir = df.dir.apply(lambda x: x.replace('.jpg', '_blur.jpg'))
    df_blur.image = df.image.apply(lambda x: x+'_blur')
    df = pd.concat([df, df_blur])
    df.reset_index(inplace=True, drop=True)
    return df

In [82]:
# filp 이미지 생성
def add_filp_img(df:pd.DataFrame, jpg_dir):
    for img_dir in tqdm(jpg_dir):
        img = cv2.imread(img_dir)
        for flip in range(2):
            flip_dir = img_dir.replace('.jpg', f'_flip{flip}.jpg')
            cv2.imwrite(flip_dir, cv2.flip(img, flip))
    
    # 좌표 변경
    df_flip0 = df.copy()
    df_flip0.yc = df.img_h - df.yc
    df_flip0.image = df_flip0.image + '_flip0'
    df_flip0.dir = df_flip0.dir.apply(lambda x: x.replace('.jpg', '_flip0.jpg'))

    df_flip1 = df.copy()
    df_flip1.xc = df.img_w - df.xc
    df_flip1.image = df_flip1.image + '_flip1'
    df_flip1.dir = df_flip1.dir.apply(lambda x: x.replace('.jpg', '_flip1.jpg'))
    
    df = pd.concat([df, df_flip0, df_flip1])
    df.reset_index(inplace=True, drop=True)

    return df

In [87]:
if BLUR_IMG:
    df = add_gaussian_img(df, df.dir)
if FLIP_IMG:
    df = add_filp_img(df, df.dir)

100%|██████████████████████████████████████████████████████████████████████████████| 5767/5767 [01:01<00:00, 93.20it/s]
100%|████████████████████████████████████████████████████████████████████████████| 11534/11534 [03:36<00:00, 53.21it/s]


In [89]:
df = df.sort_values(by=['image'])
df

Unnamed: 0,image,crops,areas,diseases,risks,grows,xc,yc,w,h,img_w,img_h,dir
0,10027,3,3,b7,1,13,171,280,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...
5767,10027_blur,3,3,b7,1,13,171,280,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...
17301,10027_blur_flip0,3,3,b7,1,13,171,232,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...
28835,10027_blur_flip1,3,3,b7,1,13,213,280,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...
11534,10027_flip0,3,3,b7,1,13,171,232,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11533,67678_blur,5,3,b6,1,13,205,253,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...
23067,67678_blur_flip0,5,3,b6,1,13,205,259,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...
34601,67678_blur_flip1,5,3,b6,1,13,179,253,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...
17300,67678_flip0,5,3,b6,1,13,205,259,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...


In [57]:
# 라벨 확인해 보려고 ..
# for i, dir_ in enumerate(tqdm(df.dir)):
#     temp = cv2.imread(dir_)
#     label_df = df.diseases + '_' + df.risks.apply(lambda x: str(x))
#     label = label_df[i]
    
#     cv2.putText(temp, label, (40,50), cv2.FONT_HERSHEY_SIMPLEX, 1.5,(255,255,0),3)
#     temp_dir = f'./temp/{df.image[i]}.jpg'
#     cv2.imwrite(temp_dir, temp)

100%|██████████████████████████████████████████████████████████████████████████████| 5767/5767 [01:32<00:00, 62.13it/s]


 ## 5fold-split

In [91]:
# Create train and validation split.
# 병해 비율 유지하여 나눔
Fold = StratifiedKFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED)
ratio = df.diseases.apply(lambda x: str(x)) + '_' + df.risks.apply(lambda x: str(x))

for n, (train_index, val_index) in enumerate(Fold.split(df, ratio)):
    df.loc[val_index, 'fold'] = int(n)
df['fold'] = df['fold'].astype(int)
df.to_csv('train_fold.csv', index=False)
df

Unnamed: 0,image,crops,areas,diseases,risks,grows,xc,yc,w,h,img_w,img_h,dir,fold
0,10027,3,3,b7,1,13,171,280,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...,3
5767,10027_blur,3,3,b7,1,13,171,280,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...,3
17301,10027_blur_flip0,3,3,b7,1,13,171,232,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...,4
28835,10027_blur_flip1,3,3,b7,1,13,213,280,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...,1
11534,10027_flip0,3,3,b7,1,13,171,232,165,237,384,512,C:/git/jonie_github/VISION/_data/train/10027/1...,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11533,67678_blur,5,3,b6,1,13,205,253,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...,4
23067,67678_blur_flip0,5,3,b6,1,13,205,259,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...,3
34601,67678_blur_flip1,5,3,b6,1,13,179,253,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...,3
17300,67678_flip0,5,3,b6,1,13,205,259,184,424,384,512,C:/git/jonie_github/VISION/_data/train/67678/6...,0


In [98]:
df.loc[df.fold==0, ['diseases', 'risks','crops']].value_counts()

diseases  risks  crops
00        0      3        1442
                 4        1095
                 6         998
                 1         929
a5        2      2         224
b6        1      5         198
b7        1      5         197
b3        1      3         197
b8        1      3         189
                 5         188
a9        1      3         186
b6        1      3         181
00        0      2         164
b7        1      3         161
a9        2      3         144
a7        2      5         125
00        0      5          81
a11       1      6          51
a9        3      3          45
a12       2      6          32
b5        1      6          28
b4        1      6          26
a11       2      6          17
b4        3      6          13
a12       1      6          10
dtype: int64

In [97]:
df.loc[df.fold==1, ['diseases', 'risks','crops']].value_counts()

diseases  risks  crops
00        0      3        1415
                 4        1129
                 6         990
                 1         986
a5        2      2         229
b7        1      5         205
b8        1      5         202
b7        1      3         197
00        0      2         188
b3        1      3         188
b6        1      5         183
a9        1      3         178
b8        1      3         158
b6        1      3         153
a9        2      3         122
a7        2      5         111
00        0      5          84
a9        3      3          52
a11       1      6          43
a12       2      6          33
b4        1      6          22
b5        1      6          16
b4        3      6          15
a12       1      6          13
a11       2      6           9
dtype: int64

## 라벨 생성

In [99]:
# 병해가 있으면 병해+피해정도+촬영위치+생육단계로 라벨링
disease_area = df.diseases.apply(lambda x: str(x)) + '_' + df.risks.apply(lambda x: str(x)) + '_'+ df.areas.apply(lambda x: str(x)) + '_' + df.grows.apply(lambda x: str(x))
disease_area_df = pd.DataFrame({'image': df.image, 'xc':df.xc, 'yc': df.yc, 'w':df.w, 'h':df.h, 'label':disease_area,'fold':df.fold,'path':df.dir,'img_w':df.img_w, 'img_h':df.img_h})

In [101]:
# disease =00 일때는 작물+촬영위치+생육단계로 라벨링
disease_area_df.loc[df.diseases=='00','label'] = df[df.diseases=='00'].crops.apply(lambda x: str(x)) + '_'+ df[df.diseases=='00'].areas.apply(lambda x: str(x)) + '_'+ df[df.diseases=='00'].grows.apply(lambda x: str(x))
disease_area_df

Unnamed: 0,image,xc,yc,w,h,label,fold,path,img_w,img_h
0,10027,171,280,165,237,b7_1_3_13,3,C:/git/jonie_github/VISION/_data/train/10027/1...,384,512
5767,10027_blur,171,280,165,237,b7_1_3_13,3,C:/git/jonie_github/VISION/_data/train/10027/1...,384,512
17301,10027_blur_flip0,171,232,165,237,b7_1_3_13,4,C:/git/jonie_github/VISION/_data/train/10027/1...,384,512
28835,10027_blur_flip1,213,280,165,237,b7_1_3_13,1,C:/git/jonie_github/VISION/_data/train/10027/1...,384,512
11534,10027_flip0,171,232,165,237,b7_1_3_13,3,C:/git/jonie_github/VISION/_data/train/10027/1...,384,512
...,...,...,...,...,...,...,...,...,...,...
11533,67678_blur,205,253,184,424,b6_1_3_13,4,C:/git/jonie_github/VISION/_data/train/67678/6...,384,512
23067,67678_blur_flip0,205,259,184,424,b6_1_3_13,3,C:/git/jonie_github/VISION/_data/train/67678/6...,384,512
34601,67678_blur_flip1,179,253,184,424,b6_1_3_13,3,C:/git/jonie_github/VISION/_data/train/67678/6...,384,512
17300,67678_flip0,205,259,184,424,b6_1_3_13,0,C:/git/jonie_github/VISION/_data/train/67678/6...,384,512


In [103]:
#라벨이 궁금해서..
disease_area_df['kor_label'] = df.diseases.apply(lambda x: disease_dict[x]) + '_' + df.risks.apply(lambda x: risk_dict[x]) + '_'+ df.areas.apply(lambda x: area_dict[x]) + '_' + df.grows.apply(lambda x: grow_dict[x])
disease_area_df.loc[df.diseases=='00','kor_label'] = df[df.diseases=='00'].crops.apply(lambda x: crop_dict[x]) + '_'+ df[df.diseases=='00'].areas.apply(lambda x: area_dict[x]) + '_'+ df[df.diseases=='00'].grows.apply(lambda x: grow_dict[x])
print(disease_area_df.kor_label.unique())
print(disease_area_df.label.unique())
print(len(disease_area_df.kor_label.unique()))

['다량원소결핍(P)_초기_잎_착화/과실기' '파프리카_줄기_유묘기' '파프리카_열매_착화/과실기' '파프리카_잎_유묘기'
 '오이_잎_착화/과실기' '파프리카흰가루병_초기_잎_착화/과실기' '딸기_잎_생장기' '오이_꽃_착화/과실기'
 '칼슘결핍_초기_잎_착화/과실기' '다량원소결핍(K)_초기_잎_착화/과실기' '파프리카_잎_생장기'
 '다량원소결핍(N)_초기_잎_착화/과실기' '딸기_잎_유묘기' '시설포도_열매_과실성숙기' '시설포도_잎_과실성숙기'
 '토마토흰가루병_중기_잎_생장기' '파프리카흰가루병_중기_잎_착화/과실기' '토마토_잎_착화/과실기'
 '고추탄저병_중기_열매_착화/과실기' '오이_잎_생장기' '다량원소결핍(P)_초기_잎_유묘기' '오이_줄기_생장기'
 '오이_꽃_생장기' '파프리카흰가루병_말기_잎_착화/과실기' '일소피해_초기_잎_과실성숙기' '고추_잎_생장기'
 '일소피해_말기_잎_과실성숙기' '토마토_열매_착화/과실기' '시설포도탄저병_초기_열매_과실성숙기' '시설포도_줄기_과실성숙기'
 '시설포도노균병_초기_잎_과실성숙기' '시설포도노균병_중기_잎_과실성숙기' '파프리카_꽃_생장기' '축과병_초기_열매_과실성숙기'
 '시설포도탄저병_중기_열매_과실성숙기' '고추_열매_생장기' '시설포도_가지_과실성숙기']
['b7_1_3_13' '3_5_11' '3_1_13' '3_3_11' '4_3_13' 'a9_1_3_13' '1_3_12'
 '4_2_13' 'b3_1_3_13' 'b8_1_3_13' '3_3_12' 'b6_1_3_13' '1_3_11' '6_1_24'
 '6_3_24' 'a5_2_3_12' 'a9_2_3_13' '2_3_13' 'a7_2_1_13' '4_3_12'
 'b7_1_3_11' '4_5_12' '4_2_12' 'a9_3_3_13' 'b4_1_3_24' '5_3_12'
 'b4_3_3_24' '2_1_13' 'a11_1_1_24' '6_5_24' 'a12_1_3_24' 'a12_2_3_24'
 '3_2_12' 'b5_1_

In [119]:
disease_area_df.path[5767]

'C:/git/jonie_github/VISION/_data/train/10027/10027_blur.jpg'

## 5 fold images copy

In [112]:
# 디렉터리에 fold로 지정된 이미지 복사
def nfold_img_copy(df):
    # Remove existing dirs
    for fold in range(NUM_FOLD):
        print(f'{"="*30} CREATE {NUM_FOLD}-FOLD DATASETS (IMAGE) FOLD_{fold} {"="*30}')
        
        # Prepare train and valid df
        train_df = df.loc[df.fold != fold].reset_index(drop=True)
        valid_df = df.loc[df.fold == fold].reset_index(drop=True)

        try:
            shutil.rmtree(f'{DATASET_NAME}/dataset_folds_{fold}/images')
            shutil.rmtree(f'{DATASET_NAME}/dataset_folds_{fold}/labels')
        except:
            print('No dirs')

            # Make new dirs
        os.makedirs(f'{DATASET_NAME}/dataset_folds_{fold}/images/train', exist_ok=True)
        os.makedirs(f'{DATASET_NAME}/dataset_folds_{fold}/images/valid', exist_ok=True)
        os.makedirs(f'{DATASET_NAME}/dataset_folds_{fold}/labels/train', exist_ok=True)
        os.makedirs(f'{DATASET_NAME}/dataset_folds_{fold}/labels/valid', exist_ok=True)

        # Move the images to relevant split folder.
        for i in tqdm(range(len(train_df))):
            row = train_df.loc[i]
            copyfile(row.path, f'{DATASET_NAME}/dataset_folds_{fold}/images/train/{row.image}.jpg')
        for i in tqdm(range(len(valid_df))):
            row = valid_df.loc[i]
            copyfile(row.path, f'{DATASET_NAME}/dataset_folds_{fold}/images/valid/{row.image}.jpg')

In [125]:
def get_yolo_format_bbox(row):
    # yolo format bbox
    xc = row.xc/row.img_w
    yc = row.yc/row.img_h
    w = row.w/row.img_w
    h = row.h/row.img_h
    if (xc*yc*w*h<0) | ((xc>1) | (yc>1) | (w>1) | (h>1)) :
        raise Exception('Please check the bbox coordinates.')
    
    return [str(xc), str(yc), str(w), str(h)]

In [126]:
def write_bbox_files(tmp_df, fold_num, split):
    path = f'{DATASET_NAME}/dataset_folds_{fold}/labels/{split}'
    
    for i in tqdm(range(len(tmp_df))):
        row = tmp_df.loc[i]

        # Get image name
        img_name = row.image
        
        # Get label
        label = le.transform([row.label])[0]
        
        bbox_str = ' '.join(get_yolo_format_bbox(row))
        
        file_name = f'{path}/{img_name}.txt'

        with open(file_name, 'w') as f:
            f.write(str(label) + ' ' + bbox_str + '\n')       

In [127]:
def generate_nfold_annotation(df):
    # Prepare the txt files for bounding box
    for fold in range(NUM_FOLD):
        print(f'{"="*30} CREATE {NUM_FOLD}-FOLD DATASETS (ANNOTATION) FOLD_{fold} {"="*30}')

        # Prepare train and valid df
        train_df = df.loc[df.fold != fold].reset_index(drop=True)
        valid_df = df.loc[df.fold == fold].reset_index(drop=True)

        # prepare label for train
        write_bbox_files(train_df, fold, 'train')
        # prepare label for valid
        write_bbox_files(valid_df, fold, 'valid')

In [128]:
if not USE_FOLD:
    pass
else:
    DATASET_NAME = 'DISEASE_AREA'
    le = LabelEncoder()
    le.fit(disease_area_df.label)
    nfold_img_copy(disease_area_df)
    generate_nfold_annotation(disease_area_df)



100%|██████████████████████████████████████████████████████████████████████████| 27681/27681 [00:25<00:00, 1096.50it/s]
100%|████████████████████████████████████████████████████████████████████████████| 6921/6921 [00:06<00:00, 1153.13it/s]




100%|██████████████████████████████████████████████████████████████████████████| 27681/27681 [00:22<00:00, 1239.37it/s]
100%|████████████████████████████████████████████████████████████████████████████| 6921/6921 [00:06<00:00, 1096.72it/s]




100%|██████████████████████████████████████████████████████████████████████████| 27682/27682 [00:21<00:00, 1262.50it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 6920/6920 [00:06<00:00, 996.73it/s]




100%|██████████████████████████████████████████████████████████████████████████| 27682/27682 [00:20<00:00, 1374.63it/s]
100%|████████████████████████████████████████████████████████████████████████████| 6920/6920 [00:06<00:00, 1149.36it/s]




100%|██████████████████████████████████████████████████████████████████████████| 27682/27682 [00:20<00:00, 1331.38it/s]
100%|████████████████████████████████████████████████████████████████████████████| 6920/6920 [00:06<00:00, 1119.14it/s]


In [129]:
# Create .yaml file 
import yaml
label_unique = le.classes_

for fold in range(NUM_FOLD):
    data_yaml = dict(
            path = f'../DISEASE_AREA/dataset_folds_{fold}/images',
            train = 'train',
            val = 'valid',
            nc = len(label_unique),
            names = [str(x) for x in label_unique]
        )

    with open(f'yolov5/data/DISEASE_AREA_data_fold_{fold}.yaml', 'w') as outfile:
        yaml.dump(data_yaml, outfile, default_flow_style=True)

# 🚄🚄🚄

In [130]:
%cd  yolov5

C:\git\jonie_github\VISION\yolov5


In [133]:
print_prams()

BATCH_SIZE: 128
EPOCHS: 150
SEED: 42
NUM_FOLD: 5
DATASET_NAME: DISEASE_AREA
IMG_SIZE: 128
FLIP_IMG: True
BLUR_IMG: True
USE_AIHUB_DATA: False


In [None]:
yolo_weight = 'yolov5l'
for fold in range(NUM_FOLD):    
    print('FOLD NUMBER: ', fold)
    !python train.py --img {IMG_SIZE} \
                      --batch {BATCH_SIZE} \
                      --epochs {EPOCHS} \
                      --data {DATASET_NAME}_data_fold_{fold}.yaml \
                      --weights {yolo_weight}.pt \
                      --project {DATASET_NAME}-GROW \
                      --name {yolo_weight}-b-{BATCH_SIZE}-e-{EPOCHS}-img-{IMG_SIZE}-fold-{fold}                 
    print('###########################################################################################\n')

FOLD NUMBER:  0


In [None]:
print(crop_le.classes_)
print(disease_le.classes_)
print(len(crop_le.classes_))
print(len(disease_le.classes_))

['1_3' '2_1' '2_3' '3_1' '3_2' '3_3' '3_5' '4_2' '4_3' '4_5' '5_1' '5_3'
 '6_1' '6_3' '6_4' '6_5']
['00_0' 'a11_1' 'a11_2' 'a12_1' 'a12_2' 'a5_2' 'a7_2' 'a9_1' 'a9_2' 'a9_3'
 'b3_1' 'b4_1' 'b4_3' 'b5_1' 'b6_1' 'b7_1' 'b8_1']
16
17


## 환경정보 [LINK](https://www.nongsaro.go.kr/portal/ps/pss/pssa/photoSearchLst.ps?menuId=PS00202&sKidofcomdtyTabCode=VC&sKidofcomdtyCode=&hlsctCode=&sicknsCode=&nnmyInsectCode=)
1. 노균병
  - 노균병은 20∼25℃의 다습한 상태나 밀식으로 통풍과 채광이 불량 할 때 많이 발생
  - **생육 후기**에 저온, 다습하면 아랫잎부터 발생하여 큰 피해를 주는데, **최소 6시간 동안 100%의 상대습도가 유지**되어야만 병원균의 포자낭이 형성되고, 발병 온도 범위는 5～30℃이며, 발병 적온은 **15～20℃**이다.
2. 탄저병
  - 온도 25～30℃, 상대습도 70% 이상에서 잘 이루어지며, 분생포자의 전반은 고온기에 관수시의 물방울이나 빗방울 혹은 바람에 의해 이루어진다.
  - (고추)노지포장에서는 여름철 장마기에 분생포자가 주로 비, 바람에 의해 전반된다. 노지재배의 풋고추에서는 7월 초순부터 병이 발생하기 시작하여 수확기까지 계속 발생한다.
3. 잿빛곰팡이병
  - 시설재배시 기온이 20℃내외이고 습도가 높을 때 많이 발생하며, 노지재배시는 여름철 장마기때 주로 발생한다.
  - 20℃ 전,후의 저온과 다습 조건이 가장 중요한 다 발생 원인
  - 이른 봄과 가을의 시설재배지에서 심하게 발생하며, 특히 과습한 재배환경에서 피해가 크다.
4. 흰가루병
  - 주로 봄과 가을의 시설재배에서 많이 발생하며, 여름에는 발생하지 않는다. (본 병은 일반적으로 15～28℃에서 많이 발생되며, 32℃이상의 고온에서는 병 발생이 억제된다.)
  - 시설재배지와 가을 날씨가 건조할 때 심하게 발생한다.
5. 잘록병
  - 기온이 다소 서늘하고(20℃이하) 토양이 **다습한 조건**에서 발생이 심하지만 외부 병징은 고온 건조 시에 잘 나타난다.
  - 병든 식물체내에서 난포자 상태로 겨울을 보낸 병원균은 토양 온도가 10℃이상이 되면 다시 발아하여 활동을 시작한다.
  - 유묘기부터 생육 초기까지 발생


In [38]:
crop_dict = {1:'딸기', 
             2:'토마토',
             3:'파프리카',
             4:'오이', 
             5:'고추',
             6: '시설포도'}

area_dict = {1:'열매',
             2:'꽃',
             3:'잎',
             4:'가지',
             5:'줄기',
             6:'뿌리',
             7:'해충'}

task_dict = {0:'정상',
             1:'병해',
             2:'생리장애',
             3:'보호제처리반응'}

disease_dict = {'00' : '정상',
                
                # 병해 a
                'a1' : '딸기잿빛곰팡이병',
                'a2' : '딸기흰가루병',
                'a3' : '오이노균병',
                'a4' : '오이흰가루병',
                'a5' : '토마토흰가루병',
                'a6' : '토마토잿빛곰팡이병',
                'a7' : '고추탄저병',
                'a8' : '고추흰가루병',
                'a9' : '파프리카흰가루병',
                'a10' : '파프리카잘록병',
                'a11' : '시설포도탄저병',
                'a12' : '시설포도노균병',
                
                # 생리장애 b
                'b1' : '냉해피해',
                'b2' : '열과',
                'b3' : '칼슘결핍',
                'b4' : '일소피해',
                'b5' : '축과병',
                'b6' : '다량원소결핍(N)',
                'b7' : '다량원소결핍(P)',
                'b8' : '다량원소결핍(K)',
                
                # 보호제 처리반응 c
                'c1' : '딸기잿빛곰팡이병반응',
                'c2' : '딸기흰가루병반응',
                'c3' : '오이노균병반응',
                'c4' : '오이흰가루병반응',
                'c5' : '토마토흰가루병반응',
                'c6' : '토마토잿빛곰팡이병반응',
                'c7' : '고추탄저병반응',
                'c8' : '고추흰가루병반응',
                'c9' : '파프리카흰가루병반응',
                'c10' : '파프리카잘록병반응',
                'c11' : '시설포도탄저병반응',
                'c12' : '시설포도노균병반응'
               }

            # 시설 과채류
grow_dict = {11:'유묘기',
             12:'생장기',
             13:'착화/과실기',
             
             # 시설 과수류 (포도)
             21:'발아기',
             22:'개화기',
             23:'신초생장기',
             24:'과실성숙기',
             25:'수확기',
             26:'휴면기'}

risk_dict = {0:'정상',
             1:'초기',
             2:'중기',
             3:'말기'}