In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from tqdm import tqdm 
import os 
from glob import glob 
import pickle
import cv2

# 초기 세팅 

In [2]:
# Mount Google Drive at /content/drive; later cells read the dataset archive and
# the saved models from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# image_size: side length that img_preprocess resizes every image to (square output).
# image_channels: channel count used by make_batch_img to size the batch buffer.
image_size= 256 
image_channels =3

# 데이터 로드 

In [None]:
# Unpack the dataset archive from Drive into the current working directory.
# Two alternative Drive folders are listed; presumably only one of them exists on a
# given account, so one of these commands is expected to fail — TODO confirm.
!unzip '/content/drive/MyDrive/HY_데이터/생육환경최적화.zip'
!unzip '/content/drive/MyDrive/데이터공유폴더/생육환경최적화.zip'

In [5]:
# Every entry directly under the extracted training directory, in sorted order.
folder_dir = sorted(glob('/content/train/*'))

In [6]:
# Walk every training folder and gather three parallel collections:
# image paths, metadata CSV paths and the rows of each folder's label.csv.
image_dir_lst, csv_dir_lst, label_dir_lst = [], [], []

for folder in folder_dir:
  # Images may be PNG or JPG; PNG matches are collected before JPG matches.
  image_dir = [
      path
      for pattern in ('*.png', '*.jpg')
      for path in glob(os.path.join(folder, 'image', pattern))
  ]
  image_dir_lst += image_dir

  csv_dir = glob(os.path.join(folder, 'meta', '*.csv'))
  csv_dir_lst += csv_dir

  # Each row of label.csv is appended as one array.
  label_df = pd.read_csv(folder + '/label.csv')
  label_dir_lst.extend(label_df.to_numpy())

# Paths are ordered lexicographically; label rows are ordered by their first column.
image_dir_lst = sorted(image_dir_lst)
csv_dir_lst = sorted(csv_dir_lst)
label_dir_lst = pd.DataFrame(label_dir_lst).sort_values(by=0).to_numpy()

In [7]:
# Assemble one row per sample: image path, metadata CSV path and leaf-weight label.
# The three sequences are paired purely by position after sorting — presumably every
# image has exactly one metadata file and one label row; verify against the dataset.
df = pd.DataFrame([image_dir_lst,csv_dir_lst,label_dir_lst[:,1]]).T
df.columns = ['IMG_DIR','META_DIR','LEAF_WEIGHT']
df['LEAF_WEIGHT'] = df['LEAF_WEIGHT'].map(float)

# Presumably the resolution of the raw images — TODO confirm. It is stored under its own
# name so that it does not overwrite the integer `image_size`, which img_preprocess and
# make_batch_img use as the resize side length.
original_image_size = 2464,3280

# 데이터 제너레이터

## 이미지 데이터

In [8]:
from albumentations.augmentations.transforms import VerticalFlip,Transpose,GaussNoise,Normalize,RandomGamma,SmallestMaxSize,GridDistortion,Rotate,HorizontalFlip
from albumentations.augmentations.transforms import RandomBrightnessContrast,Transpose,CLAHE,LongestMaxSize
def image_augmentation(normal_image):
  """Run one image through the augmentation pipeline and return the result.

  The pipeline chains VerticalFlip, Rotate and RandomBrightnessContrast, each
  constructed with its default arguments. It is rebuilt on every call.

  Args:
    normal_image: image handed to the pipeline as its ``image`` argument.

  Returns:
    The ``'image'`` entry of the pipeline output.
  """
  import albumentations as A

  pipeline = A.Compose([VerticalFlip(), Rotate(), RandomBrightnessContrast()])
  result = pipeline(image=normal_image)
  return result['image']
  
def img_preprocess(img,augmentation):
  """Optionally augment an image, resize it to a square and divide it by 255.

  Args:
    img: image array to process.
    augmentation: when truthy, the image is first passed through
      ``image_augmentation``.

  Returns:
    The image resized to ``(image_size, image_size)`` and divided by 255.
  """
  source = image_augmentation(img) if augmentation else img
  target_size = (image_size, image_size)
  resized = cv2.resize(source, dsize=target_size)
  return resized/255.

def make_batch_img(batch_df_img_dir,augmentation):
  """Load and preprocess a batch of images into one array.

  Args:
    batch_df_img_dir: sequence of image file paths.
    augmentation: forwarded to ``img_preprocess`` for every image.

  Returns:
    Array of shape ``(len(batch_df_img_dir), image_size, image_size,
    image_channels)`` holding the preprocessed images.

  Raises:
    FileNotFoundError: if ``cv2.imread`` cannot read one of the paths.
  """
  batch_imgs = np.zeros((len(batch_df_img_dir), image_size, image_size, image_channels))
  for n,img_dir in enumerate(batch_df_img_dir):
    img = cv2.imread(img_dir)
    # cv2.imread signals failure by returning None instead of raising; fail here
    # with the offending path rather than with an obscure error further down.
    if img is None:
      raise FileNotFoundError(f'cv2.imread could not read image: {img_dir!r}')
    batch_imgs[n] = img_preprocess(img,augmentation)
  return batch_imgs

## 라벨 

In [9]:
def label_scailng(temp_batch_label):
  """Min-max scale labels with the module-level ``min`` and ``max`` values.

  Reads the globals ``min`` and ``max``. In this notebook they are assigned from
  ``init_minmax``; until then the names refer to the Python built-ins.

  Args:
    temp_batch_label: label value(s) to scale.

  Returns:
    ``(temp_batch_label - min) / (max - min)``.
  """
  global min,max
  shifted = temp_batch_label - min
  value_range = max - min
  return shifted/value_range
  
def init_minmax(df):
  """Return the smallest and largest value of the ``LEAF_WEIGHT`` column.

  Args:
    df: object indexable by ``'LEAF_WEIGHT'`` (a DataFrame in this notebook).

  Returns:
    Tuple ``(minimum, maximum)`` of that column.
  """
  weights = df['LEAF_WEIGHT']
  lowest = np.min(weights,axis=0)
  highest = np.max(weights,axis=0)
  return lowest,highest

## 제너레이터 

In [10]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import Sequence
import math

class DataGenerator(Sequence):
  """Keras ``Sequence`` that serves image batches (and scaled labels) from a DataFrame.

  Args:
    df: DataFrame with an ``IMG_DIR`` column and, when ``Train`` is true, a
      ``LEAF_WEIGHT`` column.
    batch_size: number of rows per batch.
    shuffle: when true, the row order is shuffled at construction and again each
      time ``on_epoch_end`` is called.
    augmentation: forwarded to ``make_batch_img``.
    Train: when true, ``__getitem__`` returns ``(images, scaled_labels)``;
      otherwise it returns only the images.
  """

  def __init__(self, df,
               batch_size: int,
               shuffle: bool = False,
               augmentation: bool = False,
               Train: bool = True):
    # Let the Keras base class initialise its own state; newer Keras versions
    # warn when a Sequence subclass skips this call.
    super().__init__()
    self.shuffle = shuffle
    self.df = df
    self.batch_size = batch_size
    self.augmentation = augmentation
    self.Train = Train
    # Define self.indices up front and, when shuffling is enabled, shuffle the
    # very first epoch as well instead of only the later ones.
    self.on_epoch_end()

  def __len__(self):
    """Number of batches per epoch; the last batch may be smaller."""
    return math.ceil(len(self.df) / self.batch_size)

  def on_epoch_end(self):
    """Reset the index array and, if shuffling is enabled, reorder the rows."""
    self.indices = np.arange(len(self.df))
    if self.shuffle:
      np.random.shuffle(self.indices)
      # Rebinds self.df to a reordered frame; the caller's frame is not modified.
      self.df = self.df.iloc[self.indices]

  def __getitem__(self, index):
    """Return batch number ``index``.

    Returns:
      ``(images, labels)`` when ``Train`` is true, otherwise ``images``. Labels
      are scaled by ``label_scailng``, which reads the module-level ``min`` and
      ``max``.
    """
    self.batch_df = self.df.iloc[index*self.batch_size:(index+1)*self.batch_size]

    batch_img = make_batch_img(self.batch_df['IMG_DIR'].values,self.augmentation)
    if self.Train:
      batch_label = self.batch_df['LEAF_WEIGHT'].values
      batch_label = label_scailng(batch_label)
      return batch_img.astype(float), batch_label
    else:
      return batch_img.astype(float)

# 모델 로드 

In [11]:
from tensorflow.keras import optimizers
from tensorflow.keras.metrics import Recall
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.layers import  Activation, AveragePooling2D
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Activation,Dense,Flatten
from tensorflow.keras.layers import BatchNormalization, Input, Dropout,Reshape
from tensorflow.keras import Model
from tensorflow.keras.layers import concatenate
from tensorflow.keras.applications import VGG19,EfficientNetV2S, ResNet50V2, vgg19,resnet_v2
import math
from tensorflow.keras import backend as K
from tensorflow.keras.utils import plot_model
from keras.layers.pooling import GlobalAveragePooling2D
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import CosineDecay, ExponentialDecay

In [12]:
# Locate the directory holding the saved fold models. Two alternative Drive
# layouts are supported; the first one that exists is used.
model_path_candidates = [
    '/content/drive/MyDrive/Colab Notebooks/Repositary/생육환경최적화/model/0428',
    '/content/drive/MyDrive/Colab Notebooks/Hanyang_repo/생육환경최적화/model/0428',
]
for model_path in model_path_candidates:
  if os.path.isdir(model_path):
    break
else:
  # Fail with both attempted locations instead of hiding the cause behind a bare except.
  raise FileNotFoundError(f'No model directory found; tried: {model_path_candidates}')

# Sorted so that the load order does not depend on the file system.
model_list = sorted(os.listdir(model_path))

# Load every file in the directory, keyed by the part of its name before the first '.'.
# compile=False: the models are loaded without compiling them.
model_dict = {}
for model_ in model_list:
  name = model_.split('.')[0]
  model = tf.keras.models.load_model(os.path.join(model_path,model_),compile=False)
  model_dict[name]= model

In [13]:
# Display the loaded models, keyed by the part of each file name before the first '.'.
model_dict

{'0428_1_0fold': <keras.engine.functional.Functional at 0x7f6f0c38bc90>,
 '0428_1_1fold': <keras.engine.functional.Functional at 0x7f6e6e1ec650>,
 '0428_1_2fold': <keras.engine.functional.Functional at 0x7f6e4dbc3810>,
 '0428_1_3fold': <keras.engine.functional.Functional at 0x7f6e4db4c690>,
 '0428_1_4fold': <keras.engine.functional.Functional at 0x7f6e4dd04cd0>,
 '0428_2_0fold': <keras.engine.functional.Functional at 0x7f6e8e2e4350>,
 '0428_2_1fold': <keras.engine.functional.Functional at 0x7f6e6e108450>,
 '0428_2_2fold': <keras.engine.functional.Functional at 0x7f6e4db59750>,
 '0428_2_3fold': <keras.engine.functional.Functional at 0x7f6e4d707f90>,
 '0428_2_4fold': <keras.engine.functional.Functional at 0x7f6e4d7a83d0>}

# 테스트 데이터 로드 

In [14]:
# Sorted paths of every file in the test image directory.
test_dir = sorted(glob('/content/test/image/*'))

# Submission template; prediction columns are filled in by later cells.
submission = pd.read_csv('/content/sample_submission.csv')

# Single-column frame in the shape DataGenerator expects (column 'IMG_DIR').
test_df = pd.DataFrame(test_dir)
test_df.columns = ['IMG_DIR']

In [16]:
# model_dict keys of the five '0428_2' fold models, ordered by fold index 0..4.
model_names = [f'0428_2_{fold}fold' for fold in range(5)]

# submission 

## Kfold

In [17]:
# K-fold ensemble inference: for each fold, take the label range of that fold's
# training split, predict the test set with the fold's model and undo the scaling.
from sklearn.model_selection import KFold
fold_index = np.arange(len(df))
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# The test generator does not depend on the fold, so it is built once and reused.
test_generator = DataGenerator(test_df,
                               batch_size=32,
                               shuffle=False,
                               augmentation=False,
                               Train=False)

predicted_list = []
for n,(train, valid) in enumerate(kfold.split(fold_index)):
  train_df = df.iloc[train]
  valid_df = df.iloc[valid]

  # Label range of this fold's training split, used to map model output back to leaf weight.
  # NOTE(review): assumes model_names[n] was trained on this exact split (same
  # n_splits / random_state) — confirm against the training notebook.
  # NOTE: these module-level names shadow the built-in min()/max(); label_scailng
  # reads them as globals.
  min,max = init_minmax(train_df)

  model = model_dict[model_names[n]]
  y_ = model.predict(test_generator)
  # Invert the min-max scaling applied to the labels.
  predicted = y_*(max-min) + min
  predicted_list.append(predicted)

In [18]:
# Average the per-fold predictions element-wise and write the ensemble submission to Drive.
fold_predictions = np.array(predicted_list)
submission['leaf_weight'] = fold_predictions.mean(axis=0).squeeze()
submission_path = '/content/drive/MyDrive/Colab Notebooks/Hanyang_repo/생육환경최적화/submission/'
submission_file = os.path.join(submission_path,'submission0428_2_fold.csv')
submission.to_csv(submission_file,index=False)

## Holdout

In [None]:
# Select a single model for hold-out style inference.
# NOTE(review): the model_dict listing shown earlier in this notebook contains only
# '0428_*' keys, so this lookup would raise KeyError unless models from another
# directory are loaded first — confirm.
model = model_dict['0424_3_0fold']

In [None]:
# Predict the test set with the single selected model and undo the label scaling.
# NOTE(review): test_generator, min and max are whatever the K-fold cell left behind
# (the values of its last fold) — confirm that this range matches how the selected
# model was trained.
y_ = model.predict(test_generator)
predicted = y_*(max-min) + min
submission['leaf_weight'] = predicted.squeeze()
# The submission file is written under /content/.
submission_path = '/content/'
submission.to_csv(os.path.join(submission_path,'submission0424_4.csv'),index=False)

In [None]:
!cp /content/submission0424_4.csv