In [None]:
# 데이터 보기
import pandas as pd
import numpy as np
from glob import glob

# 이미지데이터 로딩
from PIL import Image
import cv2
from tqdm import tqdm

# 파일경로 설정
import os
import shutil
import json

# Modeling
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from keras import backend as K 
from keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
from sklearn.model_selection import KFold,StratifiedKFold

# Others
import os
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive inside the Colab runtime; later cells read from and write to paths below it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

### Data load

In [None]:
# Root folder of the dataset on the mounted drive and its train/test sub-folders.
data_path = '/content/drive/My Drive/hands'
train_path = f'{data_path}/train'
test_path = f'{data_path}/test'

# CSV tables stored next to the image folders.
hand_gesture = pd.read_csv(f'{data_path}/hand_gesture_pose.csv')
sample_submission = pd.read_csv(f'{data_path}/sample_submission.csv')

In [None]:
# Collect the sample folders under train/ and test/ with glob and order them
# numerically by folder name (a plain string sort would put '10' before '2').
def _folder_number(folder):
    """Return the integer encoded in the last '/'-separated component of `folder`."""
    return int(folder.split('/')[-1])

train_folders = sorted(glob(train_path + '/*'), key=_folder_number)
test_folders = sorted(glob(test_path + '/*'), key=_folder_number)
train_folders[:5]

In [None]:
# Build one label record per training folder from the first JSON file found in it.
label_rows = []
for train_folder in train_folders:
    json_path = glob(train_folder + '/*.json')[0]
    # Context manager so the file handle is closed right away instead of
    # being left open for every folder until garbage collection.
    with open(json_path) as json_file:
        js = json.load(json_file)
    action = js.get('action')
    cat, cat_name = action[0], action[1]
    # Store the folder path relative to data_path together with its label.
    label_rows.append([train_folder.replace(data_path, ''), cat, cat_name])

answers = pd.DataFrame(label_rows, columns=['train_path', 'answer', 'answer_name'])
answers

### Preprocessing

In [None]:
# One-hot encode the `answer` column; rows keep the order of `answers`.
# The dtype is pinned because pandas >= 2.0 returns boolean dummy columns by
# default, and the label matrix is later fed to the network as targets.
classes = pd.get_dummies(answers[['answer']], columns=['answer'], dtype=np.float32).to_numpy()

In [None]:
import concurrent.futures

In [None]:
def load(image_path, target_size=(112, 112)):
    """Read one image file, resize it and scale its pixel values by 1/255.

    Args:
        image_path: Path of the image file to read.
        target_size: (height, width) passed to `load_img`; the channel count
            is not part of this tuple.

    Returns:
        The array produced by `img_to_array`, divided by 255.
    """
    img = image.load_img(image_path, target_size=target_size)
    img = image.img_to_array(img)
    return img / 255

def add_ (img):
    """Append `img` to the module-level `images` list, which must exist before this is called."""
    images.append(img)

def start_processing(train_folders):
    """Load every PNG frame of the given folders in parallel into `images`.

    Folders are processed in the order given, and inside each folder frames
    are appended in numeric file-name order, so the resulting list has the
    same order on every run.

    Args:
        train_folders: Iterable of folder paths whose `*.png` files are named
            with integers (e.g. `0.png`, `1.png`, ...).
    """
    # One worker pool for the whole run instead of a new pool per folder.
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for train_folder in tqdm(train_folders):
            image_paths = sorted(
                glob(train_folder + '/*.png'),
                key=lambda x: int(x.split('/')[-1].replace('.png', '')),
            )
            # executor.map yields results in submission order, unlike
            # as_completed, which yields them in whatever order workers finish.
            for img in executor.map(load, image_paths):
                add_(img)

In [None]:
# Load all training frames, then build one label row per frame.
images = []
targets = []
start_processing(train_folders)

for idx, train_folder in enumerate(tqdm(train_folders)):
    # Only the frame count is needed here, so the file list is not sorted.
    n_frames = len(glob(train_folder + '/*.png'))
    # Row `idx` of `classes` belongs to `train_folders[idx]`; indexing by
    # position avoids relying on folders being named exactly 0..N-1.
    targets.extend([classes[idx]] * n_frames)

# Every loaded frame must have exactly one label.
assert len(images) == len(targets), (len(images), len(targets))

In [None]:
# Stack the per-frame images and label rows into the arrays used for training.
X = np.array(images)
y = np.array(targets)

print('Train X Shape : ', X.shape)
print('Train y Shape : ', y.shape)

### ResNet152V2 Modeling

In [None]:
# Cross-validation splitter and training callbacks.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2021)
reLR = ReduceLROnPlateau(factor=0.5, patience=4, verbose=1)
es = EarlyStopping(monitor='val_loss', mode='min', patience=6)
mc = ModelCheckpoint(
    f'/content/drive/My Drive/hands/model_kf/resnet.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=0,
)

In [None]:
def get_model():
    """Build and compile the classifier: frozen ResNet152V2 backbone plus a small dense head.

    The backbone is loaded with ImageNet weights and marked non-trainable, so
    only the pooling/dense head is fitted. The output layer has one softmax
    unit per column of the module-level `classes` matrix.

    Returns:
        A compiled Keras `Model` taking (112, 112, 3) inputs, using
        categorical cross-entropy, Adam and the accuracy metric.
    """
    # The Keras class is spelled `ResNet152V2`; it is reached through the
    # already-imported `tf` so no extra import is required.
    baseModel = tf.keras.applications.ResNet152V2(weights='imagenet', include_top=False)
    baseModel.trainable = False

    model_in = Input(shape=(112, 112, 3))
    base_model = baseModel(model_in)
    head_model = GlobalAveragePooling2D()(base_model)
    head_model = Dense(256, activation="relu")(head_model)
    head_model = Dropout(0.3)(head_model)
    model_out = Dense(classes.shape[1], activation="softmax")(head_model)

    model = Model(inputs=model_in, outputs=model_out)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])

    return model

In [None]:
# Stratified 5-fold cross-validation: train one model per fold, restore its
# best (lowest val_loss) weights and record the validation loss/accuracy.
# NOTE(review): the split is per frame, so frames from the same source folder
# can end up in both the training and validation part of a fold — confirm
# whether a group-aware split is wanted.
skf = StratifiedKFold(n_splits = 5, random_state = 2021, shuffle = True)
reLR = ReduceLROnPlateau(patience = 4,verbose = 1,factor = 0.5)
es = EarlyStopping(monitor='val_loss', patience=6, mode='min')

# Make sure the checkpoint folder exists before the first weights file is written.
checkpoint_dir = '/content/drive/My Drive/hands/model_kf'
os.makedirs(checkpoint_dir, exist_ok=True)

accuracy = []
losss = []
models = []

for i, (train, validation) in enumerate(skf.split(X, y.argmax(1))):
    weights_path = f'{checkpoint_dir}/cv_study{i + 1}.h5'
    mc = ModelCheckpoint(weights_path, save_best_only=True, verbose=0, monitor='val_loss', mode='min', save_weights_only=True)
    print("-" * 20 +"Fold_"+str(i+1)+ "-" * 20)
    model = get_model()
    history = model.fit(X[train], y[train], epochs = 130, validation_data= (X[validation], y[validation]),
                        verbose=1,batch_size=64,callbacks=[es,mc,reLR])
    # Go back to the best checkpoint of this fold rather than the last epoch.
    model.load_weights(weights_path)

    # Evaluate once; the model is compiled with a single metric, so the
    # result is [loss, accuracy].
    fold_loss, fold_accuracy = model.evaluate(X[validation], y[validation])
    k_accuracy = '%.4f' % fold_accuracy
    k_loss = '%.4f' % fold_loss

    accuracy.append(k_accuracy)
    losss.append(k_loss)
    models.append(model)

print('\nK-fold cross validation accuracy: {}'.format(accuracy))
print('\nK-fold cross validation loss: {}'.format(losss))

### Test data prediction

In [None]:
# Load the frames of every test folder. Each folder is kept as its own array
# in a plain list: folders may hold different numbers of frames, and stacking
# such ragged data into one ndarray fails on recent NumPy versions.
test_images = []
for test_folder in tqdm(test_folders, total=len(test_folders)):
    image_paths = sorted(
        glob(test_folder + '/*.png'),
        key=lambda x: int(x.split('/')[-1].replace('.png', '')),
    )
    # Reuse the training loader so test frames get identical preprocessing.
    test_images.append(np.array([load(image_path) for image_path in image_paths]))

print(len(test_images))

In [None]:

# For every fold model, predict each test folder and average the per-frame
# class probabilities into one probability vector per folder.
pred = []
for model in models:
    predictions = []
    for test_image in tqdm(test_images, total=len(test_images)):
        # Use the current fold's model; always calling models[0] would make
        # the later average a repeat of the first fold instead of an ensemble.
        prediction = np.mean(model.predict(np.array(test_image)), axis=0)
        predictions.append(prediction)
    print(len(predictions))

    pred.append(predictions)

In [None]:
# Average the predictions of all fold models and write them into every
# submission column after the first one.
ensemble_probs = np.mean(pred, axis=0)
sample_submission.iloc[:, 1:] = ensemble_probs
display(sample_submission.head())

In [None]:
# Save the filled-in submission table without the index column.
submission_csv = '/content/drive/My Drive/hands/resnet152.csv'
sample_submission.to_csv(submission_csv, index=False)