In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.preprocessing import *
from sklearn.model_selection import train_test_split
import os
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras import Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers, initializers, regularizers, metrics
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.layers import BatchNormalization, Conv2D, Activation , AveragePooling2D , Input ,Dropout
from tensorflow.keras.layers import Dense,  MaxPooling2D, Add, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow_addons as tfa

# GPU 설정
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # 특정 GPU에 1GB 메모리만 할당하도록 제한
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_virtual_device_configuration(
            gpus[1],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=7048)])
    except RuntimeError as e:
    # 프로그램 시작시에 가상 장치가 설정되어야만 합니다
        print(e)

# 테스트 파일을  generator로 예측하신다면 아래 코드 !

In [2]:
# test 데이터 안 '6' 폴더에 있는 체크포인트 오류 데이터 제거 후 실행
path = './public'
test_dirs = path + '/test'
files = []
ids=[]
for img_cat in os.listdir(test_dirs):
    id_dir =  img_cat
    for filename in os.listdir(test_dirs + '/' + id_dir):
        files.append(id_dir + '/' +filename)
        ids.append(filename.split('.JPG')[0])
                           
test_data = pd.DataFrame(
                    {"file":files,
                    "id":ids}
                )    

display(test_data.head())
print(test_data.shape)

Unnamed: 0,file,id
0,0/0hmnf5orki.JPG,0hmnf5orki
1,0/0bgj9co0zl.JPG,0bgj9co0zl
2,0/03123sl42g.JPG,03123sl42g
3,0/0vwaki2su2.JPG,0vwaki2su2
4,0/09jgq862fk.JPG,09jgq862fk


(37964, 2)


In [3]:
# 데이터 형상 관련 상수 정의 (훈련할때 썼던 이미지 크기 사용)
IMAGE_WIDTH=224
IMAGE_HEIGHT=224
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT)
IMAGE_CHANNEL=3
batch_size=128

In [4]:
# test
with tf.device('/device:GPU:0'):
    test_datagen=ImageDataGenerator(rescale=1./255) # MinMaxScaling
    test_generator=test_datagen.flow_from_dataframe(
        test_data,
        test_dirs,
        x_col= "file",
        y_col= "id",
        target_size = IMAGE_SIZE,
        class_mode = "raw",
        batch_size = batch_size,
        shuffle=False)


Found 37964 validated image filenames.


In [5]:
# 모델불러오기
with tf.device('/device:GPU:0'):

    model = load_model('./landmark_MobileNetv2_model_270_480_radam_2.h5', compile=False)

In [None]:
with tf.device('/device:GPU:0'):
    model.compile(loss='categorical_crossentropy',
                optimizer=optimizers.RectifiedAdam(),
                metrics=['accuracy'])
    pred = model.predict(test_generator)

In [7]:
pred_df = pd.DataFrame({'id':test_data['id'].values,
                       'landmark_id':np.argmax(pred, axis=1),
                       'conf':np.max(pred, axis=1)})

In [10]:
submission = pd.read_csv(path + '/sample_submisstion.csv')
sub = pd.merge(submission[['id']], pred_df, on='id', how='left')
sub.head()


Unnamed: 0,id,landmark_id,conf
0,xlf1tgh2ih,956,1.0
1,68a3ot4osk,956,0.99999
2,si2lek4u0a,956,0.999785
3,rmtqxhipnv,956,0.936642
4,2flmjdud0e,956,1.0


In [11]:
sub.to_csv(path + '/Densenet.csv', encoding='cp949', index=False)

# 테스트 파일을  TFRecord로 예측하신다면 아래 코드 ! 

In [7]:
with tf.device('/device:GPU:0'):
    path = './public'
    test_tfrecord_path = path + '/tf_record_test.tfrecords'
    BATCH_SIZE = 32
    NUM_CLASS = 1049
    img_size = (331,331) # <- 학습할때 썼던 이미지 사이즈 입력해주세요 !
    
    image_feature_description_test = {
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'id': tf.io.FixedLenFeature([], tf.string),
    
    }


    def _parse_image_function_test(example_proto):
        return tf.io.parse_single_example(example_proto, image_feature_description_test)

    def map_func_test(target_record):
        img = target_record['image_raw']
        label = target_record['id']
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.dtypes.cast(img, tf.float32)
        return img, label

    def prep_func_test(image, label):
        result_image = image / 255
        result_image = tf.image.resize(result_image, img_size)

        return result_image, label

    test_dataset = tf.data.TFRecordDataset(test_tfrecord_path, compression_type='GZIP')
    test_dataset = test_dataset.map(_parse_image_function_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.map(map_func_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.batch(BATCH_SIZE)
    test_dataset = test_dataset.map(prep_func_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    
    
    # 모델불러오기
    model = load_model('./landmark_inception_resnet_model_299_299_radam_dropout_update.h5', compile=False) # 학습하셨던 모델을 불러와주세요
    id_list = []
    for _, label in test_dataset:
        id_list.extend(list(label))

    pred = model.predict(test_dataset)
    
    pred_df = pd.DataFrame({'id':id_list,
                       'landmark_id':np.argmax(pred,axis=1),
                       'conf':np.max(pred,axis=1)})
    pred_df['id'] = pred_df['id'].apply(lambda x : x.numpy().decode('utf-8'))
    
    submission = pd.read_csv(path + '/sample_submisstion.csv')
    sub = pd.merge(submission[['id']], pred_df, on='id', how='left')
    display(sub.head())

Unnamed: 0,id,landmark_id,conf
0,xlf1tgh2ih,956,0.999983
1,68a3ot4osk,956,0.999711
2,si2lek4u0a,956,0.999894
3,rmtqxhipnv,956,0.999436
4,2flmjdud0e,956,0.999974


In [8]:
sub.to_csv(path + '/landmark_inception_resnet_model_299_299_radam_dropout_update.csv', encoding='cp949', index=False) # 저장할 파일 명 정해서 제출파일 만들어주세요~~~

## 앙상블  

In [9]:
pred_1 = pd.read_csv('./public/mobile_gp_270_480.csv')
pred_2 = pd.read_csv('./public/landmark_EFNET5_model_270_480_radam_dropout.csv')
pred_3 = pd.read_csv('./public/inception_299_299.csv')
pred_4 = pd.read_csv('./public/landmark_inception_resnet_model_299_299_radam_dropout_update.csv')
pred_5 = pd.read_csv('./landmark_densenet201_1114.csv')

## max_ensemble

In [10]:
landmark_id = []
conf = []
df_dict = {0:pred_1, 1:pred_2, 2: pred_3, 3: pred_4, 4: pred_5}
for i in range(len(pred_1)):
    max_conf = max([df['conf'][i] for df in df_dict.values()])
    max_landmark_df = np.argmax([[df['conf'][i] for df in df_dict.values()]])
    max_landmark_id = df_dict[max_landmark_df]['landmark_id'][i]
    landmark_id.append(max_landmark_id)
    conf.append(max_conf)


In [12]:
path = './public'
submission = pd.read_csv(path + '/sample_submisstion.csv')
submission['landmark_id'] = landmark_id
submission['conf'] = conf
submission.head()

Unnamed: 0,id,landmark_id,conf
0,xlf1tgh2ih,956,0.999999
1,68a3ot4osk,956,0.999999
2,si2lek4u0a,956,0.999998
3,rmtqxhipnv,956,1.0
4,2flmjdud0e,956,1.0


In [13]:
submission.to_csv(path + '/test_ensemble_5model.csv', encoding='cp949', index=False) # 저장할 파일 명 정해서 제출파일 만들어주세요~~~

## voting + max_ensemble 

In [15]:
from collections import Counter
landmark_id = []
conf = []
df_dict = {0:pred_1, 1:pred_2, 2: pred_3, 3: pred_4, 4:pred_5}
for i in range(len(pred_1)):
    count_id = Counter([df['landmark_id'][i] for df in df_dict.values()])
    max_id = max(count_id.items(), key = lambda x: x[1])
    if max_id[1] == 1:
        max_conf = max([df['conf'][i] for df in df_dict.values()])
        max_landmark_df = np.argmax([[df['conf'][i] for df in df_dict.values()]])
        max_landmark_id = df_dict[max_landmark_df]['landmark_id'][i]
        landmark_id.append(max_landmark_id)
        conf.append(max_conf)
    else:
        landmark_id.append(max_id[0])
        conf.append(max([df['conf'][i] for df in df_dict.values() if df['landmark_id'][i]==max_id[0]]))
        

In [16]:
submission = pd.read_csv(path + '/sample_submisstion.csv')
submission['landmark_id'] = landmark_id
submission['conf'] = conf
submission.head()

Unnamed: 0,id,landmark_id,conf
0,xlf1tgh2ih,956,0.999999
1,68a3ot4osk,956,0.999999
2,si2lek4u0a,956,0.999998
3,rmtqxhipnv,956,1.0
4,2flmjdud0e,956,1.0


In [17]:
submission.to_csv(path + '/test_ensemble_voting_max_5model.csv', encoding='cp949', index=False) # 저장할 파일 명 정해서 제출파일 만들어주세요~~~

## Soft Voting 

In [9]:
def prediction(model_path, data_path):
    model = load_model(model_path,compile=False)
    img_size = tuple(model.input.shape[1:3])
    BATCH_SIZE = 32
    NUM_CLASS = 1049
    
    image_feature_description_test = {
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'id': tf.io.FixedLenFeature([], tf.string),
    
    }


    def _parse_image_function_test(example_proto):
        return tf.io.parse_single_example(example_proto, image_feature_description_test)

    def map_func_test(target_record):
        img = target_record['image_raw']
        label = target_record['id']
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.dtypes.cast(img, tf.float32)
        return img, label

    def prep_func_test(image, label):
        result_image = image / 255
        result_image = tf.image.resize(result_image, img_size)

        return result_image, label

    test_dataset = tf.data.TFRecordDataset(data_path, compression_type='GZIP')
    test_dataset = test_dataset.map(_parse_image_function_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.map(map_func_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.batch(BATCH_SIZE)
    test_dataset = test_dataset.map(prep_func_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    
    id_list = []
    for _, label in test_dataset:
        id_list.extend(list(label))

    pred = model.predict(test_dataset)
    
    return id_list, pred

In [10]:
id_list = prediction('./landmark_MobileNetv2_model_270_480_radam_2.h5','./public/tf_record_test.tfrecords')

In [7]:
with tf.device('/device:GPU:0'):
    print('pred_1')
    id_list, pred_1 = prediction('./landmark_MobileNetv2_model_270_480_radam_2.h5','./public/tf_record_test.tfrecords')
    print('pred_2')
    _, pred_2 = prediction('./landmark_EFNET5_model_270_480_radam_dropout.h5', './public/tf_record_test.tfrecords')
    print('pred_3')
    _, pred_3 = prediction('./landamark_inception_model_299_299.h5', './public/tf_record_test.tfrecords')
    print('pred_4')
    _, pred_4 = prediction('./landmark_inception_resnet_model_299_299_radam_dropout_update.h5', './public/tf_record_test.tfrecords')
    print('pred_5')
    _, pred_5 = prediction('./landmark_densenet201_270by480_9918.h5', './public/tf_record_test.tfrecords')
#     print('pred_6')
#     _, pred_6 = prediction('./landmark_NasNet_Large_model_331_331_radam_dropout.h5','./public/tf_record_test.tfrecords')

pred_1
pred_2
pred_3
pred_4
pred_5
pred_6


In [8]:
np.save('pred_1.npy',pred_1)
np.save('pred_2.npy',pred_2)
np.save('pred_3.npy',pred_3)
np.save('pred_4.npy',pred_4)
np.save('pred_5.npy',pred_5)
np.save('pred_6.npy',pred_6)

In [5]:
pred_1 = np.load('./pred_1.npy')
pred_2 = np.load('./pred_2.npy')
pred_3 = np.load('./pred_3.npy')
pred_4 = np.load('./pred_4.npy')
pred_5 = np.load('./pred_5.npy')

In [19]:
pred_1

array([[8.5265166e-15, 4.9434663e-18, 1.9810954e-17, ..., 1.0334400e-17,
        1.2217659e-17, 3.3298591e-16],
       [6.6885886e-11, 3.5676258e-12, 5.5092481e-13, ..., 2.5416981e-13,
        2.7326053e-11, 8.6306044e-14],
       [5.5723346e-09, 1.8538246e-17, 2.4921706e-15, ..., 7.9169940e-14,
        3.1434604e-15, 2.8477790e-14],
       ...,
       [3.0951910e-10, 4.2182859e-15, 4.5240778e-14, ..., 2.7640436e-13,
        7.7156746e-14, 6.8946343e-10],
       [2.3851907e-14, 3.4372250e-21, 3.5139145e-17, ..., 2.0842823e-09,
        1.5614553e-19, 9.9588213e-19],
       [2.2326470e-16, 4.6426410e-20, 1.4189071e-17, ..., 2.3009171e-14,
        3.2451187e-20, 3.1946650e-18]], dtype=float32)

In [20]:
pred_soft_voting = ((pred_2)+(pred_3)+ (pred_4)+ (2*pred_5) )/5
pred_df = pd.DataFrame({'id':id_list,
                   'landmark_id':np.argmax(pred_soft_voting,axis=1),
                   'conf':np.max(pred_soft_voting,axis=1)})
pred_df['id'] = pred_df['id'].apply(lambda x : x.numpy().decode('utf-8'))
pred_df.head()

Unnamed: 0,id,landmark_id,conf
0,0hmnf5orki,274,0.999995
1,0bgj9co0zl,393,0.999982
2,03123sl42g,575,0.999996
3,0vwaki2su2,388,0.999985
4,09jgq862fk,832,0.999886


In [21]:
path = './public'
submission = pd.read_csv(path + '/sample_submisstion.csv')
sub = pd.merge(submission[['id']], pred_df, on='id', how='left')
display(sub.head())

Unnamed: 0,id,landmark_id,conf
0,xlf1tgh2ih,956,0.999995
1,68a3ot4osk,956,0.99994
2,si2lek4u0a,956,0.99994
3,rmtqxhipnv,956,0.99985
4,2flmjdud0e,956,0.999992


In [22]:
sub.to_csv(path + '/Final_soft_voting_Weighted_model_3.csv', encoding='cp949', index=False) 

In [22]:

path = './public'
train_tfrecord_path = path + '/tf_record_train.tfrecords'
valid_tfrecord_path = path + '/tf_record_valid.tfrecords'

BUFFER_SIZE = 256
BATCH_SIZE = 32
NUM_CLASS = 1049
img_size = (270,480)
with tf.device('/device:GPU:0'):
    image_feature_description = {
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'randmark_id': tf.io.FixedLenFeature([], tf.int64),
        # 'id': tf.io.FixedLenFeature([], tf.string),
    }



    def _parse_image_function(example_proto):
        return tf.io.parse_single_example(example_proto, image_feature_description)

    def map_func(target_record):
        img = target_record['image_raw']
        label = target_record['randmark_id']
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.dtypes.cast(img, tf.float32)
        return img, label

    def prep_func(image, label):
        result_image = image / 255
        result_image = tf.image.resize(result_image, img_size)

        onehot_label = tf.one_hot(label, depth=NUM_CLASS)
        return result_image, onehot_label

    dataset = tf.data.TFRecordDataset(train_tfrecord_path, compression_type='GZIP')
    dataset = dataset.map(_parse_image_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(map_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(prep_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    valid_dataset = tf.data.TFRecordDataset(valid_tfrecord_path, compression_type='GZIP')
    valid_dataset = valid_dataset.map(_parse_image_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    valid_dataset = valid_dataset.map(map_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    valid_dataset = valid_dataset.batch(BATCH_SIZE)
    valid_dataset = valid_dataset.map(prep_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    valid_dataset = valid_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
     
    


In [23]:
with tf.device('/device:GPU:0'):
    model = load_model('./landmark_MobileNetv2_model_270_480_radam_2.h5', compile=False)
    model.compile(loss='categorical_crossentropy',
                optimizer=Adam(learning_rate=0.0001),
                metrics=['accuracy'])
    
    model.evaluate(valid_dataset)



In [24]:
list1 = [98.27, 98.40, 98.45,98.93,98.47]

In [27]:
for i in list1:
    print(i/sum(list1))

0.19952489239015675
0.19978884106229192
0.19989035978234387
0.20086493949484288
0.19993096727036466
