In [1]:
import warnings

warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
from keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet101, preprocess_input

In [2]:
########### change dataset here ##################
dataset = 'plant'
##################################################

# Mini-batch size and square input resolution used by the image generators
batch_size, image_size = 64, 224

# Everything belonging to one dataset lives under ./data/<dataset>
data_root = f'./data/{dataset}'

# Image folders: one sub-folder per split
img_path = f'{data_root}/IMG'
train_dir, val_dir, test_dir = (f'{img_path}/{split}' for split in ('train', 'val', 'test'))

# Class-attribute matrices (binary and continuous variants)
attr_binary_path = f'{data_root}/predicate-matrix-binary.txt'
attr_continous_path = f'{data_root}/predicate-matrix-continuous.txt'

# Tab-separated class table; column 1 is later matched against the class folder names
classname = pd.read_csv(f'{data_root}/classes.txt', sep='\t', header=None)

In [3]:
# Count class folders only: a stray file in a split directory is not a class
# and must not inflate the class counts.
seen_class_num = sum(
    os.path.isdir(os.path.join(train_dir, entry)) for entry in os.listdir(train_dir)
)
unseen_class_num = sum(
    os.path.isdir(os.path.join(test_dir, entry)) for entry in os.listdir(test_dir)
)

# One attribute per non-blank line of predicates.txt; the context manager
# closes the file handle instead of leaking it.
with open(f'./data/{dataset}/predicates.txt') as predicates_file:
    class_attr_dim = sum(1 for line in predicates_file if line.rstrip())
class_attr_shape = (class_attr_dim, )

In [4]:
# ImageNet-pretrained ResNet101 backbone; without the fully connected head
# the output has 2048 channels
model = ResNet101(include_top=False, weights='imagenet')

2022-07-14 11:05:45.052019: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-14 11:05:45.057510: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-14 11:05:45.057911: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-14 11:05:45.058687: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [5]:
# Add a global average pooling layer on top of the backbone so every image
# is reduced to a single 2048-dim feature vector
pooled_features = GlobalAveragePooling2D()(model.output)
model_ft = Model(inputs=model.input, outputs=pooled_features)

In [6]:
# Print the layer-by-layer summary of the feature extractor built above
model_ft.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, None, None,   0           ['input_1[0][0]']                
                                3)                                                                
                                                                                                  
 conv1_conv (Conv2D)            (None, None, None,   9472        ['conv1_pad[0][0]']              
                                64)                                                           

In [7]:
# A single generator applies the ResNet preprocessing to every split
image_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Options shared by the three splits; only the source directory differs.
# The labels of each batch are cast with int() further down the notebook.
flow_options = dict(
    batch_size=batch_size,
    target_size=(image_size, image_size),
    color_mode="rgb",
    class_mode='sparse',
    seed=42,
)

train_gen = image_gen.flow_from_directory(directory=train_dir, **flow_options)
val_gen = image_gen.flow_from_directory(directory=val_dir, **flow_options)
test_gen = image_gen.flow_from_directory(directory=test_dir, **flow_options)

Found 36936 images belonging to 25 classes.
Found 9230 images belonging to 25 classes.
Found 24129 images belonging to 13 classes.


In [8]:
# Show the class-folder-name -> integer-label mapping used by the training generator
train_gen.class_indices

{'Apple___Black_rot': 0,
 'Apple___Cedar_apple_rust': 1,
 'Apple___healthy': 2,
 'Blueberry___healthy': 3,
 'Cherry_(including_sour)___healthy': 4,
 'Corn_(maize)___Common_rust_': 5,
 'Corn_(maize)___Northern_Leaf_Blight': 6,
 'Corn_(maize)___healthy': 7,
 'Grape___Esca_(Black_Measles)': 8,
 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 9,
 'Grape___healthy': 10,
 'Peach___healthy': 11,
 'Pepper,_bell___healthy': 12,
 'Potato___healthy': 13,
 'Raspberry___healthy': 14,
 'Soybean___healthy': 15,
 'Strawberry___healthy': 16,
 'Tomato___Bacterial_spot': 17,
 'Tomato___Early_blight': 18,
 'Tomato___Late_blight': 19,
 'Tomato___Leaf_Mold': 20,
 'Tomato___Septoria_leaf_spot': 21,
 'Tomato___Spider_mites Two-spotted_spider_mite': 22,
 'Tomato___Target_Spot': 23,
 'Tomato___healthy': 24}

### Binary attr

In [9]:
RealCE_binary = pd.read_csv(attr_binary_path, header=None, sep='\t')

# Column 0 holds one space-separated row of numbers per line of the file;
# empty tokens produced by repeated spaces are dropped before the float cast.
attr_list_b = [
    np.array([float(token) for token in row_text.split(' ') if token != ''])
    for row_text in RealCE_binary[0]
]

### Continuous attr

In [10]:
RealCE_continous = pd.read_csv(attr_continous_path, header=None, sep='\t')

# Column 0 holds one space-separated row of numbers per line of the file;
# empty tokens produced by repeated spaces are dropped before the float cast.
attr_list_c = [
    np.array([float(token) for token in row_text.split(' ') if token != ''])
    for row_text in RealCE_continous[0]
]

### Continuous attr, min-max scaled

In [11]:
def _min_max_scale(row):
    """Rescale one attribute row to [0, 1] using that row's own min and max.

    A row whose values are all equal has zero range; it is mapped to zeros
    instead of dividing by zero, which would silently produce NaN because
    warnings are suppressed at the top of the notebook.
    """
    low = np.min(row)
    span = np.max(row) - low
    if span == 0:
        return np.zeros_like(row)
    return (row - low) / span


# Reuse the rows already parsed into attr_list_c rather than re-reading and
# re-parsing the same file; the source rows are not modified.
attr_list_cmm = [_min_max_scale(row) for row in attr_list_c]

### Continuous attr, mean/std standardized

In [12]:
def _standardize(row):
    """Shift one attribute row to zero mean and scale it to unit standard deviation.

    A row whose values are all equal has zero standard deviation; it is
    mapped to zeros instead of dividing by zero, which would silently produce
    NaN because warnings are suppressed at the top of the notebook.
    """
    sigma = np.std(row)
    if sigma == 0:
        return np.zeros_like(row)
    return (row - np.mean(row)) / sigma


# Reuse the rows already parsed into attr_list_c rather than re-reading and
# re-parsing the same file; the source rows are not modified.
attr_list_cms = [_standardize(row) for row in attr_list_c]

### Build the attribute lists indexed by generator label for train and val

In [13]:
train_attr_list_b, train_attr_list_c = [], []
train_attr_list_cmm, train_attr_list_cms = [], []

# Walk the generator's classes in mapping order and, for each one, copy the
# attribute rows found at that class's row position in the classes table.
for class_name in train_gen.class_indices:
    row = np.where(classname[1] == class_name)[0][0]  # first matching row
    train_attr_list_b.append(attr_list_b[row])
    train_attr_list_c.append(attr_list_c[row])
    train_attr_list_cmm.append(attr_list_cmm[row])
    train_attr_list_cms.append(attr_list_cms[row])

### Build the attribute lists indexed by generator label for test

In [14]:
test_attr_list_b, test_attr_list_c = [], []
test_attr_list_cmm, test_attr_list_cms = [], []

# Walk the generator's classes in mapping order and, for each one, copy the
# attribute rows found at that class's row position in the classes table.
for class_name in test_gen.class_indices:
    row = np.where(classname[1] == class_name)[0][0]  # first matching row
    test_attr_list_b.append(attr_list_b[row])
    test_attr_list_c.append(attr_list_c[row])
    test_attr_list_cmm.append(attr_list_cmm[row])
    test_attr_list_cms.append(attr_list_cms[row])

In [15]:
save_path = f'./data/{dataset}/feature_label_attr'

# Create every split folder independently. exist_ok makes the cell safe to
# re-run and also repairs a partially created tree (root present, a split
# folder missing), which the previous root-only check skipped.
for split_name in ('train', 'val', 'test'):
    os.makedirs(f'{save_path}/{split_name}', exist_ok=True)

In [16]:
def calculate_ft_attr(sets, img_gen, model_ft, save_path, attr_list_b, attr_list_c, attr_list_cmm, attr_list_cms):
    """Extract features for one split and save them with per-image labels and attributes.

    Batches are pulled from ``img_gen`` until at least ``img_gen.n`` images
    have been processed. Each batch is passed through ``model_ft.predict``,
    and every label in the batch is used as an index into the four attribute
    lists given as arguments. Six ``.npy`` files are written to
    ``{save_path}/{sets}/``: feature, attr_b, attr_c, attr_cmm, attr_cms, label.

    Args:
        sets: Name of the split sub-folder under ``save_path``; np.save does
            not create it, so it must already exist.
        img_gen: Iterator yielding ``(images, labels)`` batches and exposing
            the number of images as ``.n``.
        model_ft: Object whose ``predict(images)`` returns one feature row per image.
        save_path: Root output directory.
        attr_list_b: Binary attribute vectors, indexed by integer label.
        attr_list_c: Continuous attribute vectors, indexed by integer label.
        attr_list_cmm: Min-max scaled attribute vectors, indexed by integer label.
        attr_list_cms: Mean/std scaled attribute vectors, indexed by integer label.
    """
    label_list = []
    attr_b, attr_c = [], []
    attr_cmm, attr_cms = [], []
    feature_batches = []

    count = 0
    while count < img_gen.n:
        # The next() builtin works for iterators with or without a .next() method
        data, label = next(img_gen)
        if len(data) == 0:
            # An empty batch would never advance the counter; stop instead of spinning
            break

        # Collect the per-batch features and join them once after the loop;
        # concatenating inside the loop re-copies all earlier rows every time.
        feature_batches.append(model_ft.predict(data))

        # Look attributes up in the lists passed by the caller, not in the
        # notebook-level train lists, so each split gets its own class attributes.
        for raw_label in label:
            class_idx = int(raw_label)
            attr_b.append(attr_list_b[class_idx])
            attr_c.append(attr_list_c[class_idx])
            attr_cmm.append(attr_list_cmm[class_idx])
            attr_cms.append(attr_list_cms[class_idx])
            label_list.append(class_idx)
        count += len(data)

    if feature_batches:
        ft_feature = np.concatenate(feature_batches)
    else:
        # Nothing was processed: keep the empty (0, 2048) float32 result
        ft_feature = np.array([], dtype='float32').reshape(0, 2048)

    attr_b = np.array(attr_b)
    attr_c = np.array(attr_c)
    attr_cmm = np.array(attr_cmm)
    attr_cms = np.array(attr_cms)
    label_list = np.array(label_list)

    print(ft_feature.shape)
    print(attr_b.shape)
    print(attr_c.shape)
    print(attr_cmm.shape)
    print(attr_cms.shape)
    print(label_list.shape)

    np.save(f'{save_path}/{sets}/feature.npy', ft_feature)
    np.save(f'{save_path}/{sets}/attr_b.npy', attr_b)
    np.save(f'{save_path}/{sets}/attr_c.npy', attr_c)
    np.save(f'{save_path}/{sets}/attr_cmm.npy', attr_cmm)
    np.save(f'{save_path}/{sets}/attr_cms.npy', attr_cms)
    np.save(f'{save_path}/{sets}/label.npy', label_list)

## train data

In [17]:
# Extract and store features, labels and attribute targets for the training split
calculate_ft_attr(
    'train', train_gen, model_ft, save_path,
    attr_list_b=train_attr_list_b,
    attr_list_c=train_attr_list_c,
    attr_list_cmm=train_attr_list_cmm,
    attr_list_cms=train_attr_list_cms,
)

2022-07-14 11:05:54.098245: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8201


(36936, 2048)
(36936, 35)
(36936, 46)
(36936, 46)
(36936, 46)
(36936,)


## val data

In [18]:
# Validation split, paired with the train attribute lists
# NOTE(review): assumes val has the same class folders as train — confirm
calculate_ft_attr(
    'val', val_gen, model_ft, save_path,
    attr_list_b=train_attr_list_b,
    attr_list_c=train_attr_list_c,
    attr_list_cmm=train_attr_list_cmm,
    attr_list_cms=train_attr_list_cms,
)

(9230, 2048)
(9230, 35)
(9230, 46)
(9230, 46)
(9230, 46)
(9230,)


## test data

In [19]:
# Test split, paired with the attribute lists built from the test generator's classes
calculate_ft_attr(
    'test', test_gen, model_ft, save_path,
    attr_list_b=test_attr_list_b,
    attr_list_c=test_attr_list_c,
    attr_list_cmm=test_attr_list_cmm,
    attr_list_cms=test_attr_list_cms,
)

(24129, 2048)
(24129, 35)
(24129, 46)
(24129, 46)
(24129, 46)
(24129,)
