In [None]:
import pandas as pd
# Training metadata: read the CSV, rename the 'Id' column to 'filename' and
# append the '.jpg' extension so each value names an image file on disk.
data = (
    pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv", sep=',')
    .rename(columns={'Id': 'filename'})
    .assign(filename=lambda frame: frame['filename'] + '.jpg')
)
print("shape data: ", data.shape)

# Image directory plus the input geometry and batch size used further down.
dataset_dir = '/kaggle/input/petfinder-pawpularity-score/train'
width = height = 224
batch_size = 32

from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Augmentation settings applied to every image served by the iterator below.
#
# Pixel values are intentionally left in the 0-255 range: the `preprocess`
# step applied before the backbone computes (x - 128) / 128, so also rescaling
# by 1/255 here would squash every pixel into roughly [-1, -0.992].
#
# NOTE(review): no `subset` is passed to flow_from_dataframe, so the
# validation_split below does not appear to take effect -- confirm intent.
# NOTE(review): the random transforms make the served images (and anything
# derived from them) non-deterministic between runs -- confirm this is wanted.
augmenter = ImageDataGenerator(
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=(0.75, 1),
    shear_range=0.1,
    zoom_range=[0.75, 1],
    horizontal_flip=True,
    validation_split=0.2
)

# Batch iterator over the metadata frame. `shuffle=False` keeps the batches in
# the same order as `data_generator.filenames`.
data_generator = augmenter.flow_from_dataframe(
    dataframe=data,
    directory=dataset_dir,
    x_col="filename",
    y_col="Pawpularity",
    class_mode="raw",  # "raw" for regression targets
    target_size=(height, width),  # Keras expects (height, width)
    batch_size=batch_size,
    shuffle=False
)

In [None]:
import sys
import os
sys.path.insert(0, "/kaggle/input/efnetv2src/efficientnet-v2-keras-main")
sys.path.append('../input/tfkeras-efficientnetsv2/')

from efficientnet_v2 import EfficientNetV2XL
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization, Dropout
from tensorflow.keras.metrics import MeanAbsoluteError, MeanAbsolutePercentageError
import tensorflow_addons as tfa

# Optimizer: Rectified Adam wrapped in Lookahead (both from tensorflow_addons).
radam = tfa.optimizers.RectifiedAdam(learning_rate=0.001)
optimizer = tfa.optimizers.Lookahead(radam, sync_period=6, slow_step_size=0.5)

# Headless EfficientNetV2-XL backbone loaded from a local weights file and
# frozen so that its weights are not updated.
efficientnet = EfficientNetV2XL(
    weights='../input/tfkeras-efficientnetsv2/21_ft1k_notop/efficientnetv2-xl-21k-ft1k_notop.h5',
    include_top=False,
    input_shape=(height, width, 3),
)
efficientnet.trainable = False

# NOTE(review): in the visible cells the model is only used through
# predict(), which does not use the loss/optimizer configured here -- confirm
# whether compile() is still needed.
efficientnet.compile(
    loss="mean_absolute_error",
    optimizer=optimizer,
    metrics=[MeanAbsoluteError(), MeanAbsolutePercentageError()],
)

In [None]:
import tensorflow as tf
def preprocess(image):
    """Cast `image` to float32 and return (image - 128) / 128.

    For inputs in the 0-255 range the result lies in [-1, 1).
    """
    pixels = tf.cast(image, dtype=tf.float32)
    centered = pixels - 128.00
    return centered / 128.00

X_csvPath = "/kaggle/working/X_features.csv"
y_csvPath = "/kaggle/working/y_targets.csv"

from tqdm import tqdm

# Feature extraction.
#
# For every image served by `data_generator`, write one line to X_csvPath
# (tabular metadata columns followed by the flattened backbone features) and
# one line to y_csvPath (the Pawpularity target), in the same order.
#
# NOTE(review): `preprocess` computes (x - 128) / 128, i.e. it assumes pixel
# values in the 0-255 range; make sure the generator does not also rescale.

# Rows are matched to images by filename, so filenames must be unique.
if not data['filename'].is_unique:
    raise ValueError("duplicate filenames in metadata; rows cannot be matched to images")

# Index the metadata once instead of scanning the whole frame for every image.
metadata = data.set_index('filename')
tabular_cols = metadata.drop(columns=['Pawpularity'])
targets = metadata['Pawpularity']

data_generator.reset()
with open(X_csvPath, "w") as X_csv, open(y_csvPath, "w") as y_csv:
    # The Keras iterator never stops when used in a plain `for` loop, so walk
    # the batches by index: len() is the number of batches in one pass and
    # indexing returns each batch exactly once (batches of `batch_size`
    # images, the last one possibly shorter).
    for batch_nb in tqdm(range(len(data_generator))):
        images, _ = data_generator[batch_nb]

        # The iterator is built with shuffle=False, so batch k holds the
        # files at positions [k * batch_size, k * batch_size + len(batch)).
        idx = batch_nb * data_generator.batch_size
        filenames = data_generator.filenames[idx:idx + images.shape[0]]

        # One forward pass per batch; each image's feature map is flattened
        # to a single vector (original author's note: (7, 7, 1280) -> 62720).
        img_features = efficientnet.predict(preprocess(images), verbose=0)
        img_features = img_features.reshape(img_features.shape[0], -1)

        # Let pandas format the metadata rows so the text layout matches a
        # per-row DataFrame.to_csv(header=False, index=False).
        existing_cols = tabular_cols.loc[filenames].to_csv(header=False, index=False).splitlines()
        for meta_line, feature_row in zip(existing_cols, img_features):
            features_vec = ",".join(str(v) for v in feature_row)
            X_csv.write("{},{}\n".format(meta_line, features_vec))

        y_csv.write(targets.loc[filenames].to_csv(header=False, index=False))