In [1]:
import tensorflow as tf
import pandas as pd
import tqdm
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
df= pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
#df = df[:100]
all_id = np.array(df['Id'])


In [3]:
def get_images_from_id(data_id, _type = 'train', img_size=128, colours=3):
    pictures = np.zeros((len(data_id), img_size, img_size, colours))
    for i, single_id in tqdm.tqdm(enumerate(data_id)):
        img = cv2.cvtColor(cv2.imread(f"../input/petfinder-pawpularity-score/{_type}/{single_id}.jpg"), cv2.COLOR_BGR2RGB)
        new_img = cv2.resize(img, (img_size, img_size))
        pictures[i] = new_img/255
    return pictures

In [4]:
# y = df['Pawpularity']
# X_image = get_images_from_id(df['Id'])
# X_metadata = df.drop(columns=['Id', 'Pawpularity'])

In [5]:
X_train_id, X_valid_id, y_train, y_valid = train_test_split(all_id, df['Pawpularity'], test_size=0.2, random_state=42)

X_train_metadata = df[df['Id'].isin(X_train_id)]
X_train_pictures = get_images_from_id(X_train_metadata['Id'])
X_train_metadata = X_train_metadata.drop(columns=['Id', 'Pawpularity'])

X_valid_metadata = df[df['Id'].isin(X_valid_id)]
X_valid_pictures = get_images_from_id(X_valid_metadata['Id'])
X_valid_metadata = X_valid_metadata.drop(columns=['Id', 'Pawpularity'])

7929it [01:47, 73.46it/s]
1983it [00:26, 74.84it/s]


In [6]:
tf.random.set_seed(42)
IMG_SIZE = 128


input_image = tf.keras.Input((IMG_SIZE, IMG_SIZE, 3)) # Wejście na zdjęcia 128X128x3
input_metadata = tf.keras.Input(X_train_metadata.shape[1]) # Wejście na metadane 13 kategorii

image_conv2D_1 = tf.keras.layers.Conv2D(8, 3, activation=tf.keras.activations.selu, padding='same')(input_image)
image_conv2D_2 = tf.keras.layers.Conv2D(8, 3, activation=tf.keras.activations.selu, padding='same')(image_conv2D_1)
image_maxpool2D_1 = tf.keras.layers.MaxPooling2D(2)(image_conv2D_2)

image_conv2D_3 = tf.keras.layers.Conv2D(16, 3, activation=tf.keras.activations.selu, padding='same')(image_maxpool2D_1)
image_conv2D_4 = tf.keras.layers.Conv2D(16, 3, activation=tf.keras.activations.selu, padding='same')(image_conv2D_3)
image_maxpool2D_2 = tf.keras.layers.MaxPooling2D(2)(image_conv2D_4)

image_conv2D_5 = tf.keras.layers.Conv2D(32, 3, activation=tf.keras.activations.selu, padding='same')(image_maxpool2D_2)
image_conv2D_6 = tf.keras.layers.Conv2D(32, 3, activation=tf.keras.activations.selu, padding='same')(image_conv2D_5)
image_maxpool2D_3 = tf.keras.layers.MaxPooling2D(2)(image_conv2D_6)

image_conv2D_7 = tf.keras.layers.Conv2D(64, 3, activation=tf.keras.activations.selu, padding='same')(image_maxpool2D_3)
image_conv2D_8 = tf.keras.layers.Conv2D(64, 3, activation=tf.keras.activations.selu, padding='same')(image_conv2D_7)
image_maxpool2D_4 = tf.keras.layers.MaxPooling2D(2)(image_conv2D_8)

image_conv2D_9 = tf.keras.layers.Conv2D(128, 3, activation=tf.keras.activations.selu, padding='same')(image_maxpool2D_4)
image_conv2D_10 = tf.keras.layers.Conv2D(128, 3, activation=tf.keras.activations.selu, padding='same')(image_conv2D_9)
image_maxpool2D_5 = tf.keras.layers.MaxPooling2D(2)(image_conv2D_10) 
flatten = tf.keras.layers.GlobalAveragePooling2D()(image_maxpool2D_5) #Podobne działanie do Flatten

metadata_dense_1 = tf.keras.layers.Dense(16, activation="relu", kernel_regularizer=tf.keras.regularizers.l2())(input_metadata)

concat = tf.keras.layers.concatenate([flatten, metadata_dense_1])
output = tf.keras.layers.Dense(1)(concat)
model = tf.keras.Model(inputs=[input_image, input_metadata], outputs=[output])

model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam(1e-3), metrics=["mse"])

checkpoint_cb = tf.keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

history_1 = model.fit((X_train_pictures, X_train_metadata), y_train, epochs=300, validation_data=((X_valid_pictures, X_valid_metadata), y_valid), callbacks=[checkpoint_cb, early_stopping_cb])

model = tf.keras.models.load_model('best_model.h5')

2021-12-15 20:40:36.484784: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-15 20:40:36.570959: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-15 20:40:36.571784: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-15 20:40:36.573293: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Epoch 1/300


2021-12-15 20:40:44.882831: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300


In [7]:
df_test =  pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
X_images_test = get_images_from_id(df_test['Id'], 'test')
X_metadata_test = df_test.drop(columns=['Id'])

8it [00:00, 173.84it/s]


In [8]:
pred = model.predict((X_images_test, X_metadata_test))
df_test['Pawpularity'] = pred
df_test

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,37.551666
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,37.967136
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1,38.78141
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0,38.630039
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0,40.962891
5,b03f7041962238a7c9d6537e22f9b017,0,0,1,1,1,1,1,1,1,0,1,0,40.982422
6,c978013571258ed6d4637f6e8cc9d6a3,1,0,0,0,1,1,0,1,0,1,1,1,40.192234
7,e0de453c1bffc20c22b072b34b54e50f,1,0,1,0,0,0,0,0,1,0,0,1,37.499016


In [9]:
submission_df = df_test[['Id','Pawpularity']]
submission_df.to_csv("submission.csv", index=False)
submission_df

Unnamed: 0,Id,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,37.551666
1,43a2262d7738e3d420d453815151079e,37.967136
2,4e429cead1848a298432a0acad014c9d,38.78141
3,80bc3ccafcc51b66303c2c263aa38486,38.630039
4,8f49844c382931444e68dffbe20228f4,40.962891
5,b03f7041962238a7c9d6537e22f9b017,40.982422
6,c978013571258ed6d4637f6e8cc9d6a3,40.192234
7,e0de453c1bffc20c22b072b34b54e50f,37.499016
