In [1]:
import time
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import os
from glob import glob
from pathlib import Path
import cv2

from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, f1_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,datasets,models

import matplotlib.pyplot as plt

In [26]:
#settings
# path = 'D:/Kaggle/PetFinder/train/'

TARGET_NAME = 'Pawpularity'
# Image data directories
TRAIN_DIRECTORY = 'D:/Kaggle/PetFinder/train/'
TEST_DIRECTORY = 'D:/Kaggle/PetFinder/test/'
# train data split setting
VAL_SIZE = 0.2
SEED = 0
# tf setting
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMG_SIZE = 224
BATCH_SIZE = 64

In [48]:
# learning material:
# http://www.datalearner.com/blog/1051556350245210
# https://blog.csdn.net/rainweic/article/details/95737315

@tf.function
def get_image(path: str) -> tf.Tensor:
    """Function loads image from a file and preprocesses it.
    :param path: Path to image file
    :return: Tensor with preprocessed image
    """
    image = tf.image.decode_jpeg(tf.io.read_file(path), channels=3)
    # # 把numpy数据转为Tensor
    image = tf.cast(tf.image.resize_with_pad(image, IMG_SIZE, IMG_SIZE), dtype=tf.int32)
    return tf.keras.applications.efficientnet.preprocess_input(image)

# @tf.function
# def id_to_path(img_id: str, dir: str):
#     """Function returns a path to an image file.
#     :param img_id: Image Id
#     :param dir: Path to the directory with images
#     :return: Image file path
#     """
#     return os.path.join(dir, f'{img_id}.jpg')

def id_to_path(img_id: str, dir: str):
    """Function returns a path to an image file.
    :param img_id: Image Id
    :param dir: Path to the directory with images
    :return: Image file path
    """
    return os.path.join(dir, f'{img_id}.jpg')

@tf.function
def process_dataset(path: str, label: int) -> tuple:
    """Function returns preprocessed image and label.
    :param path: Path to image file
    :param label: Class label
    :return: tf.Tensor with preprocessed image, numeric label
    """
    return get_image(path), label


@tf.function
def get_dataset(x, y=None) -> tf.data.Dataset:
    """Function creates batched optimized dataset for the model
    out of an array of file paths and (optionally) class labels.
    :param x: Input data for the model (array of file paths)
    :param y: Target values for the model (array of class indexes)
    :return TensorFlow Dataset object
    """
    if y is not None:
        ds = tf.data.Dataset.from_tensor_slices((x, y))
        return ds.map(process_dataset, num_parallel_calls=AUTOTUNE) \
            .batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
    else:
        ds = tf.data.Dataset.from_tensor_slices(x)
        return ds.map(get_image, num_parallel_calls=AUTOTUNE) \
            .batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

# Data

In [44]:
data_train = pd.read_csv('D:/Kaggle/PetFinder/train.csv')

In [45]:
# Reconstruct the paths to train and test images.
data_train['path'] = data_train['Id'].apply(lambda x: id_to_path(x, TRAIN_DIRECTORY))
# data_test['path'] = data_test['Id'].apply(lambda x: id_to_path(x, TEST_DIRECTORY))

# Keep a portion of the labeled data for validation.
train_subset, valid_subset = train_test_split(
    data_train[['path', TARGET_NAME]],
    test_size=VAL_SIZE, shuffle=True, random_state=SEED
)

In [46]:
train_subset.head()

Unnamed: 0,path,Pawpularity
6258,D:/Kaggle/PetFinder/train/a1c4da5842d088af2d04...,85
8188,D:/Kaggle/PetFinder/train/d41e8327d33ad2a70470...,18
3043,D:/Kaggle/PetFinder/train/4ec0240d2cbb5cf0b176...,36
6609,D:/Kaggle/PetFinder/train/aa8a14e16f31dcd1ab01...,42
2395,D:/Kaggle/PetFinder/train/3dac372745d2acd1371f...,28


In [49]:
# Create TensorFlow datasets
train_ds = get_dataset(x=train_subset['path'], y=train_subset[TARGET_NAME])
valid_ds = get_dataset(x=valid_subset['path'], y=valid_subset[TARGET_NAME])
# test_ds = get_dataset(x=data_test['path'])

In [50]:
train_ds

<_VariantDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.int32, tf.int64)>

In [71]:
cnn = models.Sequential([
        # cnn
        layers.Conv2D(filters=16, kernel_size=(3,3),activation='relu',input_shape=(224,224,3)),
        layers.MaxPool2D((2,2)),
        layers.Conv2D(filters=16, kernel_size=(3,3),activation='relu'),
        layers.MaxPool2D((2,2)),
        layers.Conv2D(filters=8, kernel_size=(3,3),activation='relu'),
        layers.MaxPool2D((2,2)),
        #dense
        layers.Flatten(),
        layers.Dense(8, activation='relu'),
        layers.Dense(1, activation='softmax'),# will nomralize your probability
])

cnn.compile(
    optimizer = 'adam',
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [72]:
cnn.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 222, 222, 16)      448       
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 111, 111, 16)      0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 109, 109, 16)      2320      
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 54, 54, 16)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 52, 52, 8)         1160      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 26, 26, 8)         0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 5408)             

In [74]:
cnn.fit(train_ds,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x20e3b09a908>