# Machine Learning Project - Pawpularity ##
### Armando Fortes (2021403383), David Pissarra (2021403381)

#### Imports ####

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from sklearn.model_selection import train_test_split
from tensorflow.keras.metrics import RootMeanSquaredError

In [None]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving all of
# it up front.
physical_devices = tf.config.list_physical_devices('GPU')
print(f'Number of GPUs available: {len(physical_devices)}')
for gpu in physical_devices:
    try:
        # Apply the setting to every visible GPU, not only the first one, so
        # that all devices are configured consistently.
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be set before the GPU is initialised; this
        # happens e.g. when the cell is re-run in an already-used kernel.
        print(f'Could not enable memory growth on {gpu.name}: {err}')

#### Constants and Hyperparameters ####

In [None]:
# Root folder of the dataset (relative path); every other path derives from it.
dataset_dir = '../Dataset/'

# Image folders for each split.
train_images_path = f'{dataset_dir}train/'
test_images_path = f'{dataset_dir}test/'

# CSV metadata files for each split.
train_meta_path = f'{dataset_dir}train.csv'
test_meta_path = f'{dataset_dir}test.csv'

In [None]:
# --- Training schedule ---
EPOCHS = 10            # number of passes over the training dataset
LEARNING_RATE = 1e-3   # step size given to the Adam optimizer

# --- Input pipeline ---
IMAGE_DIM = 128        # images are resized to IMAGE_DIM x IMAGE_DIM
BATCH_SIZE = 64        # examples per batch in every tf.data pipeline
BUFFER_SIZE = 1024     # used when shuffling the training dataset

# Let tf.data pick the parallelism / prefetch depth at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

#### Image Dataset Preprocessing ####

In [None]:
def load_image(image_path):
    """Read a JPEG file and return it as a resized float32 image tensor.

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        A float32 tensor of shape (IMAGE_DIM, IMAGE_DIM, 3) whose pixel
        values were divided by 255 before resizing.
    """
    raw_bytes = tf.io.read_file(image_path)
    # channels=3 forces a three-channel (RGB) decode.
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.
    return tf.image.resize(scaled, (IMAGE_DIM, IMAGE_DIM))

In [None]:
def map_image(image_path, label):
    """Pair the image loaded from ``image_path`` with its unchanged ``label``.

    Args:
        image_path: Path of the image file, forwarded to ``load_image``.
        label: Target value associated with the image; returned as-is.

    Returns:
        A ``(image, label)`` tuple.
    """
    image = load_image(image_path)
    return image, label

Training and Validation Sets

In [None]:
# Load the training metadata and order the rows by image Id.
train_metadata = pd.read_csv(train_meta_path)
train_metadata = train_metadata.sort_values(by='Id')

# Image file paths and their Pawpularity targets, in the same row order.
images_names = (train_images_path + train_metadata['Id'] + '.jpg').values
images_paws = train_metadata['Pawpularity'].values

In [None]:
# Fixed seed so the train/validation partition is identical on every run;
# without it, validation scores from different runs are not comparable.
SPLIT_SEED = 42

# Hold out 10% of the labelled images for validation.
train_images_names, valid_images_names, train_images_paws, valid_images_paws = train_test_split(
    images_names,
    images_paws,
    test_size=0.1,
    random_state=SPLIT_SEED,
)

In [None]:
# Training pipeline: shuffle the (path, label) pairs first, then decode.
# Shuffling before `map` means the shuffle buffer only holds small path/label
# elements rather than decoded images, so it can span the whole training set
# and give a full shuffle every epoch instead of a partial one.
shuffle_buffer = max(BUFFER_SIZE, len(train_images_names))

ds_train = tf.data.Dataset.from_tensor_slices((train_images_names, train_images_paws))
ds_train = ds_train.shuffle(buffer_size=shuffle_buffer, reshuffle_each_iteration=True)
ds_train = ds_train.map(map_image, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.batch(BATCH_SIZE)
# Prepare upcoming batches while the current one is being consumed.
ds_train = ds_train.prefetch(AUTOTUNE)

In [None]:
# Validation pipeline: decode, batch and prefetch; no shuffling is applied.
ds_valid = (
    tf.data.Dataset
    .from_tensor_slices((valid_images_names, valid_images_paws))
    .map(map_image, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

Test Set

In [None]:
# Load the test metadata and order the rows by image Id.
test_metadata = pd.read_csv(test_meta_path)
test_metadata = test_metadata.sort_values(by='Id')

# File path of every test image, in the same row order as test_metadata.
test_images_names = (test_images_path + test_metadata['Id'] + '.jpg').values

In [None]:
# Test pipeline: there are no labels, so each element is only an image path
# and is decoded with load_image directly.
ds_test = (
    tf.data.Dataset
    .from_tensor_slices((test_images_names,))
    .map(load_image, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# for x in ds_test:
#     plt.imshow(x[0].numpy())
#     plt.show()
#     break

#### Model and Training ####

In [None]:
# Convolutional feature extractor: three 3x3 convolutions with ReLU, with
# 2x2 max-pooling after each of the first two.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMAGE_DIM, IMAGE_DIM, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
])

# Show layer output shapes and parameter counts of the convolutional part.
model.summary()

In [None]:
# Regression head on top of the convolutional features.
# NOTE: this cell appends to the existing `model`; run it exactly once after
# the cell that creates the model, otherwise the layers are stacked again.
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# The 10-unit layer needs a non-linearity: without one, it and the final
# Dense(1) compose into a single linear map and add no modelling capacity.
model.add(layers.Dense(10, activation='relu'))
# Single linear output unit: the predicted score.
model.add(layers.Dense(1))

model.summary()

In [None]:
# Optimise mean squared error with Adam and report RMSE under the name 'rmse'.
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss=MeanSquaredError(),
    metrics=[RootMeanSquaredError(name='rmse')],
)

# Train for EPOCHS passes over ds_train, using ds_valid as validation data.
history = model.fit(
    x=ds_train,
    epochs=EPOCHS,
    validation_data=ds_valid,
)

#### Submission ####

In [None]:
# The model is trained on the raw 'Pawpularity' column (the labels are never
# rescaled), so its outputs are already on the target scale. Multiplying them
# by 100 would inflate every submitted prediction by a factor of 100.
yhat_test = model.predict(ds_test, verbose=1)

In [None]:
# Quick sanity check of the raw predictions produced for the test set.
print(yhat_test)

In [None]:
# Build the submission table: one row per test image Id with its prediction.
# Predictions are assigned positionally, in the row order of test_metadata.
test_predictions = pd.DataFrame({'Id': test_metadata['Id']}).assign(Pawpularity=yhat_test)

# Write the submission file without the DataFrame index column.
test_predictions.to_csv('submission.csv', index=False)

In [None]:
# Preview the first rows of the submission table.
test_predictions.head(8)